1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/t_lock.h> 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/buf.h> 34 #include <sys/conf.h> 35 #include <sys/cred.h> 36 #include <sys/kmem.h> 37 #include <sys/sysmacros.h> 38 #include <sys/vfs.h> 39 #include <sys/vnode.h> 40 #include <sys/debug.h> 41 #include <sys/errno.h> 42 #include <sys/time.h> 43 #include <sys/file.h> 44 #include <sys/open.h> 45 #include <sys/user.h> 46 #include <sys/termios.h> 47 #include <sys/stream.h> 48 #include <sys/strsubr.h> 49 #include <sys/strsun.h> 50 #include <sys/ddi.h> 51 #include <sys/esunddi.h> 52 #include <sys/flock.h> 53 #include <sys/modctl.h> 54 #include <sys/vtrace.h> 55 #include <sys/cmn_err.h> 56 #include <sys/pathname.h> 57 58 #include <sys/socket.h> 59 #include <sys/socketvar.h> 60 #include <sys/sockio.h> 61 #include <netinet/in.h> 62 #include <sys/un.h> 63 #include <sys/strsun.h> 64 65 #include <sys/tiuser.h> 66 #define _SUN_TPI_VERSION 2 67 #include <sys/tihdr.h> 68 #include <sys/timod.h> /* TI_GETMYNAME, TI_GETPEERNAME */ 69 70 #include <c2/audit.h> 71 72 #include <inet/common.h> 73 #include <inet/ip.h> 74 #include <inet/ip6.h> 75 #include <inet/tcp.h> 76 #include <inet/udp_impl.h> 77 78 #include <sys/zone.h> 79 80 #include <fs/sockfs/nl7c.h> 81 #include <fs/sockfs/nl7curi.h> 82 83 #include <inet/kssl/ksslapi.h> 84 85 /* 86 * Possible failures when memory can't be allocated. The documented behavior: 87 * 88 * 5.5: 4.X: XNET: 89 * accept: ENOMEM/ENOSR/EINTR - (EINTR) ENOMEM/ENOBUFS/ENOSR/ 90 * EINTR 91 * (4.X does not document EINTR but returns it) 92 * bind: ENOSR - ENOBUFS/ENOSR 93 * connect: EINTR EINTR ENOBUFS/ENOSR/EINTR 94 * getpeername: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR 95 * getsockname: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR 96 * (4.X getpeername and getsockname do not fail in practice) 97 * getsockopt: ENOMEM/ENOSR - ENOBUFS/ENOSR 98 * listen: - - ENOBUFS 99 * recv: ENOMEM/ENOSR/EINTR EINTR ENOBUFS/ENOMEM/ENOSR/ 100 * EINTR 101 * send: ENOMEM/ENOSR/EINTR ENOBUFS/EINTR ENOBUFS/ENOMEM/ENOSR/ 102 * EINTR 103 * setsockopt: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR 104 * shutdown: ENOMEM/ENOSR - ENOBUFS/ENOSR 105 * socket: ENOMEM/ENOSR ENOBUFS ENOBUFS/ENOMEM/ENOSR 106 * socketpair: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR 107 * 108 * Resolution. When allocation fails: 109 * recv: return EINTR 110 * send: return EINTR 111 * connect, accept: EINTR 112 * bind, listen, shutdown (unbind, unix_close, disconnect): sleep 113 * socket, socketpair: ENOBUFS 114 * getpeername, getsockname: sleep 115 * getsockopt, setsockopt: sleep 116 */ 117 118 #ifdef SOCK_TEST 119 /* 120 * Variables that make sockfs do something other than the standard TPI 121 * for the AF_INET transports. 122 * 123 * solisten_tpi_tcp: 124 * TCP can handle a O_T_BIND_REQ with an increased backlog even though 125 * the transport is already bound. This is needed to avoid loosing the 126 * port number should listen() do a T_UNBIND_REQ followed by a 127 * O_T_BIND_REQ. 128 * 129 * soconnect_tpi_udp: 130 * UDP and ICMP can handle a T_CONN_REQ. 131 * This is needed to make the sequence of connect(), getsockname() 132 * return the local IP address used to send packets to the connected to 133 * destination. 134 * 135 * soconnect_tpi_tcp: 136 * TCP can handle a T_CONN_REQ without seeing a O_T_BIND_REQ. 137 * Set this to non-zero to send TPI conformant messages to TCP in this 138 * respect. This is a performance optimization. 139 * 140 * soaccept_tpi_tcp: 141 * TCP can handle a T_CONN_REQ without the acceptor being bound. 142 * This is a performance optimization that has been picked up in XTI. 143 * 144 * soaccept_tpi_multioptions: 145 * When inheriting SOL_SOCKET options from the listener to the accepting 146 * socket send them as a single message for AF_INET{,6}. 147 */ 148 int solisten_tpi_tcp = 0; 149 int soconnect_tpi_udp = 0; 150 int soconnect_tpi_tcp = 0; 151 int soaccept_tpi_tcp = 0; 152 int soaccept_tpi_multioptions = 1; 153 #else /* SOCK_TEST */ 154 #define soconnect_tpi_tcp 0 155 #define soconnect_tpi_udp 0 156 #define solisten_tpi_tcp 0 157 #define soaccept_tpi_tcp 0 158 #define soaccept_tpi_multioptions 1 159 #endif /* SOCK_TEST */ 160 161 #ifdef SOCK_TEST 162 extern int do_useracc; 163 extern clock_t sock_test_timelimit; 164 #endif /* SOCK_TEST */ 165 166 /* 167 * Some X/Open added checks might have to be backed out to keep SunOS 4.X 168 * applications working. Turn on this flag to disable these checks. 169 */ 170 int xnet_skip_checks = 0; 171 int xnet_check_print = 0; 172 int xnet_truncate_print = 0; 173 174 extern void sigintr(k_sigset_t *, int); 175 extern void sigunintr(k_sigset_t *); 176 177 extern void *nl7c_lookup_addr(void *, t_uscalar_t); 178 extern void *nl7c_add_addr(void *, t_uscalar_t); 179 extern void nl7c_listener_addr(void *, struct sonode *); 180 181 /* Sockets acting as an in-kernel SSL proxy */ 182 extern mblk_t *strsock_kssl_input(vnode_t *, mblk_t *, strwakeup_t *, 183 strsigset_t *, strsigset_t *, strpollset_t *); 184 extern mblk_t *strsock_kssl_output(vnode_t *, mblk_t *, strwakeup_t *, 185 strsigset_t *, strsigset_t *, strpollset_t *); 186 187 static int sotpi_unbind(struct sonode *, int); 188 189 /* TPI sockfs sonode operations */ 190 static int sotpi_accept(struct sonode *, int, struct sonode **); 191 static int sotpi_bind(struct sonode *, struct sockaddr *, socklen_t, 192 int); 193 static int sotpi_connect(struct sonode *, const struct sockaddr *, 194 socklen_t, int, int); 195 static int sotpi_listen(struct sonode *, int); 196 static int sotpi_sendmsg(struct sonode *, struct nmsghdr *, 197 struct uio *); 198 static int sotpi_shutdown(struct sonode *, int); 199 static int sotpi_getsockname(struct sonode *); 200 static int sosend_dgramcmsg(struct sonode *, struct sockaddr *, socklen_t, 201 struct uio *, void *, t_uscalar_t, int); 202 static int sodgram_direct(struct sonode *, struct sockaddr *, 203 socklen_t, struct uio *, int); 204 205 sonodeops_t sotpi_sonodeops = { 206 sotpi_accept, /* sop_accept */ 207 sotpi_bind, /* sop_bind */ 208 sotpi_listen, /* sop_listen */ 209 sotpi_connect, /* sop_connect */ 210 sotpi_recvmsg, /* sop_recvmsg */ 211 sotpi_sendmsg, /* sop_sendmsg */ 212 sotpi_getpeername, /* sop_getpeername */ 213 sotpi_getsockname, /* sop_getsockname */ 214 sotpi_shutdown, /* sop_shutdown */ 215 sotpi_getsockopt, /* sop_getsockopt */ 216 sotpi_setsockopt /* sop_setsockopt */ 217 }; 218 219 /* 220 * Common create code for socket and accept. If tso is set the values 221 * from that node is used instead of issuing a T_INFO_REQ. 222 * 223 * Assumes that the caller has a VN_HOLD on accessvp. 224 * The VN_RELE will occur either when sotpi_create() fails or when 225 * the returned sonode is freed. 226 */ 227 struct sonode * 228 sotpi_create(vnode_t *accessvp, int domain, int type, int protocol, int version, 229 struct sonode *tso, int *errorp) 230 { 231 struct sonode *so; 232 vnode_t *vp; 233 int flags, error; 234 235 ASSERT(accessvp != NULL); 236 vp = makesockvp(accessvp, domain, type, protocol); 237 ASSERT(vp != NULL); 238 so = VTOSO(vp); 239 240 flags = FREAD|FWRITE; 241 242 if ((type == SOCK_STREAM || type == SOCK_DGRAM) && 243 (domain == AF_INET || domain == AF_INET6) && 244 (protocol == IPPROTO_TCP || protocol == IPPROTO_UDP || 245 protocol == IPPROTO_IP)) { 246 /* Tell tcp or udp that it's talking to sockets */ 247 flags |= SO_SOCKSTR; 248 249 /* 250 * Here we indicate to socktpi_open() our attempt to 251 * make direct calls between sockfs and transport. 252 * The final decision is left to socktpi_open(). 253 */ 254 so->so_state |= SS_DIRECT; 255 256 ASSERT(so->so_type != SOCK_DGRAM || tso == NULL); 257 if (so->so_type == SOCK_STREAM && tso != NULL) { 258 if (tso->so_state & SS_DIRECT) { 259 /* 260 * Inherit SS_DIRECT from listener and pass 261 * SO_ACCEPTOR open flag to tcp, indicating 262 * that this is an accept fast-path instance. 263 */ 264 flags |= SO_ACCEPTOR; 265 } else { 266 /* 267 * SS_DIRECT is not set on listener, meaning 268 * that the listener has been converted from 269 * a socket to a stream. Ensure that the 270 * acceptor inherits these settings. 271 */ 272 so->so_state &= ~SS_DIRECT; 273 flags &= ~SO_SOCKSTR; 274 } 275 } 276 } 277 278 /* 279 * Tell local transport that it is talking to sockets. 280 */ 281 if (so->so_family == AF_UNIX) { 282 flags |= SO_SOCKSTR; 283 } 284 285 /* Initialize the kernel SSL proxy fields */ 286 so->so_kssl_type = KSSL_NO_PROXY; 287 so->so_kssl_ent = NULL; 288 so->so_kssl_ctx = NULL; 289 290 if (error = socktpi_open(&vp, flags, CRED(), NULL)) { 291 VN_RELE(vp); 292 *errorp = error; 293 return (NULL); 294 } 295 296 if (error = so_strinit(so, tso)) { 297 (void) VOP_CLOSE(vp, 0, 1, 0, CRED(), NULL); 298 VN_RELE(vp); 299 *errorp = error; 300 return (NULL); 301 } 302 303 if (version == SOV_DEFAULT) 304 version = so_default_version; 305 306 so->so_version = (short)version; 307 308 return (so); 309 } 310 311 /* 312 * Bind the socket to an unspecified address in sockfs only. 313 * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't 314 * required in all cases. 315 */ 316 static void 317 so_automatic_bind(struct sonode *so) 318 { 319 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 320 321 ASSERT(MUTEX_HELD(&so->so_lock)); 322 ASSERT(!(so->so_state & SS_ISBOUND)); 323 ASSERT(so->so_unbind_mp); 324 325 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 326 bzero(so->so_laddr_sa, so->so_laddr_len); 327 so->so_laddr_sa->sa_family = so->so_family; 328 so->so_state |= SS_ISBOUND; 329 } 330 331 332 /* 333 * bind the socket. 334 * 335 * If the socket is already bound and none of _SOBIND_SOCKBSD or _SOBIND_XPG4_2 336 * are passed in we allow rebinding. Note that for backwards compatibility 337 * even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to sobind/bind. 338 * Thus the rebinding code is currently not executed. 339 * 340 * The constraints for rebinding are: 341 * - it is a SOCK_DGRAM, or 342 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 343 * and no listen() has been done. 344 * This rebinding code was added based on some language in the XNET book 345 * about not returning EINVAL it the protocol allows rebinding. However, 346 * this language is not present in the Posix socket draft. Thus maybe the 347 * rebinding logic should be deleted from the source. 348 * 349 * A null "name" can be used to unbind the socket if: 350 * - it is a SOCK_DGRAM, or 351 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 352 * and no listen() has been done. 353 */ 354 static int 355 sotpi_bindlisten(struct sonode *so, struct sockaddr *name, 356 socklen_t namelen, int backlog, int flags) 357 { 358 struct T_bind_req bind_req; 359 struct T_bind_ack *bind_ack; 360 int error = 0; 361 mblk_t *mp; 362 void *addr; 363 t_uscalar_t addrlen; 364 int unbind_on_err = 1; 365 boolean_t clear_acceptconn_on_err = B_FALSE; 366 boolean_t restore_backlog_on_err = B_FALSE; 367 int save_so_backlog; 368 t_scalar_t PRIM_type = O_T_BIND_REQ; 369 boolean_t tcp_udp_xport; 370 void *nl7c = NULL; 371 372 dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n", 373 so, name, namelen, backlog, flags, 374 pr_state(so->so_state, so->so_mode))); 375 376 tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM; 377 378 if (!(flags & _SOBIND_LOCK_HELD)) { 379 mutex_enter(&so->so_lock); 380 so_lock_single(so); /* Set SOLOCKED */ 381 } else { 382 ASSERT(MUTEX_HELD(&so->so_lock)); 383 ASSERT(so->so_flag & SOLOCKED); 384 } 385 386 /* 387 * Make sure that there is a preallocated unbind_req message 388 * before binding. This message allocated when the socket is 389 * created but it might be have been consumed. 390 */ 391 if (so->so_unbind_mp == NULL) { 392 dprintso(so, 1, ("sobind: allocating unbind_req\n")); 393 /* NOTE: holding so_lock while sleeping */ 394 so->so_unbind_mp = 395 soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP); 396 } 397 398 if (flags & _SOBIND_REBIND) { 399 /* 400 * Called from solisten after doing an sotpi_unbind() or 401 * potentially without the unbind (latter for AF_INET{,6}). 402 */ 403 ASSERT(name == NULL && namelen == 0); 404 405 if (so->so_family == AF_UNIX) { 406 ASSERT(so->so_ux_bound_vp); 407 addr = &so->so_ux_laddr; 408 addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr); 409 dprintso(so, 1, ("sobind rebind UNIX: addrlen %d, " 410 "addr 0x%p, vp %p\n", 411 addrlen, 412 ((struct so_ux_addr *)addr)->soua_vp, 413 so->so_ux_bound_vp)); 414 } else { 415 addr = so->so_laddr_sa; 416 addrlen = (t_uscalar_t)so->so_laddr_len; 417 } 418 } else if (flags & _SOBIND_UNSPEC) { 419 ASSERT(name == NULL && namelen == 0); 420 421 /* 422 * The caller checked SS_ISBOUND but not necessarily 423 * under so_lock 424 */ 425 if (so->so_state & SS_ISBOUND) { 426 /* No error */ 427 goto done; 428 } 429 430 /* Set an initial local address */ 431 switch (so->so_family) { 432 case AF_UNIX: 433 /* 434 * Use an address with same size as struct sockaddr 435 * just like BSD. 436 */ 437 so->so_laddr_len = 438 (socklen_t)sizeof (struct sockaddr); 439 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 440 bzero(so->so_laddr_sa, so->so_laddr_len); 441 so->so_laddr_sa->sa_family = so->so_family; 442 443 /* 444 * Pass down an address with the implicit bind 445 * magic number and the rest all zeros. 446 * The transport will return a unique address. 447 */ 448 so->so_ux_laddr.soua_vp = NULL; 449 so->so_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT; 450 addr = &so->so_ux_laddr; 451 addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr); 452 break; 453 454 case AF_INET: 455 case AF_INET6: 456 /* 457 * An unspecified bind in TPI has a NULL address. 458 * Set the address in sockfs to have the sa_family. 459 */ 460 so->so_laddr_len = (so->so_family == AF_INET) ? 461 (socklen_t)sizeof (sin_t) : 462 (socklen_t)sizeof (sin6_t); 463 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 464 bzero(so->so_laddr_sa, so->so_laddr_len); 465 so->so_laddr_sa->sa_family = so->so_family; 466 addr = NULL; 467 addrlen = 0; 468 break; 469 470 default: 471 /* 472 * An unspecified bind in TPI has a NULL address. 473 * Set the address in sockfs to be zero length. 474 * 475 * Can not assume there is a sa_family for all 476 * protocol families. For example, AF_X25 does not 477 * have a family field. 478 */ 479 bzero(so->so_laddr_sa, so->so_laddr_len); 480 so->so_laddr_len = 0; /* XXX correct? */ 481 addr = NULL; 482 addrlen = 0; 483 break; 484 } 485 486 } else { 487 if (so->so_state & SS_ISBOUND) { 488 /* 489 * If it is ok to rebind the socket, first unbind 490 * with the transport. A rebind to the NULL address 491 * is interpreted as an unbind. 492 * Note that a bind to NULL in BSD does unbind the 493 * socket but it fails with EINVAL. 494 * Note that regular sockets set SOV_SOCKBSD i.e. 495 * _SOBIND_SOCKBSD gets set here hence no type of 496 * socket does currently allow rebinding. 497 * 498 * If the name is NULL just do an unbind. 499 */ 500 if (flags & (_SOBIND_SOCKBSD|_SOBIND_XPG4_2) && 501 name != NULL) { 502 error = EINVAL; 503 unbind_on_err = 0; 504 eprintsoline(so, error); 505 goto done; 506 } 507 if ((so->so_mode & SM_CONNREQUIRED) && 508 (so->so_state & SS_CANTREBIND)) { 509 error = EINVAL; 510 unbind_on_err = 0; 511 eprintsoline(so, error); 512 goto done; 513 } 514 error = sotpi_unbind(so, 0); 515 if (error) { 516 eprintsoline(so, error); 517 goto done; 518 } 519 ASSERT(!(so->so_state & SS_ISBOUND)); 520 if (name == NULL) { 521 so->so_state &= 522 ~(SS_ISCONNECTED|SS_ISCONNECTING); 523 goto done; 524 } 525 } 526 /* X/Open requires this check */ 527 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 528 if (xnet_check_print) { 529 printf("sockfs: X/Open bind state check " 530 "caused EINVAL\n"); 531 } 532 error = EINVAL; 533 goto done; 534 } 535 536 switch (so->so_family) { 537 case AF_UNIX: 538 /* 539 * All AF_UNIX addresses are nul terminated 540 * when copied (copyin_name) in so the minimum 541 * length is 3 bytes. 542 */ 543 if (name == NULL || 544 (ssize_t)namelen <= sizeof (short) + 1) { 545 error = EISDIR; 546 eprintsoline(so, error); 547 goto done; 548 } 549 /* 550 * Verify so_family matches the bound family. 551 * BSD does not check this for AF_UNIX resulting 552 * in funny mknods. 553 */ 554 if (name->sa_family != so->so_family) { 555 error = EAFNOSUPPORT; 556 goto done; 557 } 558 break; 559 case AF_INET: 560 if (name == NULL) { 561 error = EINVAL; 562 eprintsoline(so, error); 563 goto done; 564 } 565 if ((size_t)namelen != sizeof (sin_t)) { 566 error = name->sa_family != so->so_family ? 567 EAFNOSUPPORT : EINVAL; 568 eprintsoline(so, error); 569 goto done; 570 } 571 if ((flags & _SOBIND_XPG4_2) && 572 (name->sa_family != so->so_family)) { 573 /* 574 * This check has to be made for X/Open 575 * sockets however application failures have 576 * been observed when it is applied to 577 * all sockets. 578 */ 579 error = EAFNOSUPPORT; 580 eprintsoline(so, error); 581 goto done; 582 } 583 /* 584 * Force a zero sa_family to match so_family. 585 * 586 * Some programs like inetd(1M) don't set the 587 * family field. Other programs leave 588 * sin_family set to garbage - SunOS 4.X does 589 * not check the family field on a bind. 590 * We use the family field that 591 * was passed in to the socket() call. 592 */ 593 name->sa_family = so->so_family; 594 break; 595 596 case AF_INET6: { 597 #ifdef DEBUG 598 sin6_t *sin6 = (sin6_t *)name; 599 #endif /* DEBUG */ 600 601 if (name == NULL) { 602 error = EINVAL; 603 eprintsoline(so, error); 604 goto done; 605 } 606 if ((size_t)namelen != sizeof (sin6_t)) { 607 error = name->sa_family != so->so_family ? 608 EAFNOSUPPORT : EINVAL; 609 eprintsoline(so, error); 610 goto done; 611 } 612 if (name->sa_family != so->so_family) { 613 /* 614 * With IPv6 we require the family to match 615 * unlike in IPv4. 616 */ 617 error = EAFNOSUPPORT; 618 eprintsoline(so, error); 619 goto done; 620 } 621 #ifdef DEBUG 622 /* 623 * Verify that apps don't forget to clear 624 * sin6_scope_id etc 625 */ 626 if (sin6->sin6_scope_id != 0 && 627 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 628 zcmn_err(getzoneid(), CE_WARN, 629 "bind with uninitialized sin6_scope_id " 630 "(%d) on socket. Pid = %d\n", 631 (int)sin6->sin6_scope_id, 632 (int)curproc->p_pid); 633 } 634 if (sin6->__sin6_src_id != 0) { 635 zcmn_err(getzoneid(), CE_WARN, 636 "bind with uninitialized __sin6_src_id " 637 "(%d) on socket. Pid = %d\n", 638 (int)sin6->__sin6_src_id, 639 (int)curproc->p_pid); 640 } 641 #endif /* DEBUG */ 642 break; 643 } 644 default: 645 /* 646 * Don't do any length or sa_family check to allow 647 * non-sockaddr style addresses. 648 */ 649 if (name == NULL) { 650 error = EINVAL; 651 eprintsoline(so, error); 652 goto done; 653 } 654 break; 655 } 656 657 if (namelen > (t_uscalar_t)so->so_laddr_maxlen) { 658 error = ENAMETOOLONG; 659 eprintsoline(so, error); 660 goto done; 661 } 662 /* 663 * Save local address. 664 */ 665 so->so_laddr_len = (socklen_t)namelen; 666 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 667 bcopy(name, so->so_laddr_sa, namelen); 668 669 addr = so->so_laddr_sa; 670 addrlen = (t_uscalar_t)so->so_laddr_len; 671 switch (so->so_family) { 672 case AF_INET6: 673 case AF_INET: 674 break; 675 case AF_UNIX: { 676 struct sockaddr_un *soun = 677 (struct sockaddr_un *)so->so_laddr_sa; 678 struct vnode *vp; 679 struct vattr vattr; 680 681 ASSERT(so->so_ux_bound_vp == NULL); 682 /* 683 * Create vnode for the specified path name. 684 * Keep vnode held with a reference in so_ux_bound_vp. 685 * Use the vnode pointer as the address used in the 686 * bind with the transport. 687 * 688 * Use the same mode as in BSD. In particular this does 689 * not observe the umask. 690 */ 691 /* MAXPATHLEN + soun_family + nul termination */ 692 if (so->so_laddr_len > 693 (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) { 694 error = ENAMETOOLONG; 695 eprintsoline(so, error); 696 goto done; 697 } 698 vattr.va_type = VSOCK; 699 vattr.va_mode = 0777 & ~PTOU(curproc)->u_cmask; 700 vattr.va_mask = AT_TYPE|AT_MODE; 701 /* NOTE: holding so_lock */ 702 error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr, 703 EXCL, 0, &vp, CRMKNOD, 0, 0); 704 if (error) { 705 if (error == EEXIST) 706 error = EADDRINUSE; 707 eprintsoline(so, error); 708 goto done; 709 } 710 /* 711 * Establish pointer from the underlying filesystem 712 * vnode to the socket node. 713 * so_ux_bound_vp and v_stream->sd_vnode form the 714 * cross-linkage between the underlying filesystem 715 * node and the socket node. 716 */ 717 ASSERT(SOTOV(so)->v_stream); 718 mutex_enter(&vp->v_lock); 719 vp->v_stream = SOTOV(so)->v_stream; 720 so->so_ux_bound_vp = vp; 721 mutex_exit(&vp->v_lock); 722 723 /* 724 * Use the vnode pointer value as a unique address 725 * (together with the magic number to avoid conflicts 726 * with implicit binds) in the transport provider. 727 */ 728 so->so_ux_laddr.soua_vp = (void *)so->so_ux_bound_vp; 729 so->so_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT; 730 addr = &so->so_ux_laddr; 731 addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr); 732 dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n", 733 addrlen, 734 ((struct so_ux_addr *)addr)->soua_vp)); 735 break; 736 } 737 } /* end switch (so->so_family) */ 738 } 739 740 /* 741 * set SS_ACCEPTCONN before sending down O_T_BIND_REQ since 742 * the transport can start passing up T_CONN_IND messages 743 * as soon as it receives the bind req and strsock_proto() 744 * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs. 745 */ 746 if (flags & _SOBIND_LISTEN) { 747 if ((so->so_state & SS_ACCEPTCONN) == 0) 748 clear_acceptconn_on_err = B_TRUE; 749 save_so_backlog = so->so_backlog; 750 restore_backlog_on_err = B_TRUE; 751 so->so_state |= SS_ACCEPTCONN; 752 so->so_backlog = backlog; 753 } 754 755 /* 756 * If NL7C addr(s) have been configured check for addr/port match, 757 * or if an implicit NL7C socket via AF_NCA mark socket as NL7C. 758 * 759 * NL7C supports the TCP transport only so check AF_INET and AF_INET6 760 * family sockets only. If match mark as such. 761 */ 762 if (nl7c_enabled && ((addr != NULL && 763 (so->so_family == AF_INET || so->so_family == AF_INET6) && 764 (nl7c = nl7c_lookup_addr(addr, addrlen))) || 765 so->so_nl7c_flags == NL7C_AF_NCA)) { 766 /* 767 * NL7C is not supported in non-global zones, 768 * we enforce this restriction here. 769 */ 770 if (so->so_zoneid == GLOBAL_ZONEID) { 771 /* An NL7C socket, mark it */ 772 so->so_nl7c_flags |= NL7C_ENABLED; 773 if (nl7c == NULL) { 774 /* 775 * Was an AF_NCA bind() so add it to the 776 * addr list for reporting purposes. 777 */ 778 nl7c = nl7c_add_addr(addr, addrlen); 779 } 780 } else 781 nl7c = NULL; 782 } 783 /* 784 * We send a T_BIND_REQ for TCP/UDP since we know it supports it, 785 * for other transports we will send in a O_T_BIND_REQ. 786 */ 787 if (tcp_udp_xport && 788 (so->so_family == AF_INET || so->so_family == AF_INET6)) 789 PRIM_type = T_BIND_REQ; 790 791 bind_req.PRIM_type = PRIM_type; 792 bind_req.ADDR_length = addrlen; 793 bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req); 794 bind_req.CONIND_number = backlog; 795 /* NOTE: holding so_lock while sleeping */ 796 mp = soallocproto2(&bind_req, sizeof (bind_req), 797 addr, addrlen, 0, _ALLOC_SLEEP); 798 so->so_state &= ~SS_LADDR_VALID; 799 800 /* Done using so_laddr_sa - can drop the lock */ 801 mutex_exit(&so->so_lock); 802 803 /* 804 * Intercept the bind_req message here to check if this <address/port> 805 * was configured as an SSL proxy server, or if another endpoint was 806 * already configured to act as a proxy for us. 807 * 808 * Note, only if NL7C not enabled for this socket. 809 */ 810 if (nl7c == NULL && 811 (so->so_family == AF_INET || so->so_family == AF_INET6) && 812 so->so_type == SOCK_STREAM) { 813 814 if (so->so_kssl_ent != NULL) { 815 kssl_release_ent(so->so_kssl_ent, so, so->so_kssl_type); 816 so->so_kssl_ent = NULL; 817 } 818 819 so->so_kssl_type = kssl_check_proxy(mp, so, &so->so_kssl_ent); 820 switch (so->so_kssl_type) { 821 case KSSL_NO_PROXY: 822 break; 823 824 case KSSL_HAS_PROXY: 825 mutex_enter(&so->so_lock); 826 goto skip_transport; 827 828 case KSSL_IS_PROXY: 829 break; 830 } 831 } 832 833 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 834 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 835 if (error) { 836 eprintsoline(so, error); 837 mutex_enter(&so->so_lock); 838 goto done; 839 } 840 841 mutex_enter(&so->so_lock); 842 error = sowaitprim(so, PRIM_type, T_BIND_ACK, 843 (t_uscalar_t)sizeof (*bind_ack), &mp, 0); 844 if (error) { 845 eprintsoline(so, error); 846 goto done; 847 } 848 skip_transport: 849 ASSERT(mp); 850 /* 851 * Even if some TPI message (e.g. T_DISCON_IND) was received in 852 * strsock_proto while the lock was dropped above, the bind 853 * is allowed to complete. 854 */ 855 856 /* Mark as bound. This will be undone if we detect errors below. */ 857 if (flags & _SOBIND_NOXLATE) { 858 ASSERT(so->so_family == AF_UNIX); 859 so->so_state |= SS_FADDR_NOXLATE; 860 } 861 ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND)); 862 so->so_state |= SS_ISBOUND; 863 ASSERT(so->so_unbind_mp); 864 865 /* note that we've already set SS_ACCEPTCONN above */ 866 867 /* 868 * Recompute addrlen - an unspecied bind sent down an 869 * address of length zero but we expect the appropriate length 870 * in return. 871 */ 872 addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ? 873 sizeof (so->so_ux_laddr) : so->so_laddr_len); 874 875 bind_ack = (struct T_bind_ack *)mp->b_rptr; 876 /* 877 * The alignment restriction is really too strict but 878 * we want enough alignment to inspect the fields of 879 * a sockaddr_in. 880 */ 881 addr = sogetoff(mp, bind_ack->ADDR_offset, 882 bind_ack->ADDR_length, 883 __TPI_ALIGN_SIZE); 884 if (addr == NULL) { 885 freemsg(mp); 886 error = EPROTO; 887 eprintsoline(so, error); 888 goto done; 889 } 890 if (!(flags & _SOBIND_UNSPEC)) { 891 /* 892 * Verify that the transport didn't return something we 893 * did not want e.g. an address other than what we asked for. 894 * 895 * NOTE: These checks would go away if/when we switch to 896 * using the new TPI (in which the transport would fail 897 * the request instead of assigning a different address). 898 * 899 * NOTE2: For protocols that we don't know (i.e. any 900 * other than AF_INET6, AF_INET and AF_UNIX), we 901 * cannot know if the transport should be expected to 902 * return the same address as that requested. 903 * 904 * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send 905 * down a T_BIND_REQ. We use O_T_BIND_REQ for others. 906 * 907 * For example, in the case of netatalk it may be 908 * inappropriate for the transport to return the 909 * requested address (as it may have allocated a local 910 * port number in behaviour similar to that of an 911 * AF_INET bind request with a port number of zero). 912 * 913 * Given the definition of O_T_BIND_REQ, where the 914 * transport may bind to an address other than the 915 * requested address, it's not possible to determine 916 * whether a returned address that differs from the 917 * requested address is a reason to fail (because the 918 * requested address was not available) or succeed 919 * (because the transport allocated an appropriate 920 * address and/or port). 921 * 922 * sockfs currently requires that the transport return 923 * the requested address in the T_BIND_ACK, unless 924 * there is code here to allow for any discrepancy. 925 * Such code exists for AF_INET and AF_INET6. 926 * 927 * Netatalk chooses to return the requested address 928 * rather than the (correct) allocated address. This 929 * means that netatalk violates the TPI specification 930 * (and would not function correctly if used from a 931 * TLI application), but it does mean that it works 932 * with sockfs. 933 * 934 * As noted above, using the newer XTI bind primitive 935 * (T_BIND_REQ) in preference to O_T_BIND_REQ would 936 * allow sockfs to be more sure about whether or not 937 * the bind request had succeeded (as transports are 938 * not permitted to bind to a different address than 939 * that requested - they must return failure). 940 * Unfortunately, support for T_BIND_REQ may not be 941 * present in all transport implementations (netatalk, 942 * for example, doesn't have it), making the 943 * transition difficult. 944 */ 945 if (bind_ack->ADDR_length != addrlen) { 946 /* Assumes that the requested address was in use */ 947 freemsg(mp); 948 error = EADDRINUSE; 949 eprintsoline(so, error); 950 goto done; 951 } 952 953 switch (so->so_family) { 954 case AF_INET6: 955 case AF_INET: { 956 sin_t *rname, *aname; 957 958 rname = (sin_t *)addr; 959 aname = (sin_t *)so->so_laddr_sa; 960 961 /* 962 * Take advantage of the alignment 963 * of sin_port and sin6_port which fall 964 * in the same place in their data structures. 965 * Just use sin_port for either address family. 966 * 967 * This may become a problem if (heaven forbid) 968 * there's a separate ipv6port_reserved... :-P 969 * 970 * Binding to port 0 has the semantics of letting 971 * the transport bind to any port. 972 * 973 * If the transport is TCP or UDP since we had sent 974 * a T_BIND_REQ we would not get a port other than 975 * what we asked for. 976 */ 977 if (tcp_udp_xport) { 978 /* 979 * Pick up the new port number if we bound to 980 * port 0. 981 */ 982 if (aname->sin_port == 0) 983 aname->sin_port = rname->sin_port; 984 so->so_state |= SS_LADDR_VALID; 985 break; 986 } 987 if (aname->sin_port != 0 && 988 aname->sin_port != rname->sin_port) { 989 freemsg(mp); 990 error = EADDRINUSE; 991 eprintsoline(so, error); 992 goto done; 993 } 994 /* 995 * Pick up the new port number if we bound to port 0. 996 */ 997 aname->sin_port = rname->sin_port; 998 999 /* 1000 * Unfortunately, addresses aren't _quite_ the same. 1001 */ 1002 if (so->so_family == AF_INET) { 1003 if (aname->sin_addr.s_addr != 1004 rname->sin_addr.s_addr) { 1005 freemsg(mp); 1006 error = EADDRNOTAVAIL; 1007 eprintsoline(so, error); 1008 goto done; 1009 } 1010 } else { 1011 sin6_t *rname6 = (sin6_t *)rname; 1012 sin6_t *aname6 = (sin6_t *)aname; 1013 1014 if (!IN6_ARE_ADDR_EQUAL(&aname6->sin6_addr, 1015 &rname6->sin6_addr)) { 1016 freemsg(mp); 1017 error = EADDRNOTAVAIL; 1018 eprintsoline(so, error); 1019 goto done; 1020 } 1021 } 1022 break; 1023 } 1024 case AF_UNIX: 1025 if (bcmp(addr, &so->so_ux_laddr, addrlen) != 0) { 1026 freemsg(mp); 1027 error = EADDRINUSE; 1028 eprintsoline(so, error); 1029 eprintso(so, 1030 ("addrlen %d, addr 0x%x, vp %p\n", 1031 addrlen, *((int *)addr), 1032 so->so_ux_bound_vp)); 1033 goto done; 1034 } 1035 so->so_state |= SS_LADDR_VALID; 1036 break; 1037 default: 1038 /* 1039 * NOTE: This assumes that addresses can be 1040 * byte-compared for equivalence. 1041 */ 1042 if (bcmp(addr, so->so_laddr_sa, addrlen) != 0) { 1043 freemsg(mp); 1044 error = EADDRINUSE; 1045 eprintsoline(so, error); 1046 goto done; 1047 } 1048 /* 1049 * Don't mark SS_LADDR_VALID, as we cannot be 1050 * sure that the returned address is the real 1051 * bound address when talking to an unknown 1052 * transport. 1053 */ 1054 break; 1055 } 1056 } else { 1057 /* 1058 * Save for returned address for getsockname. 1059 * Needed for unspecific bind unless transport supports 1060 * the TI_GETMYNAME ioctl. 1061 * Do this for AF_INET{,6} even though they do, as 1062 * caching info here is much better performance than 1063 * a TPI/STREAMS trip to the transport for getsockname. 1064 * Any which can't for some reason _must_ _not_ set 1065 * LADDR_VALID here for the caching version of getsockname 1066 * to not break; 1067 */ 1068 switch (so->so_family) { 1069 case AF_UNIX: 1070 /* 1071 * Record the address bound with the transport 1072 * for use by socketpair. 1073 */ 1074 bcopy(addr, &so->so_ux_laddr, addrlen); 1075 so->so_state |= SS_LADDR_VALID; 1076 break; 1077 case AF_INET: 1078 case AF_INET6: 1079 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 1080 bcopy(addr, so->so_laddr_sa, so->so_laddr_len); 1081 so->so_state |= SS_LADDR_VALID; 1082 break; 1083 default: 1084 /* 1085 * Don't mark SS_LADDR_VALID, as we cannot be 1086 * sure that the returned address is the real 1087 * bound address when talking to an unknown 1088 * transport. 1089 */ 1090 break; 1091 } 1092 } 1093 1094 if (nl7c != NULL) { 1095 /* Register listen()er sonode pointer with NL7C */ 1096 nl7c_listener_addr(nl7c, so); 1097 } 1098 1099 freemsg(mp); 1100 1101 done: 1102 if (error) { 1103 /* reset state & backlog to values held on entry */ 1104 if (clear_acceptconn_on_err == B_TRUE) 1105 so->so_state &= ~SS_ACCEPTCONN; 1106 if (restore_backlog_on_err == B_TRUE) 1107 so->so_backlog = save_so_backlog; 1108 1109 if (unbind_on_err && so->so_state & SS_ISBOUND) { 1110 int err; 1111 1112 err = sotpi_unbind(so, 0); 1113 /* LINTED - statement has no consequent: if */ 1114 if (err) { 1115 eprintsoline(so, error); 1116 } else { 1117 ASSERT(!(so->so_state & SS_ISBOUND)); 1118 } 1119 } 1120 } 1121 if (!(flags & _SOBIND_LOCK_HELD)) { 1122 so_unlock_single(so, SOLOCKED); 1123 mutex_exit(&so->so_lock); 1124 } else { 1125 /* If the caller held the lock don't release it here */ 1126 ASSERT(MUTEX_HELD(&so->so_lock)); 1127 ASSERT(so->so_flag & SOLOCKED); 1128 } 1129 return (error); 1130 } 1131 1132 /* bind the socket */ 1133 static int 1134 sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 1135 int flags) 1136 { 1137 if ((flags & _SOBIND_SOCKETPAIR) == 0) 1138 return (sotpi_bindlisten(so, name, namelen, 0, flags)); 1139 1140 flags &= ~_SOBIND_SOCKETPAIR; 1141 return (sotpi_bindlisten(so, name, namelen, 1, flags)); 1142 } 1143 1144 /* 1145 * Unbind a socket - used when bind() fails, when bind() specifies a NULL 1146 * address, or when listen needs to unbind and bind. 1147 * If the _SOUNBIND_REBIND flag is specified the addresses are retained 1148 * so that a sobind can pick them up. 1149 */ 1150 static int 1151 sotpi_unbind(struct sonode *so, int flags) 1152 { 1153 struct T_unbind_req unbind_req; 1154 int error = 0; 1155 mblk_t *mp; 1156 1157 dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n", 1158 so, flags, pr_state(so->so_state, so->so_mode))); 1159 1160 ASSERT(MUTEX_HELD(&so->so_lock)); 1161 ASSERT(so->so_flag & SOLOCKED); 1162 1163 if (!(so->so_state & SS_ISBOUND)) { 1164 error = EINVAL; 1165 eprintsoline(so, error); 1166 goto done; 1167 } 1168 1169 mutex_exit(&so->so_lock); 1170 1171 /* 1172 * Flush the read and write side (except stream head read queue) 1173 * and send down T_UNBIND_REQ. 1174 */ 1175 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW); 1176 1177 unbind_req.PRIM_type = T_UNBIND_REQ; 1178 mp = soallocproto1(&unbind_req, sizeof (unbind_req), 1179 0, _ALLOC_SLEEP); 1180 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1181 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1182 mutex_enter(&so->so_lock); 1183 if (error) { 1184 eprintsoline(so, error); 1185 goto done; 1186 } 1187 1188 error = sowaitokack(so, T_UNBIND_REQ); 1189 if (error) { 1190 eprintsoline(so, error); 1191 goto done; 1192 } 1193 1194 /* 1195 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1196 * strsock_proto while the lock was dropped above, the unbind 1197 * is allowed to complete. 1198 */ 1199 if (!(flags & _SOUNBIND_REBIND)) { 1200 /* 1201 * Clear out bound address. 1202 */ 1203 vnode_t *vp; 1204 1205 if ((vp = so->so_ux_bound_vp) != NULL) { 1206 1207 /* Undo any SSL proxy setup */ 1208 if ((so->so_family == AF_INET || 1209 so->so_family == AF_INET6) && 1210 (so->so_type == SOCK_STREAM) && 1211 (so->so_kssl_ent != NULL)) { 1212 kssl_release_ent(so->so_kssl_ent, so, 1213 so->so_kssl_type); 1214 so->so_kssl_ent = NULL; 1215 so->so_kssl_type = KSSL_NO_PROXY; 1216 } 1217 1218 so->so_ux_bound_vp = NULL; 1219 vn_rele_stream(vp); 1220 } 1221 /* Clear out address */ 1222 so->so_laddr_len = 0; 1223 } 1224 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN|SS_LADDR_VALID); 1225 1226 done: 1227 1228 /* If the caller held the lock don't release it here */ 1229 ASSERT(MUTEX_HELD(&so->so_lock)); 1230 ASSERT(so->so_flag & SOLOCKED); 1231 1232 return (error); 1233 } 1234 1235 /* 1236 * listen on the socket. 1237 * For TPI conforming transports this has to first unbind with the transport 1238 * and then bind again using the new backlog. 1239 */ 1240 int 1241 sotpi_listen(struct sonode *so, int backlog) 1242 { 1243 int error = 0; 1244 1245 dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n", 1246 so, backlog, pr_state(so->so_state, so->so_mode))); 1247 1248 if (so->so_serv_type == T_CLTS) 1249 return (EOPNOTSUPP); 1250 1251 /* 1252 * If the socket is ready to accept connections already, then 1253 * return without doing anything. This avoids a problem where 1254 * a second listen() call fails if a connection is pending and 1255 * leaves the socket unbound. Only when we are not unbinding 1256 * with the transport can we safely increase the backlog. 1257 */ 1258 if (so->so_state & SS_ACCEPTCONN && 1259 !((so->so_family == AF_INET || so->so_family == AF_INET6) && 1260 /*CONSTCOND*/ 1261 !solisten_tpi_tcp)) 1262 return (0); 1263 1264 if (so->so_state & SS_ISCONNECTED) 1265 return (EINVAL); 1266 1267 mutex_enter(&so->so_lock); 1268 so_lock_single(so); /* Set SOLOCKED */ 1269 1270 if (backlog < 0) 1271 backlog = 0; 1272 /* 1273 * Use the same qlimit as in BSD. BSD checks the qlimit 1274 * before queuing the next connection implying that a 1275 * listen(sock, 0) allows one connection to be queued. 1276 * BSD also uses 1.5 times the requested backlog. 1277 * 1278 * XNS Issue 4 required a strict interpretation of the backlog. 1279 * This has been waived subsequently for Issue 4 and the change 1280 * incorporated in XNS Issue 5. So we aren't required to do 1281 * anything special for XPG apps. 1282 */ 1283 if (backlog >= (INT_MAX - 1) / 3) 1284 backlog = INT_MAX; 1285 else 1286 backlog = backlog * 3 / 2 + 1; 1287 1288 /* 1289 * If the listen doesn't change the backlog we do nothing. 1290 * This avoids an EPROTO error from the transport. 1291 */ 1292 if ((so->so_state & SS_ACCEPTCONN) && 1293 so->so_backlog == backlog) 1294 goto done; 1295 1296 if (!(so->so_state & SS_ISBOUND)) { 1297 /* 1298 * Must have been explicitly bound in the UNIX domain. 1299 */ 1300 if (so->so_family == AF_UNIX) { 1301 error = EINVAL; 1302 goto done; 1303 } 1304 error = sotpi_bindlisten(so, NULL, 0, backlog, 1305 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN); 1306 } else if (backlog > 0) { 1307 /* 1308 * AF_INET{,6} hack to avoid losing the port. 1309 * Assumes that all AF_INET{,6} transports can handle a 1310 * O_T_BIND_REQ with a non-zero CONIND_number when the TPI 1311 * has already bound thus it is possible to avoid the unbind. 1312 */ 1313 if (!((so->so_family == AF_INET || so->so_family == AF_INET6) && 1314 /*CONSTCOND*/ 1315 !solisten_tpi_tcp)) { 1316 error = sotpi_unbind(so, _SOUNBIND_REBIND); 1317 if (error) 1318 goto done; 1319 } 1320 error = sotpi_bindlisten(so, NULL, 0, backlog, 1321 _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN); 1322 } else { 1323 so->so_state |= SS_ACCEPTCONN; 1324 so->so_backlog = backlog; 1325 } 1326 if (error) 1327 goto done; 1328 ASSERT(so->so_state & SS_ACCEPTCONN); 1329 done: 1330 so_unlock_single(so, SOLOCKED); 1331 mutex_exit(&so->so_lock); 1332 return (error); 1333 } 1334 1335 /* 1336 * Disconnect either a specified seqno or all (-1). 1337 * The former is used on listening sockets only. 1338 * 1339 * When seqno == -1 sodisconnect could call sotpi_unbind. However, 1340 * the current use of sodisconnect(seqno == -1) is only for shutdown 1341 * so there is no point (and potentially incorrect) to unbind. 1342 */ 1343 int 1344 sodisconnect(struct sonode *so, t_scalar_t seqno, int flags) 1345 { 1346 struct T_discon_req discon_req; 1347 int error = 0; 1348 mblk_t *mp; 1349 1350 dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n", 1351 so, seqno, flags, pr_state(so->so_state, so->so_mode))); 1352 1353 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1354 mutex_enter(&so->so_lock); 1355 so_lock_single(so); /* Set SOLOCKED */ 1356 } else { 1357 ASSERT(MUTEX_HELD(&so->so_lock)); 1358 ASSERT(so->so_flag & SOLOCKED); 1359 } 1360 1361 if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ACCEPTCONN))) { 1362 error = EINVAL; 1363 eprintsoline(so, error); 1364 goto done; 1365 } 1366 1367 mutex_exit(&so->so_lock); 1368 /* 1369 * Flush the write side (unless this is a listener) 1370 * and then send down a T_DISCON_REQ. 1371 * (Don't flush on listener since it could flush {O_}T_CONN_RES 1372 * and other messages.) 1373 */ 1374 if (!(so->so_state & SS_ACCEPTCONN)) 1375 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHW); 1376 1377 discon_req.PRIM_type = T_DISCON_REQ; 1378 discon_req.SEQ_number = seqno; 1379 mp = soallocproto1(&discon_req, sizeof (discon_req), 1380 0, _ALLOC_SLEEP); 1381 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1382 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1383 mutex_enter(&so->so_lock); 1384 if (error) { 1385 eprintsoline(so, error); 1386 goto done; 1387 } 1388 1389 error = sowaitokack(so, T_DISCON_REQ); 1390 if (error) { 1391 eprintsoline(so, error); 1392 goto done; 1393 } 1394 /* 1395 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1396 * strsock_proto while the lock was dropped above, the disconnect 1397 * is allowed to complete. However, it is not possible to 1398 * assert that SS_ISCONNECTED|SS_ISCONNECTING are set. 1399 */ 1400 so->so_state &= 1401 ~(SS_ISCONNECTED|SS_ISCONNECTING|SS_LADDR_VALID|SS_FADDR_VALID); 1402 done: 1403 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1404 so_unlock_single(so, SOLOCKED); 1405 mutex_exit(&so->so_lock); 1406 } else { 1407 /* If the caller held the lock don't release it here */ 1408 ASSERT(MUTEX_HELD(&so->so_lock)); 1409 ASSERT(so->so_flag & SOLOCKED); 1410 } 1411 return (error); 1412 } 1413 1414 int 1415 sotpi_accept(struct sonode *so, int fflag, struct sonode **nsop) 1416 { 1417 struct T_conn_ind *conn_ind; 1418 struct T_conn_res *conn_res; 1419 int error = 0; 1420 mblk_t *mp, *ctxmp, *ack_mp; 1421 struct sonode *nso; 1422 vnode_t *nvp; 1423 void *src; 1424 t_uscalar_t srclen; 1425 void *opt; 1426 t_uscalar_t optlen; 1427 t_scalar_t PRIM_type; 1428 t_scalar_t SEQ_number; 1429 size_t sinlen; 1430 1431 dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n", 1432 so, fflag, nsop, pr_state(so->so_state, so->so_mode))); 1433 1434 /* 1435 * Defer single-threading the accepting socket until 1436 * the T_CONN_IND has been received and parsed and the 1437 * new sonode has been opened. 1438 */ 1439 1440 /* Check that we are not already connected */ 1441 if ((so->so_state & SS_ACCEPTCONN) == 0) 1442 goto conn_bad; 1443 again: 1444 if ((error = sowaitconnind(so, fflag, &mp)) != 0) 1445 goto e_bad; 1446 1447 ASSERT(mp); 1448 conn_ind = (struct T_conn_ind *)mp->b_rptr; 1449 ctxmp = mp->b_cont; 1450 1451 /* 1452 * Save SEQ_number for error paths. 1453 */ 1454 SEQ_number = conn_ind->SEQ_number; 1455 1456 srclen = conn_ind->SRC_length; 1457 src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1); 1458 if (src == NULL) { 1459 error = EPROTO; 1460 freemsg(mp); 1461 eprintsoline(so, error); 1462 goto disconnect_unlocked; 1463 } 1464 optlen = conn_ind->OPT_length; 1465 switch (so->so_family) { 1466 case AF_INET: 1467 case AF_INET6: 1468 if ((optlen == sizeof (intptr_t)) && 1469 ((so->so_state & SS_DIRECT) != 0)) { 1470 bcopy(mp->b_rptr + conn_ind->OPT_offset, 1471 &opt, conn_ind->OPT_length); 1472 } else { 1473 /* 1474 * The transport (in this case TCP) hasn't sent up 1475 * a pointer to an instance for the accept fast-path. 1476 * Disable fast-path completely because the call to 1477 * sotpi_create() below would otherwise create an 1478 * incomplete TCP instance, which would lead to 1479 * problems when sockfs sends a normal T_CONN_RES 1480 * message down the new stream. 1481 */ 1482 if (so->so_state & SS_DIRECT) { 1483 int rval; 1484 /* 1485 * For consistency we inform tcp to disable 1486 * direct interface on the listener, though 1487 * we can certainly live without doing this 1488 * because no data will ever travel upstream 1489 * on the listening socket. 1490 */ 1491 so->so_state &= ~SS_DIRECT; 1492 (void) strioctl(SOTOV(so), _SIOCSOCKFALLBACK, 1493 0, 0, K_TO_K, CRED(), &rval); 1494 } 1495 opt = NULL; 1496 optlen = 0; 1497 } 1498 break; 1499 case AF_UNIX: 1500 default: 1501 if (optlen != 0) { 1502 opt = sogetoff(mp, conn_ind->OPT_offset, optlen, 1503 __TPI_ALIGN_SIZE); 1504 if (opt == NULL) { 1505 error = EPROTO; 1506 freemsg(mp); 1507 eprintsoline(so, error); 1508 goto disconnect_unlocked; 1509 } 1510 } 1511 if (so->so_family == AF_UNIX) { 1512 if (!(so->so_state & SS_FADDR_NOXLATE)) { 1513 src = NULL; 1514 srclen = 0; 1515 } 1516 /* Extract src address from options */ 1517 if (optlen != 0) 1518 so_getopt_srcaddr(opt, optlen, &src, &srclen); 1519 } 1520 break; 1521 } 1522 1523 /* 1524 * Create the new socket. 1525 */ 1526 VN_HOLD(so->so_accessvp); 1527 nso = sotpi_create(so->so_accessvp, so->so_family, so->so_type, 1528 so->so_protocol, so->so_version, so, &error); 1529 if (nso == NULL) { 1530 ASSERT(error != 0); 1531 /* 1532 * Accept can not fail with ENOBUFS. sotpi_create 1533 * sleeps waiting for memory until a signal is caught 1534 * so return EINTR. 1535 */ 1536 freemsg(mp); 1537 if (error == ENOBUFS) 1538 error = EINTR; 1539 goto e_disc_unl; 1540 } 1541 nvp = SOTOV(nso); 1542 1543 /* 1544 * If the transport sent up an SSL connection context, then attach 1545 * it the new socket, and set the (sd_wputdatafunc)() and 1546 * (sd_rputdatafunc)() stream head hooks to intercept and process 1547 * SSL records. 1548 */ 1549 if (ctxmp != NULL) { 1550 /* 1551 * This kssl_ctx_t is already held for us by the transport. 1552 * So, we don't need to do a kssl_hold_ctx() here. 1553 */ 1554 nso->so_kssl_ctx = *((kssl_ctx_t *)ctxmp->b_rptr); 1555 freemsg(ctxmp); 1556 mp->b_cont = NULL; 1557 strsetrwputdatahooks(nvp, strsock_kssl_input, 1558 strsock_kssl_output); 1559 } 1560 #ifdef DEBUG 1561 /* 1562 * SO_DEBUG is used to trigger the dprint* and eprint* macros thus 1563 * it's inherited early to allow debugging of the accept code itself. 1564 */ 1565 nso->so_options |= so->so_options & SO_DEBUG; 1566 #endif /* DEBUG */ 1567 1568 /* 1569 * Save the SRC address from the T_CONN_IND 1570 * for getpeername to work on AF_UNIX and on transports that do not 1571 * support TI_GETPEERNAME. 1572 * 1573 * NOTE: AF_UNIX NUL termination is ensured by the sender's 1574 * copyin_name(). 1575 */ 1576 if (srclen > (t_uscalar_t)nso->so_faddr_maxlen) { 1577 error = EINVAL; 1578 freemsg(mp); 1579 eprintsoline(so, error); 1580 goto disconnect_vp_unlocked; 1581 } 1582 nso->so_faddr_len = (socklen_t)srclen; 1583 ASSERT(so->so_faddr_len <= so->so_faddr_maxlen); 1584 bcopy(src, nso->so_faddr_sa, srclen); 1585 nso->so_state |= SS_FADDR_VALID; 1586 1587 if ((DB_REF(mp) > 1) || MBLKSIZE(mp) < 1588 (sizeof (struct T_conn_res) + sizeof (intptr_t))) { 1589 cred_t *cr; 1590 1591 if ((cr = DB_CRED(mp)) != NULL) { 1592 crhold(cr); 1593 nso->so_peercred = cr; 1594 nso->so_cpid = DB_CPID(mp); 1595 } 1596 freemsg(mp); 1597 1598 mp = soallocproto1(NULL, sizeof (struct T_conn_res) + 1599 sizeof (intptr_t), 0, _ALLOC_INTR); 1600 if (mp == NULL) { 1601 /* 1602 * Accept can not fail with ENOBUFS. 1603 * A signal was caught so return EINTR. 1604 */ 1605 error = EINTR; 1606 eprintsoline(so, error); 1607 goto disconnect_vp_unlocked; 1608 } 1609 conn_res = (struct T_conn_res *)mp->b_rptr; 1610 } else { 1611 nso->so_peercred = DB_CRED(mp); 1612 nso->so_cpid = DB_CPID(mp); 1613 DB_CRED(mp) = NULL; 1614 1615 mp->b_rptr = DB_BASE(mp); 1616 conn_res = (struct T_conn_res *)mp->b_rptr; 1617 mp->b_wptr = mp->b_rptr + sizeof (struct T_conn_res); 1618 } 1619 1620 /* 1621 * New socket must be bound at least in sockfs and, except for AF_INET, 1622 * (or AF_INET6) it also has to be bound in the transport provider. 1623 * We set the local address in the sonode from the T_OK_ACK of the 1624 * T_CONN_RES. For this reason the address we bind to here isn't 1625 * important. 1626 */ 1627 if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) && 1628 /*CONSTCOND*/ 1629 nso->so_type == SOCK_STREAM && !soaccept_tpi_tcp) { 1630 /* 1631 * Optimization for AF_INET{,6} transports 1632 * that can handle a T_CONN_RES without being bound. 1633 */ 1634 mutex_enter(&nso->so_lock); 1635 so_automatic_bind(nso); 1636 mutex_exit(&nso->so_lock); 1637 } else { 1638 /* Perform NULL bind with the transport provider. */ 1639 if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC)) != 0) { 1640 ASSERT(error != ENOBUFS); 1641 freemsg(mp); 1642 eprintsoline(nso, error); 1643 goto disconnect_vp_unlocked; 1644 } 1645 } 1646 1647 /* 1648 * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES 1649 * so that any data arriving on the new socket will cause the 1650 * appropriate signals to be delivered for the new socket. 1651 * 1652 * No other thread (except strsock_proto and strsock_misc) 1653 * can access the new socket thus we relax the locking. 1654 */ 1655 nso->so_pgrp = so->so_pgrp; 1656 nso->so_state |= so->so_state & (SS_ASYNC|SS_FADDR_NOXLATE); 1657 1658 if (nso->so_pgrp != 0) { 1659 if ((error = so_set_events(nso, nvp, CRED())) != 0) { 1660 eprintsoline(nso, error); 1661 error = 0; 1662 nso->so_pgrp = 0; 1663 } 1664 } 1665 1666 /* 1667 * Make note of the socket level options. TCP and IP level options 1668 * are already inherited. We could do all this after accept is 1669 * successful but doing it here simplifies code and no harm done 1670 * for error case. 1671 */ 1672 nso->so_options = so->so_options & (SO_DEBUG|SO_REUSEADDR|SO_KEEPALIVE| 1673 SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK| 1674 SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER); 1675 nso->so_sndbuf = so->so_sndbuf; 1676 nso->so_rcvbuf = so->so_rcvbuf; 1677 if (nso->so_options & SO_LINGER) 1678 nso->so_linger = so->so_linger; 1679 1680 if ((so->so_state & SS_DIRECT) != 0) { 1681 1682 ASSERT(opt != NULL); 1683 1684 conn_res->OPT_length = optlen; 1685 conn_res->OPT_offset = MBLKL(mp); 1686 bcopy(&opt, mp->b_wptr, optlen); 1687 mp->b_wptr += optlen; 1688 conn_res->PRIM_type = T_CONN_RES; 1689 conn_res->ACCEPTOR_id = 0; 1690 PRIM_type = T_CONN_RES; 1691 1692 /* Send down the T_CONN_RES on acceptor STREAM */ 1693 error = kstrputmsg(SOTOV(nso), mp, NULL, 1694 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1695 if (error) { 1696 mutex_enter(&so->so_lock); 1697 so_lock_single(so); 1698 eprintsoline(so, error); 1699 goto disconnect_vp; 1700 } 1701 mutex_enter(&nso->so_lock); 1702 error = sowaitprim(nso, T_CONN_RES, T_OK_ACK, 1703 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 1704 if (error) { 1705 mutex_exit(&nso->so_lock); 1706 mutex_enter(&so->so_lock); 1707 so_lock_single(so); 1708 eprintsoline(so, error); 1709 goto disconnect_vp; 1710 } 1711 if (nso->so_family == AF_INET) { 1712 sin_t *sin; 1713 1714 sin = (sin_t *)(ack_mp->b_rptr + 1715 sizeof (struct T_ok_ack)); 1716 bcopy(sin, nso->so_laddr_sa, sizeof (sin_t)); 1717 nso->so_laddr_len = sizeof (sin_t); 1718 } else { 1719 sin6_t *sin6; 1720 1721 sin6 = (sin6_t *)(ack_mp->b_rptr + 1722 sizeof (struct T_ok_ack)); 1723 bcopy(sin6, nso->so_laddr_sa, sizeof (sin6_t)); 1724 nso->so_laddr_len = sizeof (sin6_t); 1725 } 1726 freemsg(ack_mp); 1727 1728 nso->so_state |= SS_ISCONNECTED | SS_LADDR_VALID; 1729 nso->so_priv = opt; 1730 1731 if (so->so_nl7c_flags & NL7C_ENABLED) { 1732 /* 1733 * A NL7C marked listen()er so the new socket 1734 * inherits the listen()er's NL7C state, except 1735 * for NL7C_POLLIN. 1736 * 1737 * Only call NL7C to process the new socket if 1738 * the listen socket allows blocking i/o. 1739 */ 1740 nso->so_nl7c_flags = so->so_nl7c_flags & (~NL7C_POLLIN); 1741 if (so->so_state & (SS_NONBLOCK|SS_NDELAY)) { 1742 /* 1743 * Nonblocking accept() just make it 1744 * persist to defer processing to the 1745 * read-side syscall (e.g. read). 1746 */ 1747 nso->so_nl7c_flags |= NL7C_SOPERSIST; 1748 } else if (nl7c_process(nso, B_FALSE)) { 1749 /* 1750 * NL7C has completed processing on the 1751 * socket, close the socket and back to 1752 * the top to await the next T_CONN_IND. 1753 */ 1754 mutex_exit(&nso->so_lock); 1755 (void) VOP_CLOSE(nvp, 0, 1, (offset_t)0, 1756 CRED(), NULL); 1757 VN_RELE(nvp); 1758 goto again; 1759 } 1760 /* Pass the new socket out */ 1761 } 1762 1763 mutex_exit(&nso->so_lock); 1764 1765 /* 1766 * It's possible, through the use of autopush for example, 1767 * that the acceptor stream may not support SS_DIRECT 1768 * semantics. If the new socket does not support SS_DIRECT 1769 * we issue a _SIOCSOCKFALLBACK to inform the transport 1770 * as we would in the I_PUSH case. 1771 */ 1772 if (!(nso->so_state & SS_DIRECT)) { 1773 int rval; 1774 1775 if ((error = strioctl(SOTOV(nso), _SIOCSOCKFALLBACK, 1776 0, 0, K_TO_K, CRED(), &rval)) != 0) { 1777 mutex_enter(&so->so_lock); 1778 so_lock_single(so); 1779 eprintsoline(so, error); 1780 goto disconnect_vp; 1781 } 1782 } 1783 1784 /* 1785 * Pass out new socket. 1786 */ 1787 if (nsop != NULL) 1788 *nsop = nso; 1789 1790 return (0); 1791 } 1792 1793 /* 1794 * This is the non-performance case for sockets (e.g. AF_UNIX sockets) 1795 * which don't support the FireEngine accept fast-path. It is also 1796 * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd 1797 * again. Neither sockfs nor TCP attempt to find out if some other 1798 * random module has been inserted in between (in which case we 1799 * should follow TLI accept behaviour). We blindly assume the worst 1800 * case and revert back to old behaviour i.e. TCP will not send us 1801 * any option (eager) and the accept should happen on the listener 1802 * queue. Any queued T_conn_ind have already got their options removed 1803 * by so_sock2_stream() when "sockmod" was I_POP'd. 1804 */ 1805 /* 1806 * Fill in the {O_}T_CONN_RES before getting SOLOCKED. 1807 */ 1808 if ((nso->so_mode & SM_ACCEPTOR_ID) == 0) { 1809 #ifdef _ILP32 1810 queue_t *q; 1811 1812 /* 1813 * Find read queue in driver 1814 * Can safely do this since we "own" nso/nvp. 1815 */ 1816 q = strvp2wq(nvp)->q_next; 1817 while (SAMESTR(q)) 1818 q = q->q_next; 1819 q = RD(q); 1820 conn_res->ACCEPTOR_id = (t_uscalar_t)q; 1821 #else 1822 conn_res->ACCEPTOR_id = (t_uscalar_t)getminor(nvp->v_rdev); 1823 #endif /* _ILP32 */ 1824 conn_res->PRIM_type = O_T_CONN_RES; 1825 PRIM_type = O_T_CONN_RES; 1826 } else { 1827 conn_res->ACCEPTOR_id = nso->so_acceptor_id; 1828 conn_res->PRIM_type = T_CONN_RES; 1829 PRIM_type = T_CONN_RES; 1830 } 1831 conn_res->SEQ_number = SEQ_number; 1832 conn_res->OPT_length = 0; 1833 conn_res->OPT_offset = 0; 1834 1835 mutex_enter(&so->so_lock); 1836 so_lock_single(so); /* Set SOLOCKED */ 1837 mutex_exit(&so->so_lock); 1838 1839 error = kstrputmsg(SOTOV(so), mp, NULL, 1840 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1841 mutex_enter(&so->so_lock); 1842 if (error) { 1843 eprintsoline(so, error); 1844 goto disconnect_vp; 1845 } 1846 error = sowaitprim(so, PRIM_type, T_OK_ACK, 1847 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 1848 if (error) { 1849 eprintsoline(so, error); 1850 goto disconnect_vp; 1851 } 1852 /* 1853 * If there is a sin/sin6 appended onto the T_OK_ACK use 1854 * that to set the local address. If this is not present 1855 * then we zero out the address and don't set the 1856 * SS_LADDR_VALID bit. For AF_UNIX endpoints we copy over 1857 * the pathname from the listening socket. 1858 */ 1859 sinlen = (nso->so_family == AF_INET) ? sizeof (sin_t) : sizeof (sin6_t); 1860 if ((nso->so_family == AF_INET) || (nso->so_family == AF_INET6) && 1861 MBLKL(ack_mp) == (sizeof (struct T_ok_ack) + sinlen)) { 1862 ack_mp->b_rptr += sizeof (struct T_ok_ack); 1863 bcopy(ack_mp->b_rptr, nso->so_laddr_sa, sinlen); 1864 nso->so_laddr_len = sinlen; 1865 nso->so_state |= SS_LADDR_VALID; 1866 } else if (nso->so_family == AF_UNIX) { 1867 ASSERT(so->so_family == AF_UNIX); 1868 nso->so_laddr_len = so->so_laddr_len; 1869 ASSERT(nso->so_laddr_len <= nso->so_laddr_maxlen); 1870 bcopy(so->so_laddr_sa, nso->so_laddr_sa, nso->so_laddr_len); 1871 nso->so_state |= SS_LADDR_VALID; 1872 } else { 1873 nso->so_laddr_len = so->so_laddr_len; 1874 ASSERT(nso->so_laddr_len <= nso->so_laddr_maxlen); 1875 bzero(nso->so_laddr_sa, nso->so_addr_size); 1876 nso->so_laddr_sa->sa_family = nso->so_family; 1877 } 1878 freemsg(ack_mp); 1879 1880 so_unlock_single(so, SOLOCKED); 1881 mutex_exit(&so->so_lock); 1882 1883 nso->so_state |= SS_ISCONNECTED; 1884 1885 /* 1886 * Pass out new socket. 1887 */ 1888 if (nsop != NULL) 1889 *nsop = nso; 1890 1891 return (0); 1892 1893 1894 eproto_disc_unl: 1895 error = EPROTO; 1896 e_disc_unl: 1897 eprintsoline(so, error); 1898 goto disconnect_unlocked; 1899 1900 pr_disc_vp_unl: 1901 eprintsoline(so, error); 1902 disconnect_vp_unlocked: 1903 (void) VOP_CLOSE(nvp, 0, 1, 0, CRED(), NULL); 1904 VN_RELE(nvp); 1905 disconnect_unlocked: 1906 (void) sodisconnect(so, SEQ_number, 0); 1907 return (error); 1908 1909 pr_disc_vp: 1910 eprintsoline(so, error); 1911 disconnect_vp: 1912 (void) sodisconnect(so, SEQ_number, _SODISCONNECT_LOCK_HELD); 1913 so_unlock_single(so, SOLOCKED); 1914 mutex_exit(&so->so_lock); 1915 (void) VOP_CLOSE(nvp, 0, 1, 0, CRED(), NULL); 1916 VN_RELE(nvp); 1917 return (error); 1918 1919 conn_bad: /* Note: SunOS 4/BSD unconditionally returns EINVAL here */ 1920 error = (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) 1921 ? EOPNOTSUPP : EINVAL; 1922 e_bad: 1923 eprintsoline(so, error); 1924 return (error); 1925 } 1926 1927 /* 1928 * connect a socket. 1929 * 1930 * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to 1931 * unconnect (by specifying a null address). 1932 */ 1933 int 1934 sotpi_connect(struct sonode *so, 1935 const struct sockaddr *name, 1936 socklen_t namelen, 1937 int fflag, 1938 int flags) 1939 { 1940 struct T_conn_req conn_req; 1941 int error = 0; 1942 mblk_t *mp; 1943 void *src; 1944 socklen_t srclen; 1945 void *addr; 1946 socklen_t addrlen; 1947 boolean_t need_unlock; 1948 1949 dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n", 1950 so, name, namelen, fflag, flags, 1951 pr_state(so->so_state, so->so_mode))); 1952 1953 /* 1954 * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to 1955 * avoid sleeping for memory with SOLOCKED held. 1956 * We know that the T_CONN_REQ can't be larger than 2 * so_faddr_maxlen 1957 * + sizeof (struct T_opthdr). 1958 * (the AF_UNIX so_ux_addr_xlate() does not make the address 1959 * exceed so_faddr_maxlen). 1960 */ 1961 mp = soallocproto(sizeof (struct T_conn_req) + 1962 2 * so->so_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR); 1963 if (mp == NULL) { 1964 /* 1965 * Connect can not fail with ENOBUFS. A signal was 1966 * caught so return EINTR. 1967 */ 1968 error = EINTR; 1969 eprintsoline(so, error); 1970 return (error); 1971 } 1972 1973 mutex_enter(&so->so_lock); 1974 /* 1975 * Make sure there is a preallocated T_unbind_req message 1976 * before any binding. This message is allocated when the 1977 * socket is created. Since another thread can consume 1978 * so_unbind_mp by the time we return from so_lock_single(), 1979 * we should check the availability of so_unbind_mp after 1980 * we return from so_lock_single(). 1981 */ 1982 1983 so_lock_single(so); /* Set SOLOCKED */ 1984 need_unlock = B_TRUE; 1985 1986 if (so->so_unbind_mp == NULL) { 1987 dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n")); 1988 /* NOTE: holding so_lock while sleeping */ 1989 so->so_unbind_mp = 1990 soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR); 1991 if (so->so_unbind_mp == NULL) { 1992 error = EINTR; 1993 goto done; 1994 } 1995 } 1996 1997 /* 1998 * Can't have done a listen before connecting. 1999 */ 2000 if (so->so_state & SS_ACCEPTCONN) { 2001 error = EOPNOTSUPP; 2002 goto done; 2003 } 2004 2005 /* 2006 * Must be bound with the transport 2007 */ 2008 if (!(so->so_state & SS_ISBOUND)) { 2009 if ((so->so_family == AF_INET || so->so_family == AF_INET6) && 2010 /*CONSTCOND*/ 2011 so->so_type == SOCK_STREAM && !soconnect_tpi_tcp) { 2012 /* 2013 * Optimization for AF_INET{,6} transports 2014 * that can handle a T_CONN_REQ without being bound. 2015 */ 2016 so_automatic_bind(so); 2017 } else { 2018 error = sotpi_bind(so, NULL, 0, 2019 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD); 2020 if (error) 2021 goto done; 2022 } 2023 ASSERT(so->so_state & SS_ISBOUND); 2024 flags |= _SOCONNECT_DID_BIND; 2025 } 2026 2027 /* 2028 * Handle a connect to a name parameter of type AF_UNSPEC like a 2029 * connect to a null address. This is the portable method to 2030 * unconnect a socket. 2031 */ 2032 if ((namelen >= sizeof (sa_family_t)) && 2033 (name->sa_family == AF_UNSPEC)) { 2034 name = NULL; 2035 namelen = 0; 2036 } 2037 2038 /* 2039 * Check that we are not already connected. 2040 * A connection-oriented socket cannot be reconnected. 2041 * A connected connection-less socket can be 2042 * - connected to a different address by a subsequent connect 2043 * - "unconnected" by a connect to the NULL address 2044 */ 2045 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) { 2046 ASSERT(!(flags & _SOCONNECT_DID_BIND)); 2047 if (so->so_mode & SM_CONNREQUIRED) { 2048 /* Connection-oriented socket */ 2049 error = so->so_state & SS_ISCONNECTED ? 2050 EISCONN : EALREADY; 2051 goto done; 2052 } 2053 /* Connection-less socket */ 2054 if (name == NULL) { 2055 /* 2056 * Remove the connected state and clear SO_DGRAM_ERRIND 2057 * since it was set when the socket was connected. 2058 * If this is UDP also send down a T_DISCON_REQ. 2059 */ 2060 int val; 2061 2062 if ((so->so_family == AF_INET || 2063 so->so_family == AF_INET6) && 2064 (so->so_type == SOCK_DGRAM || 2065 so->so_type == SOCK_RAW) && 2066 /*CONSTCOND*/ 2067 !soconnect_tpi_udp) { 2068 /* XXX What about implicitly unbinding here? */ 2069 error = sodisconnect(so, -1, 2070 _SODISCONNECT_LOCK_HELD); 2071 } else { 2072 so->so_state &= 2073 ~(SS_ISCONNECTED | SS_ISCONNECTING | 2074 SS_FADDR_VALID); 2075 so->so_faddr_len = 0; 2076 } 2077 2078 so_unlock_single(so, SOLOCKED); 2079 mutex_exit(&so->so_lock); 2080 2081 val = 0; 2082 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, 2083 &val, (t_uscalar_t)sizeof (val)); 2084 2085 mutex_enter(&so->so_lock); 2086 so_lock_single(so); /* Set SOLOCKED */ 2087 goto done; 2088 } 2089 } 2090 ASSERT(so->so_state & SS_ISBOUND); 2091 2092 if (name == NULL || namelen == 0) { 2093 error = EINVAL; 2094 goto done; 2095 } 2096 /* 2097 * Mark the socket if so_faddr_sa represents the transport level 2098 * address. 2099 */ 2100 if (flags & _SOCONNECT_NOXLATE) { 2101 struct sockaddr_ux *soaddr_ux; 2102 2103 ASSERT(so->so_family == AF_UNIX); 2104 if (namelen != sizeof (struct sockaddr_ux)) { 2105 error = EINVAL; 2106 goto done; 2107 } 2108 soaddr_ux = (struct sockaddr_ux *)name; 2109 name = (struct sockaddr *)&soaddr_ux->sou_addr; 2110 namelen = sizeof (soaddr_ux->sou_addr); 2111 so->so_state |= SS_FADDR_NOXLATE; 2112 } 2113 2114 /* 2115 * Length and family checks. 2116 */ 2117 error = so_addr_verify(so, name, namelen); 2118 if (error) 2119 goto bad; 2120 2121 /* 2122 * Save foreign address. Needed for AF_UNIX as well as 2123 * transport providers that do not support TI_GETPEERNAME. 2124 * Also used for cached foreign address for TCP and UDP. 2125 */ 2126 if (namelen > (t_uscalar_t)so->so_faddr_maxlen) { 2127 error = EINVAL; 2128 goto done; 2129 } 2130 so->so_faddr_len = (socklen_t)namelen; 2131 ASSERT(so->so_faddr_len <= so->so_faddr_maxlen); 2132 bcopy(name, so->so_faddr_sa, namelen); 2133 so->so_state |= SS_FADDR_VALID; 2134 2135 if (so->so_family == AF_UNIX) { 2136 if (so->so_state & SS_FADDR_NOXLATE) { 2137 /* 2138 * Already have a transport internal address. Do not 2139 * pass any (transport internal) source address. 2140 */ 2141 addr = so->so_faddr_sa; 2142 addrlen = (t_uscalar_t)so->so_faddr_len; 2143 src = NULL; 2144 srclen = 0; 2145 } else { 2146 /* 2147 * Pass the sockaddr_un source address as an option 2148 * and translate the remote address. 2149 * Holding so_lock thus so_laddr_sa can not change. 2150 */ 2151 src = so->so_laddr_sa; 2152 srclen = (t_uscalar_t)so->so_laddr_len; 2153 dprintso(so, 1, 2154 ("sotpi_connect UNIX: srclen %d, src %p\n", 2155 srclen, src)); 2156 error = so_ux_addr_xlate(so, 2157 so->so_faddr_sa, (socklen_t)so->so_faddr_len, 2158 (flags & _SOCONNECT_XPG4_2), 2159 &addr, &addrlen); 2160 if (error) 2161 goto bad; 2162 } 2163 } else { 2164 addr = so->so_faddr_sa; 2165 addrlen = (t_uscalar_t)so->so_faddr_len; 2166 src = NULL; 2167 srclen = 0; 2168 } 2169 /* 2170 * When connecting a datagram socket we issue the SO_DGRAM_ERRIND 2171 * option which asks the transport provider to send T_UDERR_IND 2172 * messages. These T_UDERR_IND messages are used to return connected 2173 * style errors (e.g. ECONNRESET) for connected datagram sockets. 2174 * 2175 * In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets) 2176 * we send down a T_CONN_REQ. This is needed to let the 2177 * transport assign a local address that is consistent with 2178 * the remote address. Applications depend on a getsockname() 2179 * after a connect() to retrieve the "source" IP address for 2180 * the connected socket. Invalidate the cached local address 2181 * to force getsockname() to enquire of the transport. 2182 */ 2183 if (!(so->so_mode & SM_CONNREQUIRED)) { 2184 /* 2185 * Datagram socket. 2186 */ 2187 int32_t val; 2188 2189 so_unlock_single(so, SOLOCKED); 2190 mutex_exit(&so->so_lock); 2191 2192 val = 1; 2193 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, 2194 &val, (t_uscalar_t)sizeof (val)); 2195 2196 mutex_enter(&so->so_lock); 2197 so_lock_single(so); /* Set SOLOCKED */ 2198 if ((so->so_family != AF_INET && so->so_family != AF_INET6) || 2199 (so->so_type != SOCK_DGRAM && so->so_type != SOCK_RAW) || 2200 soconnect_tpi_udp) { 2201 soisconnected(so); 2202 goto done; 2203 } 2204 /* 2205 * Send down T_CONN_REQ etc. 2206 * Clear fflag to avoid returning EWOULDBLOCK. 2207 */ 2208 fflag = 0; 2209 ASSERT(so->so_family != AF_UNIX); 2210 so->so_state &= ~SS_LADDR_VALID; 2211 } else if (so->so_laddr_len != 0) { 2212 /* 2213 * If the local address or port was "any" then it may be 2214 * changed by the transport as a result of the 2215 * connect. Invalidate the cached version if we have one. 2216 */ 2217 switch (so->so_family) { 2218 case AF_INET: 2219 ASSERT(so->so_laddr_len == (socklen_t)sizeof (sin_t)); 2220 if (((sin_t *)so->so_laddr_sa)->sin_addr.s_addr == 2221 INADDR_ANY || 2222 ((sin_t *)so->so_laddr_sa)->sin_port == 0) 2223 so->so_state &= ~SS_LADDR_VALID; 2224 break; 2225 2226 case AF_INET6: 2227 ASSERT(so->so_laddr_len == (socklen_t)sizeof (sin6_t)); 2228 if (IN6_IS_ADDR_UNSPECIFIED( 2229 &((sin6_t *)so->so_laddr_sa) ->sin6_addr) || 2230 IN6_IS_ADDR_V4MAPPED_ANY( 2231 &((sin6_t *)so->so_laddr_sa)->sin6_addr) || 2232 ((sin6_t *)so->so_laddr_sa)->sin6_port == 0) 2233 so->so_state &= ~SS_LADDR_VALID; 2234 break; 2235 2236 default: 2237 break; 2238 } 2239 } 2240 2241 /* 2242 * Check for failure of an earlier call 2243 */ 2244 if (so->so_error != 0) 2245 goto so_bad; 2246 2247 /* 2248 * Send down T_CONN_REQ. Message was allocated above. 2249 */ 2250 conn_req.PRIM_type = T_CONN_REQ; 2251 conn_req.DEST_length = addrlen; 2252 conn_req.DEST_offset = (t_scalar_t)sizeof (conn_req); 2253 if (srclen == 0) { 2254 conn_req.OPT_length = 0; 2255 conn_req.OPT_offset = 0; 2256 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2257 soappendmsg(mp, addr, addrlen); 2258 } else { 2259 /* 2260 * There is a AF_UNIX sockaddr_un to include as a source 2261 * address option. 2262 */ 2263 struct T_opthdr toh; 2264 2265 toh.level = SOL_SOCKET; 2266 toh.name = SO_SRCADDR; 2267 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 2268 toh.status = 0; 2269 conn_req.OPT_length = 2270 (t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen)); 2271 conn_req.OPT_offset = (t_scalar_t)(sizeof (conn_req) + 2272 _TPI_ALIGN_TOPT(addrlen)); 2273 2274 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2275 soappendmsg(mp, addr, addrlen); 2276 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2277 soappendmsg(mp, &toh, sizeof (toh)); 2278 soappendmsg(mp, src, srclen); 2279 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2280 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2281 } 2282 /* 2283 * Set SS_ISCONNECTING before sending down the T_CONN_REQ 2284 * in order to have the right state when the T_CONN_CON shows up. 2285 */ 2286 soisconnecting(so); 2287 mutex_exit(&so->so_lock); 2288 2289 #ifdef C2_AUDIT 2290 if (audit_active) 2291 audit_sock(T_CONN_REQ, strvp2wq(SOTOV(so)), mp, 0); 2292 #endif /* C2_AUDIT */ 2293 2294 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2295 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2296 mp = NULL; 2297 mutex_enter(&so->so_lock); 2298 if (error != 0) 2299 goto bad; 2300 2301 if ((error = sowaitokack(so, T_CONN_REQ)) != 0) 2302 goto bad; 2303 2304 /* Allow other threads to access the socket */ 2305 so_unlock_single(so, SOLOCKED); 2306 need_unlock = B_FALSE; 2307 2308 /* 2309 * Wait until we get a T_CONN_CON or an error 2310 */ 2311 if ((error = sowaitconnected(so, fflag, 0)) != 0) { 2312 so_lock_single(so); /* Set SOLOCKED */ 2313 need_unlock = B_TRUE; 2314 } 2315 2316 done: 2317 freemsg(mp); 2318 switch (error) { 2319 case EINPROGRESS: 2320 case EALREADY: 2321 case EISCONN: 2322 case EINTR: 2323 /* Non-fatal errors */ 2324 so->so_state &= ~SS_LADDR_VALID; 2325 /* FALLTHRU */ 2326 case 0: 2327 break; 2328 2329 case EHOSTUNREACH: 2330 if (flags & _SOCONNECT_XPG4_2) { 2331 /* 2332 * X/Open specification contains a requirement that 2333 * ENETUNREACH be returned but does not require 2334 * EHOSTUNREACH. In order to keep the test suite 2335 * happy we mess with the errno here. 2336 */ 2337 error = ENETUNREACH; 2338 } 2339 /* FALLTHRU */ 2340 2341 default: 2342 ASSERT(need_unlock); 2343 /* 2344 * Fatal errors: clear SS_ISCONNECTING in case it was set, 2345 * and invalidate local-address cache 2346 */ 2347 so->so_state &= ~(SS_ISCONNECTING | SS_LADDR_VALID); 2348 /* A discon_ind might have already unbound us */ 2349 if ((flags & _SOCONNECT_DID_BIND) && 2350 (so->so_state & SS_ISBOUND)) { 2351 int err; 2352 2353 err = sotpi_unbind(so, 0); 2354 /* LINTED - statement has no conseq */ 2355 if (err) { 2356 eprintsoline(so, err); 2357 } 2358 } 2359 break; 2360 } 2361 if (need_unlock) 2362 so_unlock_single(so, SOLOCKED); 2363 mutex_exit(&so->so_lock); 2364 return (error); 2365 2366 so_bad: error = sogeterr(so); 2367 bad: eprintsoline(so, error); 2368 goto done; 2369 } 2370 2371 int 2372 sotpi_shutdown(struct sonode *so, int how) 2373 { 2374 struct T_ordrel_req ordrel_req; 2375 mblk_t *mp; 2376 uint_t old_state, state_change; 2377 int error = 0; 2378 2379 dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n", 2380 so, how, pr_state(so->so_state, so->so_mode))); 2381 2382 mutex_enter(&so->so_lock); 2383 so_lock_single(so); /* Set SOLOCKED */ 2384 2385 /* 2386 * SunOS 4.X has no check for datagram sockets. 2387 * 5.X checks that it is connected (ENOTCONN) 2388 * X/Open requires that we check the connected state. 2389 */ 2390 if (!(so->so_state & SS_ISCONNECTED)) { 2391 if (!xnet_skip_checks) { 2392 error = ENOTCONN; 2393 if (xnet_check_print) { 2394 printf("sockfs: X/Open shutdown check " 2395 "caused ENOTCONN\n"); 2396 } 2397 } 2398 goto done; 2399 } 2400 /* 2401 * Record the current state and then perform any state changes. 2402 * Then use the difference between the old and new states to 2403 * determine which messages need to be sent. 2404 * This prevents e.g. duplicate T_ORDREL_REQ when there are 2405 * duplicate calls to shutdown(). 2406 */ 2407 old_state = so->so_state; 2408 2409 switch (how) { 2410 case 0: 2411 socantrcvmore(so); 2412 break; 2413 case 1: 2414 socantsendmore(so); 2415 break; 2416 case 2: 2417 socantsendmore(so); 2418 socantrcvmore(so); 2419 break; 2420 default: 2421 error = EINVAL; 2422 goto done; 2423 } 2424 2425 /* 2426 * Assumes that the SS_CANT* flags are never cleared in the above code. 2427 */ 2428 state_change = (so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) - 2429 (old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)); 2430 ASSERT((state_change & ~(SS_CANTRCVMORE|SS_CANTSENDMORE)) == 0); 2431 2432 switch (state_change) { 2433 case 0: 2434 dprintso(so, 1, 2435 ("sotpi_shutdown: nothing to send in state 0x%x\n", 2436 so->so_state)); 2437 goto done; 2438 2439 case SS_CANTRCVMORE: 2440 mutex_exit(&so->so_lock); 2441 strseteof(SOTOV(so), 1); 2442 /* 2443 * strseteof takes care of read side wakeups, 2444 * pollwakeups, and signals. 2445 */ 2446 /* 2447 * Get the read lock before flushing data to avoid problems 2448 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 2449 */ 2450 mutex_enter(&so->so_lock); 2451 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2452 mutex_exit(&so->so_lock); 2453 2454 /* Flush read side queue */ 2455 strflushrq(SOTOV(so), FLUSHALL); 2456 2457 mutex_enter(&so->so_lock); 2458 so_unlock_read(so); /* Clear SOREADLOCKED */ 2459 break; 2460 2461 case SS_CANTSENDMORE: 2462 mutex_exit(&so->so_lock); 2463 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2464 mutex_enter(&so->so_lock); 2465 break; 2466 2467 case SS_CANTSENDMORE|SS_CANTRCVMORE: 2468 mutex_exit(&so->so_lock); 2469 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2470 strseteof(SOTOV(so), 1); 2471 /* 2472 * strseteof takes care of read side wakeups, 2473 * pollwakeups, and signals. 2474 */ 2475 /* 2476 * Get the read lock before flushing data to avoid problems 2477 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 2478 */ 2479 mutex_enter(&so->so_lock); 2480 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2481 mutex_exit(&so->so_lock); 2482 2483 /* Flush read side queue */ 2484 strflushrq(SOTOV(so), FLUSHALL); 2485 2486 mutex_enter(&so->so_lock); 2487 so_unlock_read(so); /* Clear SOREADLOCKED */ 2488 break; 2489 } 2490 2491 ASSERT(MUTEX_HELD(&so->so_lock)); 2492 2493 /* 2494 * If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them 2495 * was set due to this call and the new state has both of them set: 2496 * Send the AF_UNIX close indication 2497 * For T_COTS send a discon_ind 2498 * 2499 * If cantsend was set due to this call: 2500 * For T_COTSORD send an ordrel_ind 2501 * 2502 * Note that for T_CLTS there is no message sent here. 2503 */ 2504 if ((so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) == 2505 (SS_CANTRCVMORE|SS_CANTSENDMORE)) { 2506 /* 2507 * For SunOS 4.X compatibility we tell the other end 2508 * that we are unable to receive at this point. 2509 */ 2510 if (so->so_family == AF_UNIX && so->so_serv_type != T_CLTS) 2511 so_unix_close(so); 2512 2513 if (so->so_serv_type == T_COTS) 2514 error = sodisconnect(so, -1, _SODISCONNECT_LOCK_HELD); 2515 } 2516 if ((state_change & SS_CANTSENDMORE) && 2517 (so->so_serv_type == T_COTS_ORD)) { 2518 /* Send an orderly release */ 2519 ordrel_req.PRIM_type = T_ORDREL_REQ; 2520 2521 mutex_exit(&so->so_lock); 2522 mp = soallocproto1(&ordrel_req, sizeof (ordrel_req), 2523 0, _ALLOC_SLEEP); 2524 /* 2525 * Send down the T_ORDREL_REQ even if there is flow control. 2526 * This prevents shutdown from blocking. 2527 * Note that there is no T_OK_ACK for ordrel_req. 2528 */ 2529 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2530 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2531 mutex_enter(&so->so_lock); 2532 if (error) { 2533 eprintsoline(so, error); 2534 goto done; 2535 } 2536 } 2537 2538 done: 2539 so_unlock_single(so, SOLOCKED); 2540 mutex_exit(&so->so_lock); 2541 return (error); 2542 } 2543 2544 /* 2545 * For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send 2546 * a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer 2547 * that we have closed. 2548 * Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length 2549 * T_UNITDATA_REQ containing the same option. 2550 * 2551 * For SOCK_DGRAM half-connections (somebody connected to this end 2552 * but this end is not connect) we don't know where to send any 2553 * SO_UNIX_CLOSE. 2554 * 2555 * We have to ignore stream head errors just in case there has been 2556 * a shutdown(output). 2557 * Ignore any flow control to try to get the message more quickly to the peer. 2558 * While locally ignoring flow control solves the problem when there 2559 * is only the loopback transport on the stream it would not provide 2560 * the correct AF_UNIX socket semantics when one or more modules have 2561 * been pushed. 2562 */ 2563 void 2564 so_unix_close(struct sonode *so) 2565 { 2566 int error; 2567 struct T_opthdr toh; 2568 mblk_t *mp; 2569 2570 ASSERT(MUTEX_HELD(&so->so_lock)); 2571 2572 ASSERT(so->so_family == AF_UNIX); 2573 2574 if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) != 2575 (SS_ISCONNECTED|SS_ISBOUND)) 2576 return; 2577 2578 dprintso(so, 1, ("so_unix_close(%p) %s\n", 2579 so, pr_state(so->so_state, so->so_mode))); 2580 2581 toh.level = SOL_SOCKET; 2582 toh.name = SO_UNIX_CLOSE; 2583 2584 /* zero length + header */ 2585 toh.len = (t_uscalar_t)sizeof (struct T_opthdr); 2586 toh.status = 0; 2587 2588 if (so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) { 2589 struct T_optdata_req tdr; 2590 2591 tdr.PRIM_type = T_OPTDATA_REQ; 2592 tdr.DATA_flag = 0; 2593 2594 tdr.OPT_length = (t_scalar_t)sizeof (toh); 2595 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 2596 2597 /* NOTE: holding so_lock while sleeping */ 2598 mp = soallocproto2(&tdr, sizeof (tdr), 2599 &toh, sizeof (toh), 0, _ALLOC_SLEEP); 2600 } else { 2601 struct T_unitdata_req tudr; 2602 void *addr; 2603 socklen_t addrlen; 2604 void *src; 2605 socklen_t srclen; 2606 struct T_opthdr toh2; 2607 t_scalar_t size; 2608 2609 /* Connecteded DGRAM socket */ 2610 2611 /* 2612 * For AF_UNIX the destination address is translated to 2613 * an internal name and the source address is passed as 2614 * an option. 2615 */ 2616 /* 2617 * Length and family checks. 2618 */ 2619 error = so_addr_verify(so, so->so_faddr_sa, 2620 (t_uscalar_t)so->so_faddr_len); 2621 if (error) { 2622 eprintsoline(so, error); 2623 return; 2624 } 2625 if (so->so_state & SS_FADDR_NOXLATE) { 2626 /* 2627 * Already have a transport internal address. Do not 2628 * pass any (transport internal) source address. 2629 */ 2630 addr = so->so_faddr_sa; 2631 addrlen = (t_uscalar_t)so->so_faddr_len; 2632 src = NULL; 2633 srclen = 0; 2634 } else { 2635 /* 2636 * Pass the sockaddr_un source address as an option 2637 * and translate the remote address. 2638 * Holding so_lock thus so_laddr_sa can not change. 2639 */ 2640 src = so->so_laddr_sa; 2641 srclen = (socklen_t)so->so_laddr_len; 2642 dprintso(so, 1, 2643 ("so_ux_close: srclen %d, src %p\n", 2644 srclen, src)); 2645 error = so_ux_addr_xlate(so, 2646 so->so_faddr_sa, 2647 (socklen_t)so->so_faddr_len, 0, 2648 &addr, &addrlen); 2649 if (error) { 2650 eprintsoline(so, error); 2651 return; 2652 } 2653 } 2654 tudr.PRIM_type = T_UNITDATA_REQ; 2655 tudr.DEST_length = addrlen; 2656 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 2657 if (srclen == 0) { 2658 tudr.OPT_length = (t_scalar_t)sizeof (toh); 2659 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2660 _TPI_ALIGN_TOPT(addrlen)); 2661 2662 size = tudr.OPT_offset + tudr.OPT_length; 2663 /* NOTE: holding so_lock while sleeping */ 2664 mp = soallocproto2(&tudr, sizeof (tudr), 2665 addr, addrlen, size, _ALLOC_SLEEP); 2666 mp->b_wptr += (_TPI_ALIGN_TOPT(addrlen) - addrlen); 2667 soappendmsg(mp, &toh, sizeof (toh)); 2668 } else { 2669 /* 2670 * There is a AF_UNIX sockaddr_un to include as a 2671 * source address option. 2672 */ 2673 tudr.OPT_length = (t_scalar_t)(2 * sizeof (toh) + 2674 _TPI_ALIGN_TOPT(srclen)); 2675 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2676 _TPI_ALIGN_TOPT(addrlen)); 2677 2678 toh2.level = SOL_SOCKET; 2679 toh2.name = SO_SRCADDR; 2680 toh2.len = (t_uscalar_t)(srclen + 2681 sizeof (struct T_opthdr)); 2682 toh2.status = 0; 2683 2684 size = tudr.OPT_offset + tudr.OPT_length; 2685 2686 /* NOTE: holding so_lock while sleeping */ 2687 mp = soallocproto2(&tudr, sizeof (tudr), 2688 addr, addrlen, size, _ALLOC_SLEEP); 2689 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2690 soappendmsg(mp, &toh, sizeof (toh)); 2691 soappendmsg(mp, &toh2, sizeof (toh2)); 2692 soappendmsg(mp, src, srclen); 2693 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2694 } 2695 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2696 } 2697 mutex_exit(&so->so_lock); 2698 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2699 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2700 mutex_enter(&so->so_lock); 2701 } 2702 2703 /* 2704 * Handle recv* calls that set MSG_OOB or MSG_OOB together with MSG_PEEK. 2705 */ 2706 int 2707 sorecvoob(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, int flags) 2708 { 2709 mblk_t *mp, *nmp; 2710 int error; 2711 2712 dprintso(so, 1, ("sorecvoob(%p, %p, 0x%x)\n", so, msg, flags)); 2713 2714 /* 2715 * There is never any oob data with addresses or control since 2716 * the T_EXDATA_IND does not carry any options. 2717 */ 2718 msg->msg_controllen = 0; 2719 msg->msg_namelen = 0; 2720 2721 mutex_enter(&so->so_lock); 2722 ASSERT(so_verify_oobstate(so)); 2723 if ((so->so_options & SO_OOBINLINE) || 2724 (so->so_state & (SS_OOBPEND|SS_HADOOBDATA)) != SS_OOBPEND) { 2725 dprintso(so, 1, ("sorecvoob: inline or data consumed\n")); 2726 mutex_exit(&so->so_lock); 2727 return (EINVAL); 2728 } 2729 if (!(so->so_state & SS_HAVEOOBDATA)) { 2730 dprintso(so, 1, ("sorecvoob: no data yet\n")); 2731 mutex_exit(&so->so_lock); 2732 return (EWOULDBLOCK); 2733 } 2734 ASSERT(so->so_oobmsg != NULL); 2735 mp = so->so_oobmsg; 2736 if (flags & MSG_PEEK) { 2737 /* 2738 * Since recv* can not return ENOBUFS we can not use dupmsg. 2739 * Instead we revert to the consolidation private 2740 * allocb_wait plus bcopy. 2741 */ 2742 mblk_t *mp1; 2743 2744 mp1 = allocb_wait(msgdsize(mp), BPRI_MED, STR_NOSIG, NULL); 2745 ASSERT(mp1); 2746 2747 while (mp != NULL) { 2748 ssize_t size; 2749 2750 size = MBLKL(mp); 2751 bcopy(mp->b_rptr, mp1->b_wptr, size); 2752 mp1->b_wptr += size; 2753 ASSERT(mp1->b_wptr <= mp1->b_datap->db_lim); 2754 mp = mp->b_cont; 2755 } 2756 mp = mp1; 2757 } else { 2758 /* 2759 * Update the state indicating that the data has been consumed. 2760 * Keep SS_OOBPEND set until data is consumed past the mark. 2761 */ 2762 so->so_oobmsg = NULL; 2763 so->so_state ^= SS_HAVEOOBDATA|SS_HADOOBDATA; 2764 } 2765 dprintso(so, 1, 2766 ("after recvoob(%p): counts %d/%d state %s\n", 2767 so, so->so_oobsigcnt, 2768 so->so_oobcnt, pr_state(so->so_state, so->so_mode))); 2769 ASSERT(so_verify_oobstate(so)); 2770 mutex_exit(&so->so_lock); 2771 2772 error = 0; 2773 nmp = mp; 2774 while (nmp != NULL && uiop->uio_resid > 0) { 2775 ssize_t n = MBLKL(nmp); 2776 2777 n = MIN(n, uiop->uio_resid); 2778 if (n > 0) 2779 error = uiomove(nmp->b_rptr, n, 2780 UIO_READ, uiop); 2781 if (error) 2782 break; 2783 nmp = nmp->b_cont; 2784 } 2785 freemsg(mp); 2786 return (error); 2787 } 2788 2789 /* 2790 * Called by sotpi_recvmsg when reading a non-zero amount of data. 2791 * In addition, the caller typically verifies that there is some 2792 * potential state to clear by checking 2793 * if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) 2794 * before calling this routine. 2795 * Note that such a check can be made without holding so_lock since 2796 * sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg 2797 * decrements so_oobsigcnt. 2798 * 2799 * When data is read *after* the point that all pending 2800 * oob data has been consumed the oob indication is cleared. 2801 * 2802 * This logic keeps select/poll returning POLLRDBAND and 2803 * SIOCATMARK returning true until we have read past 2804 * the mark. 2805 */ 2806 static void 2807 sorecv_update_oobstate(struct sonode *so) 2808 { 2809 mutex_enter(&so->so_lock); 2810 ASSERT(so_verify_oobstate(so)); 2811 dprintso(so, 1, 2812 ("sorecv_update_oobstate: counts %d/%d state %s\n", 2813 so->so_oobsigcnt, 2814 so->so_oobcnt, pr_state(so->so_state, so->so_mode))); 2815 if (so->so_oobsigcnt == 0) { 2816 /* No more pending oob indications */ 2817 so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK); 2818 freemsg(so->so_oobmsg); 2819 so->so_oobmsg = NULL; 2820 } 2821 ASSERT(so_verify_oobstate(so)); 2822 mutex_exit(&so->so_lock); 2823 } 2824 2825 /* 2826 * Handle recv* calls for an so which has NL7C saved recv mblk_t(s). 2827 */ 2828 static int 2829 nl7c_sorecv(struct sonode *so, mblk_t **rmp, uio_t *uiop, rval_t *rp) 2830 { 2831 int error = 0; 2832 mblk_t *tmp = NULL; 2833 mblk_t *pmp = NULL; 2834 mblk_t *nmp = so->so_nl7c_rcv_mp; 2835 2836 ASSERT(nmp != NULL); 2837 2838 while (nmp != NULL && uiop->uio_resid > 0) { 2839 ssize_t n; 2840 2841 if (DB_TYPE(nmp) == M_DATA) { 2842 /* 2843 * We have some data, uiomove up to resid bytes. 2844 */ 2845 n = MIN(MBLKL(nmp), uiop->uio_resid); 2846 if (n > 0) 2847 error = uiomove(nmp->b_rptr, n, UIO_READ, uiop); 2848 nmp->b_rptr += n; 2849 if (nmp->b_rptr == nmp->b_wptr) { 2850 pmp = nmp; 2851 nmp = nmp->b_cont; 2852 } 2853 if (error) 2854 break; 2855 } else { 2856 /* 2857 * We only handle data, save for caller to handle. 2858 */ 2859 if (pmp != NULL) { 2860 pmp->b_cont = nmp->b_cont; 2861 } 2862 nmp->b_cont = NULL; 2863 if (*rmp == NULL) { 2864 *rmp = nmp; 2865 } else { 2866 tmp->b_cont = nmp; 2867 } 2868 nmp = nmp->b_cont; 2869 tmp = nmp; 2870 } 2871 } 2872 if (pmp != NULL) { 2873 /* Free any mblk_t(s) which we have consumed */ 2874 pmp->b_cont = NULL; 2875 freemsg(so->so_nl7c_rcv_mp); 2876 } 2877 if ((so->so_nl7c_rcv_mp = nmp) == NULL) { 2878 /* Last mblk_t so return the saved kstrgetmsg() rval/error */ 2879 if (error == 0) { 2880 rval_t *p = (rval_t *)&so->so_nl7c_rcv_rval; 2881 2882 error = p->r_v.r_v2; 2883 p->r_v.r_v2 = 0; 2884 } 2885 rp->r_vals = so->so_nl7c_rcv_rval; 2886 so->so_nl7c_rcv_rval = 0; 2887 } else { 2888 /* More mblk_t(s) to process so no rval to return */ 2889 rp->r_vals = 0; 2890 } 2891 return (error); 2892 } 2893 2894 /* 2895 * Receive the next message on the queue. 2896 * If msg_controllen is non-zero when called the caller is interested in 2897 * any received control info (options). 2898 * If msg_namelen is non-zero when called the caller is interested in 2899 * any received source address. 2900 * The routine returns with msg_control and msg_name pointing to 2901 * kmem_alloc'ed memory which the caller has to free. 2902 */ 2903 int 2904 sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop) 2905 { 2906 union T_primitives *tpr; 2907 mblk_t *mp; 2908 uchar_t pri; 2909 int pflag, opflag; 2910 void *control; 2911 t_uscalar_t controllen; 2912 t_uscalar_t namelen; 2913 int so_state = so->so_state; /* Snapshot */ 2914 ssize_t saved_resid; 2915 int error; 2916 rval_t rval; 2917 int flags; 2918 clock_t timout; 2919 int first; 2920 2921 flags = msg->msg_flags; 2922 msg->msg_flags = 0; 2923 2924 dprintso(so, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n", 2925 so, msg, flags, 2926 pr_state(so->so_state, so->so_mode), so->so_error)); 2927 2928 /* 2929 * If we are not connected because we have never been connected 2930 * we return ENOTCONN. If we have been connected (but are no longer 2931 * connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return 2932 * the EOF. 2933 * 2934 * An alternative would be to post an ENOTCONN error in stream head 2935 * (read+write) and clear it when we're connected. However, that error 2936 * would cause incorrect poll/select behavior! 2937 */ 2938 if ((so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 && 2939 (so->so_mode & SM_CONNREQUIRED)) { 2940 return (ENOTCONN); 2941 } 2942 2943 /* 2944 * Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but 2945 * after checking that the read queue is empty) and returns zero. 2946 * This implementation will sleep (in kstrgetmsg) even if uio_resid 2947 * is zero. 2948 */ 2949 2950 if (flags & MSG_OOB) { 2951 /* Check that the transport supports OOB */ 2952 if (!(so->so_mode & SM_EXDATA)) 2953 return (EOPNOTSUPP); 2954 return (sorecvoob(so, msg, uiop, flags)); 2955 } 2956 2957 /* 2958 * Set msg_controllen and msg_namelen to zero here to make it 2959 * simpler in the cases that no control or name is returned. 2960 */ 2961 controllen = msg->msg_controllen; 2962 namelen = msg->msg_namelen; 2963 msg->msg_controllen = 0; 2964 msg->msg_namelen = 0; 2965 2966 dprintso(so, 1, ("sotpi_recvmsg: namelen %d controllen %d\n", 2967 namelen, controllen)); 2968 2969 mutex_enter(&so->so_lock); 2970 /* 2971 * If an NL7C enabled socket and not waiting for write data. 2972 */ 2973 if ((so->so_nl7c_flags & (NL7C_ENABLED | NL7C_WAITWRITE)) == 2974 NL7C_ENABLED) { 2975 if (so->so_nl7c_uri) { 2976 /* Close uri processing for a previous request */ 2977 nl7c_close(so); 2978 } 2979 if ((so_state & SS_CANTRCVMORE) && so->so_nl7c_rcv_mp == NULL) { 2980 /* Nothing to process, EOF */ 2981 mutex_exit(&so->so_lock); 2982 return (0); 2983 } else if (so->so_nl7c_flags & NL7C_SOPERSIST) { 2984 /* Persistent NL7C socket, try to process request */ 2985 boolean_t ret; 2986 2987 ret = nl7c_process(so, 2988 (so->so_state & (SS_NONBLOCK|SS_NDELAY))); 2989 rval.r_vals = so->so_nl7c_rcv_rval; 2990 error = rval.r_v.r_v2; 2991 if (error) { 2992 /* Error of some sort, return it */ 2993 mutex_exit(&so->so_lock); 2994 return (error); 2995 } 2996 if (so->so_nl7c_flags && 2997 ! (so->so_nl7c_flags & NL7C_WAITWRITE)) { 2998 /* 2999 * Still an NL7C socket and no data 3000 * to pass up to the caller. 3001 */ 3002 mutex_exit(&so->so_lock); 3003 if (ret) { 3004 /* EOF */ 3005 return (0); 3006 } else { 3007 /* Need more data */ 3008 return (EAGAIN); 3009 } 3010 } 3011 } else { 3012 /* 3013 * Not persistent so no further NL7C processing. 3014 */ 3015 so->so_nl7c_flags = 0; 3016 } 3017 } 3018 /* 3019 * Only one reader is allowed at any given time. This is needed 3020 * for T_EXDATA handling and, in the future, MSG_WAITALL. 3021 * 3022 * This is slightly different that BSD behavior in that it fails with 3023 * EWOULDBLOCK when using nonblocking io. In BSD the read queue access 3024 * is single-threaded using sblock(), which is dropped while waiting 3025 * for data to appear. The difference shows up e.g. if one 3026 * file descriptor does not have O_NONBLOCK but a dup'ed file descriptor 3027 * does use nonblocking io and different threads are reading each 3028 * file descriptor. In BSD there would never be an EWOULDBLOCK error 3029 * in this case as long as the read queue doesn't get empty. 3030 * In this implementation the thread using nonblocking io can 3031 * get an EWOULDBLOCK error due to the blocking thread executing 3032 * e.g. in the uiomove in kstrgetmsg. 3033 * This difference is not believed to be significant. 3034 */ 3035 /* Set SOREADLOCKED */ 3036 error = so_lock_read_intr(so, 3037 uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0)); 3038 mutex_exit(&so->so_lock); 3039 if (error) 3040 return (error); 3041 3042 /* 3043 * Tell kstrgetmsg to not inspect the stream head errors until all 3044 * queued data has been consumed. 3045 * Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set. 3046 * Also, If uio_fmode indicates nonblocking kstrgetmsg will not block. 3047 * 3048 * MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and 3049 * to T_OPTDATA_IND that do not contain any user-visible control msg. 3050 * Note that MSG_WAITALL set with MSG_PEEK is a noop. 3051 */ 3052 pflag = MSG_ANY | MSG_DELAYERROR; 3053 if (flags & MSG_PEEK) { 3054 pflag |= MSG_IPEEK; 3055 flags &= ~MSG_WAITALL; 3056 } 3057 if (so->so_mode & SM_ATOMIC) 3058 pflag |= MSG_DISCARDTAIL; 3059 3060 if (flags & MSG_DONTWAIT) 3061 timout = 0; 3062 else 3063 timout = -1; 3064 opflag = pflag; 3065 first = 1; 3066 3067 retry: 3068 saved_resid = uiop->uio_resid; 3069 pri = 0; 3070 mp = NULL; 3071 if (so->so_nl7c_rcv_mp != NULL) { 3072 /* Already kstrgetmsg()ed saved mblk(s) from NL7C */ 3073 error = nl7c_sorecv(so, &mp, uiop, &rval); 3074 } else { 3075 error = kstrgetmsg(SOTOV(so), &mp, uiop, &pri, &pflag, 3076 timout, &rval); 3077 } 3078 if (error) { 3079 switch (error) { 3080 case EINTR: 3081 case EWOULDBLOCK: 3082 if (!first) 3083 error = 0; 3084 break; 3085 case ETIME: 3086 /* Returned from kstrgetmsg when timeout expires */ 3087 if (!first) 3088 error = 0; 3089 else 3090 error = EWOULDBLOCK; 3091 break; 3092 default: 3093 eprintsoline(so, error); 3094 break; 3095 } 3096 mutex_enter(&so->so_lock); 3097 so_unlock_read(so); /* Clear SOREADLOCKED */ 3098 mutex_exit(&so->so_lock); 3099 return (error); 3100 } 3101 /* 3102 * For datagrams the MOREDATA flag is used to set MSG_TRUNC. 3103 * For non-datagrams MOREDATA is used to set MSG_EOR. 3104 */ 3105 ASSERT(!(rval.r_val1 & MORECTL)); 3106 if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC)) 3107 msg->msg_flags |= MSG_TRUNC; 3108 3109 if (mp == NULL) { 3110 dprintso(so, 1, ("sotpi_recvmsg: got M_DATA\n")); 3111 /* 3112 * 4.3BSD and 4.4BSD clears the mark when peeking across it. 3113 * The draft Posix socket spec states that the mark should 3114 * not be cleared when peeking. We follow the latter. 3115 */ 3116 if ((so->so_state & 3117 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3118 (uiop->uio_resid != saved_resid) && 3119 !(flags & MSG_PEEK)) { 3120 sorecv_update_oobstate(so); 3121 } 3122 3123 mutex_enter(&so->so_lock); 3124 /* Set MSG_EOR based on MOREDATA */ 3125 if (!(rval.r_val1 & MOREDATA)) { 3126 if (so->so_state & SS_SAVEDEOR) { 3127 msg->msg_flags |= MSG_EOR; 3128 so->so_state &= ~SS_SAVEDEOR; 3129 } 3130 } 3131 /* 3132 * If some data was received (i.e. not EOF) and the 3133 * read/recv* has not been satisfied wait for some more. 3134 */ 3135 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3136 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3137 mutex_exit(&so->so_lock); 3138 first = 0; 3139 pflag = opflag | MSG_NOMARK; 3140 goto retry; 3141 } 3142 so_unlock_read(so); /* Clear SOREADLOCKED */ 3143 mutex_exit(&so->so_lock); 3144 return (0); 3145 } 3146 3147 /* strsock_proto has already verified length and alignment */ 3148 tpr = (union T_primitives *)mp->b_rptr; 3149 dprintso(so, 1, ("sotpi_recvmsg: type %d\n", tpr->type)); 3150 3151 switch (tpr->type) { 3152 case T_DATA_IND: { 3153 if ((so->so_state & 3154 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3155 (uiop->uio_resid != saved_resid) && 3156 !(flags & MSG_PEEK)) { 3157 sorecv_update_oobstate(so); 3158 } 3159 3160 /* 3161 * Set msg_flags to MSG_EOR based on 3162 * MORE_flag and MOREDATA. 3163 */ 3164 mutex_enter(&so->so_lock); 3165 so->so_state &= ~SS_SAVEDEOR; 3166 if (!(tpr->data_ind.MORE_flag & 1)) { 3167 if (!(rval.r_val1 & MOREDATA)) 3168 msg->msg_flags |= MSG_EOR; 3169 else 3170 so->so_state |= SS_SAVEDEOR; 3171 } 3172 freemsg(mp); 3173 /* 3174 * If some data was received (i.e. not EOF) and the 3175 * read/recv* has not been satisfied wait for some more. 3176 */ 3177 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3178 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3179 mutex_exit(&so->so_lock); 3180 first = 0; 3181 pflag = opflag | MSG_NOMARK; 3182 goto retry; 3183 } 3184 so_unlock_read(so); /* Clear SOREADLOCKED */ 3185 mutex_exit(&so->so_lock); 3186 return (0); 3187 } 3188 case T_UNITDATA_IND: { 3189 void *addr; 3190 t_uscalar_t addrlen; 3191 void *abuf; 3192 t_uscalar_t optlen; 3193 void *opt; 3194 3195 if ((so->so_state & 3196 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3197 (uiop->uio_resid != saved_resid) && 3198 !(flags & MSG_PEEK)) { 3199 sorecv_update_oobstate(so); 3200 } 3201 3202 if (namelen != 0) { 3203 /* Caller wants source address */ 3204 addrlen = tpr->unitdata_ind.SRC_length; 3205 addr = sogetoff(mp, 3206 tpr->unitdata_ind.SRC_offset, 3207 addrlen, 1); 3208 if (addr == NULL) { 3209 freemsg(mp); 3210 error = EPROTO; 3211 eprintsoline(so, error); 3212 goto err; 3213 } 3214 if (so->so_family == AF_UNIX) { 3215 /* 3216 * Can not use the transport level address. 3217 * If there is a SO_SRCADDR option carrying 3218 * the socket level address it will be 3219 * extracted below. 3220 */ 3221 addr = NULL; 3222 addrlen = 0; 3223 } 3224 } 3225 optlen = tpr->unitdata_ind.OPT_length; 3226 if (optlen != 0) { 3227 t_uscalar_t ncontrollen; 3228 3229 /* 3230 * Extract any source address option. 3231 * Determine how large cmsg buffer is needed. 3232 */ 3233 opt = sogetoff(mp, 3234 tpr->unitdata_ind.OPT_offset, 3235 optlen, __TPI_ALIGN_SIZE); 3236 3237 if (opt == NULL) { 3238 freemsg(mp); 3239 error = EPROTO; 3240 eprintsoline(so, error); 3241 goto err; 3242 } 3243 if (so->so_family == AF_UNIX) 3244 so_getopt_srcaddr(opt, optlen, &addr, &addrlen); 3245 ncontrollen = so_cmsglen(mp, opt, optlen, 3246 !(flags & MSG_XPG4_2)); 3247 if (controllen != 0) 3248 controllen = ncontrollen; 3249 else if (ncontrollen != 0) 3250 msg->msg_flags |= MSG_CTRUNC; 3251 } else { 3252 controllen = 0; 3253 } 3254 3255 if (namelen != 0) { 3256 /* 3257 * Return address to caller. 3258 * Caller handles truncation if length 3259 * exceeds msg_namelen. 3260 * NOTE: AF_UNIX NUL termination is ensured by 3261 * the sender's copyin_name(). 3262 */ 3263 abuf = kmem_alloc(addrlen, KM_SLEEP); 3264 3265 bcopy(addr, abuf, addrlen); 3266 msg->msg_name = abuf; 3267 msg->msg_namelen = addrlen; 3268 } 3269 3270 if (controllen != 0) { 3271 /* 3272 * Return control msg to caller. 3273 * Caller handles truncation if length 3274 * exceeds msg_controllen. 3275 */ 3276 control = kmem_zalloc(controllen, KM_SLEEP); 3277 3278 error = so_opt2cmsg(mp, opt, optlen, 3279 !(flags & MSG_XPG4_2), 3280 control, controllen); 3281 if (error) { 3282 freemsg(mp); 3283 if (msg->msg_namelen != 0) 3284 kmem_free(msg->msg_name, 3285 msg->msg_namelen); 3286 kmem_free(control, controllen); 3287 eprintsoline(so, error); 3288 goto err; 3289 } 3290 msg->msg_control = control; 3291 msg->msg_controllen = controllen; 3292 } 3293 3294 freemsg(mp); 3295 mutex_enter(&so->so_lock); 3296 so_unlock_read(so); /* Clear SOREADLOCKED */ 3297 mutex_exit(&so->so_lock); 3298 return (0); 3299 } 3300 case T_OPTDATA_IND: { 3301 struct T_optdata_req *tdr; 3302 void *opt; 3303 t_uscalar_t optlen; 3304 3305 if ((so->so_state & 3306 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3307 (uiop->uio_resid != saved_resid) && 3308 !(flags & MSG_PEEK)) { 3309 sorecv_update_oobstate(so); 3310 } 3311 3312 tdr = (struct T_optdata_req *)mp->b_rptr; 3313 optlen = tdr->OPT_length; 3314 if (optlen != 0) { 3315 t_uscalar_t ncontrollen; 3316 /* 3317 * Determine how large cmsg buffer is needed. 3318 */ 3319 opt = sogetoff(mp, 3320 tpr->optdata_ind.OPT_offset, 3321 optlen, __TPI_ALIGN_SIZE); 3322 3323 if (opt == NULL) { 3324 freemsg(mp); 3325 error = EPROTO; 3326 eprintsoline(so, error); 3327 goto err; 3328 } 3329 3330 ncontrollen = so_cmsglen(mp, opt, optlen, 3331 !(flags & MSG_XPG4_2)); 3332 if (controllen != 0) 3333 controllen = ncontrollen; 3334 else if (ncontrollen != 0) 3335 msg->msg_flags |= MSG_CTRUNC; 3336 } else { 3337 controllen = 0; 3338 } 3339 3340 if (controllen != 0) { 3341 /* 3342 * Return control msg to caller. 3343 * Caller handles truncation if length 3344 * exceeds msg_controllen. 3345 */ 3346 control = kmem_zalloc(controllen, KM_SLEEP); 3347 3348 error = so_opt2cmsg(mp, opt, optlen, 3349 !(flags & MSG_XPG4_2), 3350 control, controllen); 3351 if (error) { 3352 freemsg(mp); 3353 kmem_free(control, controllen); 3354 eprintsoline(so, error); 3355 goto err; 3356 } 3357 msg->msg_control = control; 3358 msg->msg_controllen = controllen; 3359 } 3360 3361 /* 3362 * Set msg_flags to MSG_EOR based on 3363 * DATA_flag and MOREDATA. 3364 */ 3365 mutex_enter(&so->so_lock); 3366 so->so_state &= ~SS_SAVEDEOR; 3367 if (!(tpr->data_ind.MORE_flag & 1)) { 3368 if (!(rval.r_val1 & MOREDATA)) 3369 msg->msg_flags |= MSG_EOR; 3370 else 3371 so->so_state |= SS_SAVEDEOR; 3372 } 3373 freemsg(mp); 3374 /* 3375 * If some data was received (i.e. not EOF) and the 3376 * read/recv* has not been satisfied wait for some more. 3377 * Not possible to wait if control info was received. 3378 */ 3379 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3380 controllen == 0 && 3381 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3382 mutex_exit(&so->so_lock); 3383 first = 0; 3384 pflag = opflag | MSG_NOMARK; 3385 goto retry; 3386 } 3387 so_unlock_read(so); /* Clear SOREADLOCKED */ 3388 mutex_exit(&so->so_lock); 3389 return (0); 3390 } 3391 case T_EXDATA_IND: { 3392 dprintso(so, 1, 3393 ("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld " 3394 "state %s\n", 3395 so->so_oobsigcnt, so->so_oobcnt, 3396 saved_resid - uiop->uio_resid, 3397 pr_state(so->so_state, so->so_mode))); 3398 /* 3399 * kstrgetmsg handles MSGMARK so there is nothing to 3400 * inspect in the T_EXDATA_IND. 3401 * strsock_proto makes the stream head queue the T_EXDATA_IND 3402 * as a separate message with no M_DATA component. Furthermore, 3403 * the stream head does not consolidate M_DATA messages onto 3404 * an MSGMARK'ed message ensuring that the T_EXDATA_IND 3405 * remains a message by itself. This is needed since MSGMARK 3406 * marks both the whole message as well as the last byte 3407 * of the message. 3408 */ 3409 freemsg(mp); 3410 ASSERT(uiop->uio_resid == saved_resid); /* No data */ 3411 if (flags & MSG_PEEK) { 3412 /* 3413 * Even though we are peeking we consume the 3414 * T_EXDATA_IND thereby moving the mark information 3415 * to SS_RCVATMARK. Then the oob code below will 3416 * retry the peeking kstrgetmsg. 3417 * Note that the stream head read queue is 3418 * never flushed without holding SOREADLOCKED 3419 * thus the T_EXDATA_IND can not disappear 3420 * underneath us. 3421 */ 3422 dprintso(so, 1, 3423 ("sotpi_recvmsg: consume EXDATA_IND " 3424 "counts %d/%d state %s\n", 3425 so->so_oobsigcnt, 3426 so->so_oobcnt, 3427 pr_state(so->so_state, so->so_mode))); 3428 3429 pflag = MSG_ANY | MSG_DELAYERROR; 3430 if (so->so_mode & SM_ATOMIC) 3431 pflag |= MSG_DISCARDTAIL; 3432 3433 pri = 0; 3434 mp = NULL; 3435 3436 error = kstrgetmsg(SOTOV(so), &mp, uiop, 3437 &pri, &pflag, (clock_t)-1, &rval); 3438 ASSERT(uiop->uio_resid == saved_resid); 3439 3440 if (error) { 3441 #ifdef SOCK_DEBUG 3442 if (error != EWOULDBLOCK && error != EINTR) { 3443 eprintsoline(so, error); 3444 } 3445 #endif /* SOCK_DEBUG */ 3446 mutex_enter(&so->so_lock); 3447 so_unlock_read(so); /* Clear SOREADLOCKED */ 3448 mutex_exit(&so->so_lock); 3449 return (error); 3450 } 3451 ASSERT(mp); 3452 tpr = (union T_primitives *)mp->b_rptr; 3453 ASSERT(tpr->type == T_EXDATA_IND); 3454 freemsg(mp); 3455 } /* end "if (flags & MSG_PEEK)" */ 3456 3457 /* 3458 * Decrement the number of queued and pending oob. 3459 * 3460 * SS_RCVATMARK is cleared when we read past a mark. 3461 * SS_HAVEOOBDATA is cleared when we've read past the 3462 * last mark. 3463 * SS_OOBPEND is cleared if we've read past the last 3464 * mark and no (new) SIGURG has been posted. 3465 */ 3466 mutex_enter(&so->so_lock); 3467 ASSERT(so_verify_oobstate(so)); 3468 ASSERT(so->so_oobsigcnt >= so->so_oobcnt); 3469 ASSERT(so->so_oobsigcnt > 0); 3470 so->so_oobsigcnt--; 3471 ASSERT(so->so_oobcnt > 0); 3472 so->so_oobcnt--; 3473 /* 3474 * Since the T_EXDATA_IND has been removed from the stream 3475 * head, but we have not read data past the mark, 3476 * sockfs needs to track that the socket is still at the mark. 3477 * 3478 * Since no data was received call kstrgetmsg again to wait 3479 * for data. 3480 */ 3481 so->so_state |= SS_RCVATMARK; 3482 mutex_exit(&so->so_lock); 3483 dprintso(so, 1, 3484 ("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n", 3485 so->so_oobsigcnt, so->so_oobcnt, 3486 pr_state(so->so_state, so->so_mode))); 3487 pflag = opflag; 3488 goto retry; 3489 } 3490 default: 3491 ASSERT(0); 3492 freemsg(mp); 3493 error = EPROTO; 3494 eprintsoline(so, error); 3495 goto err; 3496 } 3497 /* NOTREACHED */ 3498 err: 3499 mutex_enter(&so->so_lock); 3500 so_unlock_read(so); /* Clear SOREADLOCKED */ 3501 mutex_exit(&so->so_lock); 3502 return (error); 3503 } 3504 3505 /* 3506 * Sending data with options on a datagram socket. 3507 * Assumes caller has verified that SS_ISBOUND etc. are set. 3508 */ 3509 static int 3510 sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3511 struct uio *uiop, void *control, t_uscalar_t controllen, int flags) 3512 { 3513 struct T_unitdata_req tudr; 3514 mblk_t *mp; 3515 int error; 3516 void *addr; 3517 socklen_t addrlen; 3518 void *src; 3519 socklen_t srclen; 3520 ssize_t len; 3521 int size; 3522 struct T_opthdr toh; 3523 struct fdbuf *fdbuf; 3524 t_uscalar_t optlen; 3525 void *fds; 3526 int fdlen; 3527 3528 ASSERT(name && namelen); 3529 ASSERT(control && controllen); 3530 3531 len = uiop->uio_resid; 3532 if (len > (ssize_t)so->so_tidu_size) { 3533 return (EMSGSIZE); 3534 } 3535 3536 /* 3537 * For AF_UNIX the destination address is translated to an internal 3538 * name and the source address is passed as an option. 3539 * Also, file descriptors are passed as file pointers in an 3540 * option. 3541 */ 3542 3543 /* 3544 * Length and family checks. 3545 */ 3546 error = so_addr_verify(so, name, namelen); 3547 if (error) { 3548 eprintsoline(so, error); 3549 return (error); 3550 } 3551 if (so->so_family == AF_UNIX) { 3552 if (so->so_state & SS_FADDR_NOXLATE) { 3553 /* 3554 * Already have a transport internal address. Do not 3555 * pass any (transport internal) source address. 3556 */ 3557 addr = name; 3558 addrlen = namelen; 3559 src = NULL; 3560 srclen = 0; 3561 } else { 3562 /* 3563 * Pass the sockaddr_un source address as an option 3564 * and translate the remote address. 3565 * 3566 * Note that this code does not prevent so_laddr_sa 3567 * from changing while it is being used. Thus 3568 * if an unbind+bind occurs concurrently with this 3569 * send the peer might see a partially new and a 3570 * partially old "from" address. 3571 */ 3572 src = so->so_laddr_sa; 3573 srclen = (t_uscalar_t)so->so_laddr_len; 3574 dprintso(so, 1, 3575 ("sosend_dgramcmsg UNIX: srclen %d, src %p\n", 3576 srclen, src)); 3577 error = so_ux_addr_xlate(so, name, namelen, 3578 (flags & MSG_XPG4_2), 3579 &addr, &addrlen); 3580 if (error) { 3581 eprintsoline(so, error); 3582 return (error); 3583 } 3584 } 3585 } else { 3586 addr = name; 3587 addrlen = namelen; 3588 src = NULL; 3589 srclen = 0; 3590 } 3591 optlen = so_optlen(control, controllen, 3592 !(flags & MSG_XPG4_2)); 3593 tudr.PRIM_type = T_UNITDATA_REQ; 3594 tudr.DEST_length = addrlen; 3595 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3596 if (srclen != 0) 3597 tudr.OPT_length = (t_scalar_t)(optlen + sizeof (toh) + 3598 _TPI_ALIGN_TOPT(srclen)); 3599 else 3600 tudr.OPT_length = optlen; 3601 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3602 _TPI_ALIGN_TOPT(addrlen)); 3603 3604 size = tudr.OPT_offset + tudr.OPT_length; 3605 3606 /* 3607 * File descriptors only when SM_FDPASSING set. 3608 */ 3609 error = so_getfdopt(control, controllen, 3610 !(flags & MSG_XPG4_2), &fds, &fdlen); 3611 if (error) 3612 return (error); 3613 if (fdlen != -1) { 3614 if (!(so->so_mode & SM_FDPASSING)) 3615 return (EOPNOTSUPP); 3616 3617 error = fdbuf_create(fds, fdlen, &fdbuf); 3618 if (error) 3619 return (error); 3620 mp = fdbuf_allocmsg(size, fdbuf); 3621 } else { 3622 mp = soallocproto(size, _ALLOC_INTR); 3623 if (mp == NULL) { 3624 /* 3625 * Caught a signal waiting for memory. 3626 * Let send* return EINTR. 3627 */ 3628 return (EINTR); 3629 } 3630 } 3631 soappendmsg(mp, &tudr, sizeof (tudr)); 3632 soappendmsg(mp, addr, addrlen); 3633 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3634 3635 if (fdlen != -1) { 3636 ASSERT(fdbuf != NULL); 3637 toh.level = SOL_SOCKET; 3638 toh.name = SO_FILEP; 3639 toh.len = fdbuf->fd_size + 3640 (t_uscalar_t)sizeof (struct T_opthdr); 3641 toh.status = 0; 3642 soappendmsg(mp, &toh, sizeof (toh)); 3643 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3644 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3645 } 3646 if (srclen != 0) { 3647 /* 3648 * There is a AF_UNIX sockaddr_un to include as a source 3649 * address option. 3650 */ 3651 toh.level = SOL_SOCKET; 3652 toh.name = SO_SRCADDR; 3653 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3654 toh.status = 0; 3655 soappendmsg(mp, &toh, sizeof (toh)); 3656 soappendmsg(mp, src, srclen); 3657 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3658 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3659 } 3660 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3661 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3662 /* At most 3 bytes left in the message */ 3663 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3664 ASSERT(MBLKL(mp) <= (ssize_t)size); 3665 3666 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3667 #ifdef C2_AUDIT 3668 if (audit_active) 3669 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3670 #endif /* C2_AUDIT */ 3671 3672 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3673 #ifdef SOCK_DEBUG 3674 if (error) { 3675 eprintsoline(so, error); 3676 } 3677 #endif /* SOCK_DEBUG */ 3678 return (error); 3679 } 3680 3681 /* 3682 * Sending data with options on a connected stream socket. 3683 * Assumes caller has verified that SS_ISCONNECTED is set. 3684 */ 3685 static int 3686 sosend_svccmsg(struct sonode *so, 3687 struct uio *uiop, 3688 int more, 3689 void *control, 3690 t_uscalar_t controllen, 3691 int flags) 3692 { 3693 struct T_optdata_req tdr; 3694 mblk_t *mp; 3695 int error; 3696 ssize_t iosize; 3697 int first = 1; 3698 int size; 3699 struct fdbuf *fdbuf; 3700 t_uscalar_t optlen; 3701 void *fds; 3702 int fdlen; 3703 struct T_opthdr toh; 3704 3705 dprintso(so, 1, 3706 ("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid)); 3707 3708 /* 3709 * Has to be bound and connected. However, since no locks are 3710 * held the state could have changed after sotpi_sendmsg checked it 3711 * thus it is not possible to ASSERT on the state. 3712 */ 3713 3714 /* Options on connection-oriented only when SM_OPTDATA set. */ 3715 if (!(so->so_mode & SM_OPTDATA)) 3716 return (EOPNOTSUPP); 3717 3718 do { 3719 /* 3720 * Set the MORE flag if uio_resid does not fit in this 3721 * message or if the caller passed in "more". 3722 * Error for transports with zero tidu_size. 3723 */ 3724 tdr.PRIM_type = T_OPTDATA_REQ; 3725 iosize = so->so_tidu_size; 3726 if (iosize <= 0) 3727 return (EMSGSIZE); 3728 if (uiop->uio_resid > iosize) { 3729 tdr.DATA_flag = 1; 3730 } else { 3731 if (more) 3732 tdr.DATA_flag = 1; 3733 else 3734 tdr.DATA_flag = 0; 3735 iosize = uiop->uio_resid; 3736 } 3737 dprintso(so, 1, ("sosend_svccmsg: sending %d, %ld bytes\n", 3738 tdr.DATA_flag, iosize)); 3739 3740 optlen = so_optlen(control, controllen, !(flags & MSG_XPG4_2)); 3741 tdr.OPT_length = optlen; 3742 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 3743 3744 size = (int)sizeof (tdr) + optlen; 3745 /* 3746 * File descriptors only when SM_FDPASSING set. 3747 */ 3748 error = so_getfdopt(control, controllen, 3749 !(flags & MSG_XPG4_2), &fds, &fdlen); 3750 if (error) 3751 return (error); 3752 if (fdlen != -1) { 3753 if (!(so->so_mode & SM_FDPASSING)) 3754 return (EOPNOTSUPP); 3755 3756 error = fdbuf_create(fds, fdlen, &fdbuf); 3757 if (error) 3758 return (error); 3759 mp = fdbuf_allocmsg(size, fdbuf); 3760 } else { 3761 mp = soallocproto(size, _ALLOC_INTR); 3762 if (mp == NULL) { 3763 /* 3764 * Caught a signal waiting for memory. 3765 * Let send* return EINTR. 3766 */ 3767 return (first ? EINTR : 0); 3768 } 3769 } 3770 soappendmsg(mp, &tdr, sizeof (tdr)); 3771 3772 if (fdlen != -1) { 3773 ASSERT(fdbuf != NULL); 3774 toh.level = SOL_SOCKET; 3775 toh.name = SO_FILEP; 3776 toh.len = fdbuf->fd_size + 3777 (t_uscalar_t)sizeof (struct T_opthdr); 3778 toh.status = 0; 3779 soappendmsg(mp, &toh, sizeof (toh)); 3780 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3781 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3782 } 3783 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3784 /* At most 3 bytes left in the message */ 3785 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3786 ASSERT(MBLKL(mp) <= (ssize_t)size); 3787 3788 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3789 3790 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 3791 0, MSG_BAND, 0); 3792 if (error) { 3793 if (!first && error == EWOULDBLOCK) 3794 return (0); 3795 eprintsoline(so, error); 3796 return (error); 3797 } 3798 control = NULL; 3799 first = 0; 3800 if (uiop->uio_resid > 0) { 3801 /* 3802 * Recheck for fatal errors. Fail write even though 3803 * some data have been written. This is consistent 3804 * with strwrite semantics and BSD sockets semantics. 3805 */ 3806 if (so->so_state & SS_CANTSENDMORE) { 3807 tsignal(curthread, SIGPIPE); 3808 eprintsoline(so, error); 3809 return (EPIPE); 3810 } 3811 if (so->so_error != 0) { 3812 mutex_enter(&so->so_lock); 3813 error = sogeterr(so); 3814 mutex_exit(&so->so_lock); 3815 if (error != 0) { 3816 eprintsoline(so, error); 3817 return (error); 3818 } 3819 } 3820 } 3821 } while (uiop->uio_resid > 0); 3822 return (0); 3823 } 3824 3825 /* 3826 * Sending data on a datagram socket. 3827 * Assumes caller has verified that SS_ISBOUND etc. are set. 3828 * 3829 * For AF_UNIX the destination address is translated to an internal 3830 * name and the source address is passed as an option. 3831 */ 3832 int 3833 sosend_dgram(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3834 struct uio *uiop, int flags) 3835 { 3836 struct T_unitdata_req tudr; 3837 mblk_t *mp; 3838 int error; 3839 void *addr; 3840 socklen_t addrlen; 3841 void *src; 3842 socklen_t srclen; 3843 ssize_t len; 3844 3845 ASSERT(name != NULL && namelen != 0); 3846 3847 len = uiop->uio_resid; 3848 if (len > so->so_tidu_size) { 3849 error = EMSGSIZE; 3850 goto done; 3851 } 3852 3853 /* Length and family checks */ 3854 error = so_addr_verify(so, name, namelen); 3855 if (error != 0) 3856 goto done; 3857 3858 if (so->so_state & SS_DIRECT) 3859 return (sodgram_direct(so, name, namelen, uiop, flags)); 3860 3861 if (so->so_family == AF_UNIX) { 3862 if (so->so_state & SS_FADDR_NOXLATE) { 3863 /* 3864 * Already have a transport internal address. Do not 3865 * pass any (transport internal) source address. 3866 */ 3867 addr = name; 3868 addrlen = namelen; 3869 src = NULL; 3870 srclen = 0; 3871 } else { 3872 /* 3873 * Pass the sockaddr_un source address as an option 3874 * and translate the remote address. 3875 * 3876 * Note that this code does not prevent so_laddr_sa 3877 * from changing while it is being used. Thus 3878 * if an unbind+bind occurs concurrently with this 3879 * send the peer might see a partially new and a 3880 * partially old "from" address. 3881 */ 3882 src = so->so_laddr_sa; 3883 srclen = (socklen_t)so->so_laddr_len; 3884 dprintso(so, 1, 3885 ("sosend_dgram UNIX: srclen %d, src %p\n", 3886 srclen, src)); 3887 error = so_ux_addr_xlate(so, name, namelen, 3888 (flags & MSG_XPG4_2), 3889 &addr, &addrlen); 3890 if (error) { 3891 eprintsoline(so, error); 3892 goto done; 3893 } 3894 } 3895 } else { 3896 addr = name; 3897 addrlen = namelen; 3898 src = NULL; 3899 srclen = 0; 3900 } 3901 tudr.PRIM_type = T_UNITDATA_REQ; 3902 tudr.DEST_length = addrlen; 3903 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3904 if (srclen == 0) { 3905 tudr.OPT_length = 0; 3906 tudr.OPT_offset = 0; 3907 3908 mp = soallocproto2(&tudr, sizeof (tudr), 3909 addr, addrlen, 0, _ALLOC_INTR); 3910 if (mp == NULL) { 3911 /* 3912 * Caught a signal waiting for memory. 3913 * Let send* return EINTR. 3914 */ 3915 error = EINTR; 3916 goto done; 3917 } 3918 } else { 3919 /* 3920 * There is a AF_UNIX sockaddr_un to include as a source 3921 * address option. 3922 */ 3923 struct T_opthdr toh; 3924 ssize_t size; 3925 3926 tudr.OPT_length = (t_scalar_t)(sizeof (toh) + 3927 _TPI_ALIGN_TOPT(srclen)); 3928 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3929 _TPI_ALIGN_TOPT(addrlen)); 3930 3931 toh.level = SOL_SOCKET; 3932 toh.name = SO_SRCADDR; 3933 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3934 toh.status = 0; 3935 3936 size = tudr.OPT_offset + tudr.OPT_length; 3937 mp = soallocproto2(&tudr, sizeof (tudr), 3938 addr, addrlen, size, _ALLOC_INTR); 3939 if (mp == NULL) { 3940 /* 3941 * Caught a signal waiting for memory. 3942 * Let send* return EINTR. 3943 */ 3944 error = EINTR; 3945 goto done; 3946 } 3947 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3948 soappendmsg(mp, &toh, sizeof (toh)); 3949 soappendmsg(mp, src, srclen); 3950 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3951 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3952 } 3953 3954 #ifdef C2_AUDIT 3955 if (audit_active) 3956 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3957 #endif /* C2_AUDIT */ 3958 3959 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3960 done: 3961 #ifdef SOCK_DEBUG 3962 if (error) { 3963 eprintsoline(so, error); 3964 } 3965 #endif /* SOCK_DEBUG */ 3966 return (error); 3967 } 3968 3969 /* 3970 * Sending data on a connected stream socket. 3971 * Assumes caller has verified that SS_ISCONNECTED is set. 3972 */ 3973 int 3974 sosend_svc(struct sonode *so, 3975 struct uio *uiop, 3976 t_scalar_t prim, 3977 int more, 3978 int sflag) 3979 { 3980 struct T_data_req tdr; 3981 mblk_t *mp; 3982 int error; 3983 ssize_t iosize; 3984 int first = 1; 3985 3986 dprintso(so, 1, 3987 ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n", 3988 so, uiop->uio_resid, prim, sflag)); 3989 3990 /* 3991 * Has to be bound and connected. However, since no locks are 3992 * held the state could have changed after sotpi_sendmsg checked it 3993 * thus it is not possible to ASSERT on the state. 3994 */ 3995 3996 do { 3997 /* 3998 * Set the MORE flag if uio_resid does not fit in this 3999 * message or if the caller passed in "more". 4000 * Error for transports with zero tidu_size. 4001 */ 4002 tdr.PRIM_type = prim; 4003 iosize = so->so_tidu_size; 4004 if (iosize <= 0) 4005 return (EMSGSIZE); 4006 if (uiop->uio_resid > iosize) { 4007 tdr.MORE_flag = 1; 4008 } else { 4009 if (more) 4010 tdr.MORE_flag = 1; 4011 else 4012 tdr.MORE_flag = 0; 4013 iosize = uiop->uio_resid; 4014 } 4015 dprintso(so, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n", 4016 prim, tdr.MORE_flag, iosize)); 4017 mp = soallocproto1(&tdr, sizeof (tdr), 0, _ALLOC_INTR); 4018 if (mp == NULL) { 4019 /* 4020 * Caught a signal waiting for memory. 4021 * Let send* return EINTR. 4022 */ 4023 if (first) 4024 return (EINTR); 4025 else 4026 return (0); 4027 } 4028 4029 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 4030 0, sflag | MSG_BAND, 0); 4031 if (error) { 4032 if (!first && error == EWOULDBLOCK) 4033 return (0); 4034 eprintsoline(so, error); 4035 return (error); 4036 } 4037 first = 0; 4038 if (uiop->uio_resid > 0) { 4039 /* 4040 * Recheck for fatal errors. Fail write even though 4041 * some data have been written. This is consistent 4042 * with strwrite semantics and BSD sockets semantics. 4043 */ 4044 if (so->so_state & SS_CANTSENDMORE) { 4045 tsignal(curthread, SIGPIPE); 4046 eprintsoline(so, error); 4047 return (EPIPE); 4048 } 4049 if (so->so_error != 0) { 4050 mutex_enter(&so->so_lock); 4051 error = sogeterr(so); 4052 mutex_exit(&so->so_lock); 4053 if (error != 0) { 4054 eprintsoline(so, error); 4055 return (error); 4056 } 4057 } 4058 } 4059 } while (uiop->uio_resid > 0); 4060 return (0); 4061 } 4062 4063 /* 4064 * Check the state for errors and call the appropriate send function. 4065 * 4066 * If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set) 4067 * this function issues a setsockopt to toggle SO_DONTROUTE before and 4068 * after sending the message. 4069 */ 4070 static int 4071 sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop) 4072 { 4073 int so_state; 4074 int so_mode; 4075 int error; 4076 struct sockaddr *name; 4077 t_uscalar_t namelen; 4078 int dontroute; 4079 int flags; 4080 4081 dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n", 4082 so, msg, msg->msg_flags, 4083 pr_state(so->so_state, so->so_mode), so->so_error)); 4084 4085 mutex_enter(&so->so_lock); 4086 so_state = so->so_state; 4087 4088 if (so_state & SS_CANTSENDMORE) { 4089 mutex_exit(&so->so_lock); 4090 tsignal(curthread, SIGPIPE); 4091 return (EPIPE); 4092 } 4093 4094 if (so->so_error != 0) { 4095 error = sogeterr(so); 4096 if (error != 0) { 4097 mutex_exit(&so->so_lock); 4098 return (error); 4099 } 4100 } 4101 4102 name = (struct sockaddr *)msg->msg_name; 4103 namelen = msg->msg_namelen; 4104 4105 so_mode = so->so_mode; 4106 4107 if (name == NULL) { 4108 if (!(so_state & SS_ISCONNECTED)) { 4109 mutex_exit(&so->so_lock); 4110 if (so_mode & SM_CONNREQUIRED) 4111 return (ENOTCONN); 4112 else 4113 return (EDESTADDRREQ); 4114 } 4115 if (so_mode & SM_CONNREQUIRED) { 4116 name = NULL; 4117 namelen = 0; 4118 } else { 4119 /* 4120 * Note that this code does not prevent so_faddr_sa 4121 * from changing while it is being used. Thus 4122 * if an "unconnect"+connect occurs concurrently with 4123 * this send the datagram might be delivered to a 4124 * garbaled address. 4125 */ 4126 ASSERT(so->so_faddr_sa); 4127 name = so->so_faddr_sa; 4128 namelen = (t_uscalar_t)so->so_faddr_len; 4129 } 4130 } else { 4131 if (!(so_state & SS_ISCONNECTED) && 4132 (so_mode & SM_CONNREQUIRED)) { 4133 /* Required but not connected */ 4134 mutex_exit(&so->so_lock); 4135 return (ENOTCONN); 4136 } 4137 /* 4138 * Ignore the address on connection-oriented sockets. 4139 * Just like BSD this code does not generate an error for 4140 * TCP (a CONNREQUIRED socket) when sending to an address 4141 * passed in with sendto/sendmsg. Instead the data is 4142 * delivered on the connection as if no address had been 4143 * supplied. 4144 */ 4145 if ((so_state & SS_ISCONNECTED) && 4146 !(so_mode & SM_CONNREQUIRED)) { 4147 mutex_exit(&so->so_lock); 4148 return (EISCONN); 4149 } 4150 if (!(so_state & SS_ISBOUND)) { 4151 so_lock_single(so); /* Set SOLOCKED */ 4152 error = sotpi_bind(so, NULL, 0, 4153 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD); 4154 so_unlock_single(so, SOLOCKED); 4155 if (error) { 4156 mutex_exit(&so->so_lock); 4157 eprintsoline(so, error); 4158 return (error); 4159 } 4160 } 4161 /* 4162 * Handle delayed datagram errors. These are only queued 4163 * when the application sets SO_DGRAM_ERRIND. 4164 * Return the error if we are sending to the address 4165 * that was returned in the last T_UDERROR_IND. 4166 * If sending to some other address discard the delayed 4167 * error indication. 4168 */ 4169 if (so->so_delayed_error) { 4170 struct T_uderror_ind *tudi; 4171 void *addr; 4172 t_uscalar_t addrlen; 4173 boolean_t match = B_FALSE; 4174 4175 ASSERT(so->so_eaddr_mp); 4176 error = so->so_delayed_error; 4177 so->so_delayed_error = 0; 4178 tudi = (struct T_uderror_ind *)so->so_eaddr_mp->b_rptr; 4179 addrlen = tudi->DEST_length; 4180 addr = sogetoff(so->so_eaddr_mp, 4181 tudi->DEST_offset, 4182 addrlen, 1); 4183 ASSERT(addr); /* Checked by strsock_proto */ 4184 switch (so->so_family) { 4185 case AF_INET: { 4186 /* Compare just IP address and port */ 4187 sin_t *sin1 = (sin_t *)name; 4188 sin_t *sin2 = (sin_t *)addr; 4189 4190 if (addrlen == sizeof (sin_t) && 4191 namelen == addrlen && 4192 sin1->sin_port == sin2->sin_port && 4193 sin1->sin_addr.s_addr == 4194 sin2->sin_addr.s_addr) 4195 match = B_TRUE; 4196 break; 4197 } 4198 case AF_INET6: { 4199 /* Compare just IP address and port. Not flow */ 4200 sin6_t *sin1 = (sin6_t *)name; 4201 sin6_t *sin2 = (sin6_t *)addr; 4202 4203 if (addrlen == sizeof (sin6_t) && 4204 namelen == addrlen && 4205 sin1->sin6_port == sin2->sin6_port && 4206 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 4207 &sin2->sin6_addr)) 4208 match = B_TRUE; 4209 break; 4210 } 4211 case AF_UNIX: 4212 default: 4213 if (namelen == addrlen && 4214 bcmp(name, addr, namelen) == 0) 4215 match = B_TRUE; 4216 } 4217 if (match) { 4218 freemsg(so->so_eaddr_mp); 4219 so->so_eaddr_mp = NULL; 4220 mutex_exit(&so->so_lock); 4221 #ifdef DEBUG 4222 dprintso(so, 0, 4223 ("sockfs delayed error %d for %s\n", 4224 error, 4225 pr_addr(so->so_family, name, namelen))); 4226 #endif /* DEBUG */ 4227 return (error); 4228 } 4229 freemsg(so->so_eaddr_mp); 4230 so->so_eaddr_mp = NULL; 4231 } 4232 } 4233 mutex_exit(&so->so_lock); 4234 4235 flags = msg->msg_flags; 4236 dontroute = 0; 4237 if ((flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE)) { 4238 uint32_t val; 4239 4240 val = 1; 4241 error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4242 &val, (t_uscalar_t)sizeof (val)); 4243 if (error) 4244 return (error); 4245 dontroute = 1; 4246 } 4247 4248 if ((flags & MSG_OOB) && !(so_mode & SM_EXDATA)) { 4249 error = EOPNOTSUPP; 4250 goto done; 4251 } 4252 if (msg->msg_controllen != 0) { 4253 if (!(so_mode & SM_CONNREQUIRED)) { 4254 error = sosend_dgramcmsg(so, name, namelen, uiop, 4255 msg->msg_control, msg->msg_controllen, flags); 4256 } else { 4257 if (flags & MSG_OOB) { 4258 /* Can't generate T_EXDATA_REQ with options */ 4259 error = EOPNOTSUPP; 4260 goto done; 4261 } 4262 error = sosend_svccmsg(so, uiop, 4263 !(flags & MSG_EOR), 4264 msg->msg_control, msg->msg_controllen, 4265 flags); 4266 } 4267 goto done; 4268 } 4269 4270 if (!(so_mode & SM_CONNREQUIRED)) { 4271 /* 4272 * If there is no SO_DONTROUTE to turn off return immediately 4273 * from send_dgram. This can allow tail-call optimizations. 4274 */ 4275 if (!dontroute) { 4276 return (sosend_dgram(so, name, namelen, uiop, flags)); 4277 } 4278 error = sosend_dgram(so, name, namelen, uiop, flags); 4279 } else { 4280 t_scalar_t prim; 4281 int sflag; 4282 4283 /* Ignore msg_name in the connected state */ 4284 if (flags & MSG_OOB) { 4285 prim = T_EXDATA_REQ; 4286 /* 4287 * Send down T_EXDATA_REQ even if there is flow 4288 * control for data. 4289 */ 4290 sflag = MSG_IGNFLOW; 4291 } else { 4292 if (so_mode & SM_BYTESTREAM) { 4293 /* Byte stream transport - use write */ 4294 4295 dprintso(so, 1, ("sotpi_sendmsg: write\n")); 4296 /* 4297 * If there is no SO_DONTROUTE to turn off, 4298 * SS_DIRECT is on, and there is no flow 4299 * control, we can take the fast path. 4300 */ 4301 if (!dontroute && 4302 (so_state & SS_DIRECT) && 4303 canputnext(SOTOV(so)->v_stream->sd_wrq)) { 4304 return (sostream_direct(so, uiop, 4305 NULL, CRED())); 4306 } 4307 error = strwrite(SOTOV(so), uiop, CRED()); 4308 goto done; 4309 } 4310 prim = T_DATA_REQ; 4311 sflag = 0; 4312 } 4313 /* 4314 * If there is no SO_DONTROUTE to turn off return immediately 4315 * from sosend_svc. This can allow tail-call optimizations. 4316 */ 4317 if (!dontroute) 4318 return (sosend_svc(so, uiop, prim, 4319 !(flags & MSG_EOR), sflag)); 4320 error = sosend_svc(so, uiop, prim, 4321 !(flags & MSG_EOR), sflag); 4322 } 4323 ASSERT(dontroute); 4324 done: 4325 if (dontroute) { 4326 uint32_t val; 4327 4328 val = 0; 4329 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4330 &val, (t_uscalar_t)sizeof (val)); 4331 } 4332 return (error); 4333 } 4334 4335 /* 4336 * Sending data on a datagram socket. 4337 * Assumes caller has verified that SS_ISBOUND etc. are set. 4338 */ 4339 /* ARGSUSED */ 4340 static int 4341 sodgram_direct(struct sonode *so, struct sockaddr *name, 4342 socklen_t namelen, struct uio *uiop, int flags) 4343 { 4344 struct T_unitdata_req tudr; 4345 mblk_t *mp = NULL; 4346 int error = 0; 4347 void *addr; 4348 socklen_t addrlen; 4349 ssize_t len; 4350 struct stdata *stp = SOTOV(so)->v_stream; 4351 int so_state; 4352 queue_t *udp_wq; 4353 boolean_t connected; 4354 mblk_t *mpdata = NULL; 4355 4356 ASSERT(name != NULL && namelen != 0); 4357 ASSERT(!(so->so_mode & SM_CONNREQUIRED)); 4358 ASSERT(!(so->so_mode & SM_EXDATA)); 4359 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 4360 ASSERT(SOTOV(so)->v_type == VSOCK); 4361 4362 /* Caller checked for proper length */ 4363 len = uiop->uio_resid; 4364 ASSERT(len <= so->so_tidu_size); 4365 4366 /* Length and family checks have been done by caller */ 4367 ASSERT(name->sa_family == so->so_family); 4368 ASSERT(so->so_family == AF_INET || 4369 (namelen == (socklen_t)sizeof (struct sockaddr_in6))); 4370 ASSERT(so->so_family == AF_INET6 || 4371 (namelen == (socklen_t)sizeof (struct sockaddr_in))); 4372 4373 addr = name; 4374 addrlen = namelen; 4375 4376 if (stp->sd_sidp != NULL && 4377 (error = straccess(stp, JCWRITE)) != 0) 4378 goto done; 4379 4380 so_state = so->so_state; 4381 4382 connected = so_state & SS_ISCONNECTED; 4383 if (!connected) { 4384 tudr.PRIM_type = T_UNITDATA_REQ; 4385 tudr.DEST_length = addrlen; 4386 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4387 tudr.OPT_length = 0; 4388 tudr.OPT_offset = 0; 4389 4390 mp = soallocproto2(&tudr, sizeof (tudr), addr, addrlen, 0, 4391 _ALLOC_INTR); 4392 if (mp == NULL) { 4393 /* 4394 * Caught a signal waiting for memory. 4395 * Let send* return EINTR. 4396 */ 4397 error = EINTR; 4398 goto done; 4399 } 4400 } 4401 4402 /* 4403 * For UDP we don't break up the copyin into smaller pieces 4404 * as in the TCP case. That means if ENOMEM is returned by 4405 * mcopyinuio() then the uio vector has not been modified at 4406 * all and we fallback to either strwrite() or kstrputmsg() 4407 * below. Note also that we never generate priority messages 4408 * from here. 4409 */ 4410 udp_wq = stp->sd_wrq->q_next; 4411 if (canput(udp_wq) && 4412 (mpdata = mcopyinuio(stp, uiop, -1, -1, &error)) != NULL) { 4413 ASSERT(DB_TYPE(mpdata) == M_DATA); 4414 ASSERT(uiop->uio_resid == 0); 4415 if (!connected) 4416 linkb(mp, mpdata); 4417 else 4418 mp = mpdata; 4419 #ifdef C2_AUDIT 4420 if (audit_active) 4421 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4422 #endif /* C2_AUDIT */ 4423 4424 udp_wput(udp_wq, mp); 4425 return (0); 4426 } 4427 4428 ASSERT(mpdata == NULL); 4429 if (error != 0 && error != ENOMEM) { 4430 freemsg(mp); 4431 return (error); 4432 } 4433 4434 /* 4435 * For connected, let strwrite() handle the blocking case. 4436 * Otherwise we fall thru and use kstrputmsg(). 4437 */ 4438 if (connected) 4439 return (strwrite(SOTOV(so), uiop, CRED())); 4440 4441 #ifdef C2_AUDIT 4442 if (audit_active) 4443 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4444 #endif /* C2_AUDIT */ 4445 4446 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4447 done: 4448 #ifdef SOCK_DEBUG 4449 if (error != 0) { 4450 eprintsoline(so, error); 4451 } 4452 #endif /* SOCK_DEBUG */ 4453 return (error); 4454 } 4455 4456 int 4457 sostream_direct(struct sonode *so, struct uio *uiop, mblk_t *mp, cred_t *cr) 4458 { 4459 struct stdata *stp = SOTOV(so)->v_stream; 4460 ssize_t iosize, rmax, maxblk; 4461 queue_t *tcp_wq = stp->sd_wrq->q_next; 4462 mblk_t *newmp; 4463 int error = 0, wflag = 0; 4464 4465 ASSERT(so->so_mode & SM_BYTESTREAM); 4466 ASSERT(SOTOV(so)->v_type == VSOCK); 4467 4468 if (stp->sd_sidp != NULL && 4469 (error = straccess(stp, JCWRITE)) != 0) 4470 return (error); 4471 4472 if (uiop == NULL) { 4473 /* 4474 * kstrwritemp() should have checked sd_flag and 4475 * flow-control before coming here. If we end up 4476 * here it means that we can simply pass down the 4477 * data to tcp. 4478 */ 4479 ASSERT(mp != NULL); 4480 if (stp->sd_wputdatafunc != NULL) { 4481 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4482 NULL, NULL, NULL); 4483 if (newmp == NULL) { 4484 /* The caller will free mp */ 4485 return (ECOMM); 4486 } 4487 mp = newmp; 4488 } 4489 tcp_wput(tcp_wq, mp); 4490 return (0); 4491 } 4492 4493 /* Fallback to strwrite() to do proper error handling */ 4494 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY)) 4495 return (strwrite(SOTOV(so), uiop, cr)); 4496 4497 rmax = stp->sd_qn_maxpsz; 4498 ASSERT(rmax >= 0 || rmax == INFPSZ); 4499 if (rmax == 0 || uiop->uio_resid <= 0) 4500 return (0); 4501 4502 if (rmax == INFPSZ) 4503 rmax = uiop->uio_resid; 4504 4505 maxblk = stp->sd_maxblk; 4506 4507 for (;;) { 4508 iosize = MIN(uiop->uio_resid, rmax); 4509 4510 mp = mcopyinuio(stp, uiop, iosize, maxblk, &error); 4511 if (mp == NULL) { 4512 /* 4513 * Fallback to strwrite() for ENOMEM; if this 4514 * is our first time in this routine and the uio 4515 * vector has not been modified, we will end up 4516 * calling strwrite() without any flag set. 4517 */ 4518 if (error == ENOMEM) 4519 goto slow_send; 4520 else 4521 return (error); 4522 } 4523 ASSERT(uiop->uio_resid >= 0); 4524 /* 4525 * If mp is non-NULL and ENOMEM is set, it means that 4526 * mcopyinuio() was able to break down some of the user 4527 * data into one or more mblks. Send the partial data 4528 * to tcp and let the rest be handled in strwrite(). 4529 */ 4530 ASSERT(error == 0 || error == ENOMEM); 4531 if (stp->sd_wputdatafunc != NULL) { 4532 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4533 NULL, NULL, NULL); 4534 if (newmp == NULL) { 4535 /* The caller will free mp */ 4536 return (ECOMM); 4537 } 4538 mp = newmp; 4539 } 4540 tcp_wput(tcp_wq, mp); 4541 4542 wflag |= NOINTR; 4543 4544 if (uiop->uio_resid == 0) { /* No more data; we're done */ 4545 ASSERT(error == 0); 4546 break; 4547 } else if (error == ENOMEM || !canput(tcp_wq) || (stp->sd_flag & 4548 (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))) { 4549 slow_send: 4550 /* 4551 * We were able to send down partial data using 4552 * the direct call interface, but are now relying 4553 * on strwrite() to handle the non-fastpath cases. 4554 * If the socket is blocking we will sleep in 4555 * strwaitq() until write is permitted, otherwise, 4556 * we will need to return the amount of bytes 4557 * written so far back to the app. This is the 4558 * reason why we pass NOINTR flag to strwrite() 4559 * for non-blocking socket, because we don't want 4560 * to return EAGAIN when portion of the user data 4561 * has actually been sent down. 4562 */ 4563 return (strwrite_common(SOTOV(so), uiop, cr, wflag)); 4564 } 4565 } 4566 return (0); 4567 } 4568 4569 /* 4570 * Update so_faddr by asking the transport (unless AF_UNIX). 4571 */ 4572 int 4573 sotpi_getpeername(struct sonode *so) 4574 { 4575 struct strbuf strbuf; 4576 int error = 0, res; 4577 void *addr; 4578 t_uscalar_t addrlen; 4579 k_sigset_t smask; 4580 4581 dprintso(so, 1, ("sotpi_getpeername(%p) %s\n", 4582 so, pr_state(so->so_state, so->so_mode))); 4583 4584 mutex_enter(&so->so_lock); 4585 so_lock_single(so); /* Set SOLOCKED */ 4586 if (!(so->so_state & SS_ISCONNECTED)) { 4587 error = ENOTCONN; 4588 goto done; 4589 } 4590 /* Added this check for X/Open */ 4591 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 4592 error = EINVAL; 4593 if (xnet_check_print) { 4594 printf("sockfs: X/Open getpeername check => EINVAL\n"); 4595 } 4596 goto done; 4597 } 4598 #ifdef DEBUG 4599 dprintso(so, 1, ("sotpi_getpeername (local): %s\n", 4600 pr_addr(so->so_family, so->so_faddr_sa, 4601 (t_uscalar_t)so->so_faddr_len))); 4602 #endif /* DEBUG */ 4603 4604 if (so->so_family == AF_UNIX) { 4605 /* Transport has different name space - return local info */ 4606 error = 0; 4607 goto done; 4608 } 4609 4610 ASSERT(so->so_faddr_sa); 4611 /* Allocate local buffer to use with ioctl */ 4612 addrlen = (t_uscalar_t)so->so_faddr_maxlen; 4613 mutex_exit(&so->so_lock); 4614 addr = kmem_alloc(addrlen, KM_SLEEP); 4615 4616 /* 4617 * Issue TI_GETPEERNAME with signals masked. 4618 * Put the result in so_faddr_sa so that getpeername works after 4619 * a shutdown(output). 4620 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 4621 * back to the socket. 4622 */ 4623 strbuf.buf = addr; 4624 strbuf.maxlen = addrlen; 4625 strbuf.len = 0; 4626 4627 sigintr(&smask, 0); 4628 res = 0; 4629 ASSERT(CRED()); 4630 error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf, 4631 0, K_TO_K, CRED(), &res); 4632 sigunintr(&smask); 4633 4634 mutex_enter(&so->so_lock); 4635 /* 4636 * If there is an error record the error in so_error put don't fail 4637 * the getpeername. Instead fallback on the recorded 4638 * so->so_faddr_sa. 4639 */ 4640 if (error) { 4641 /* 4642 * Various stream head errors can be returned to the ioctl. 4643 * However, it is impossible to determine which ones of 4644 * these are really socket level errors that were incorrectly 4645 * consumed by the ioctl. Thus this code silently ignores the 4646 * error - to code explicitly does not reinstate the error 4647 * using soseterror(). 4648 * Experiments have shows that at least this set of 4649 * errors are reported and should not be reinstated on the 4650 * socket: 4651 * EINVAL E.g. if an I_LINK was in effect when 4652 * getpeername was called. 4653 * EPIPE The ioctl error semantics prefer the write 4654 * side error over the read side error. 4655 * ENOTCONN The transport just got disconnected but 4656 * sockfs had not yet seen the T_DISCON_IND 4657 * when issuing the ioctl. 4658 */ 4659 error = 0; 4660 } else if (res == 0 && strbuf.len > 0 && 4661 (so->so_state & SS_ISCONNECTED)) { 4662 ASSERT(strbuf.len <= (int)so->so_faddr_maxlen); 4663 so->so_faddr_len = (socklen_t)strbuf.len; 4664 bcopy(addr, so->so_faddr_sa, so->so_faddr_len); 4665 so->so_state |= SS_FADDR_VALID; 4666 } 4667 kmem_free(addr, addrlen); 4668 #ifdef DEBUG 4669 dprintso(so, 1, ("sotpi_getpeername (tp): %s\n", 4670 pr_addr(so->so_family, so->so_faddr_sa, 4671 (t_uscalar_t)so->so_faddr_len))); 4672 #endif /* DEBUG */ 4673 done: 4674 so_unlock_single(so, SOLOCKED); 4675 mutex_exit(&so->so_lock); 4676 return (error); 4677 } 4678 4679 /* 4680 * Update so_laddr by asking the transport (unless AF_UNIX). 4681 */ 4682 int 4683 sotpi_getsockname(struct sonode *so) 4684 { 4685 struct strbuf strbuf; 4686 int error = 0, res; 4687 void *addr; 4688 t_uscalar_t addrlen; 4689 k_sigset_t smask; 4690 4691 dprintso(so, 1, ("sotpi_getsockname(%p) %s\n", 4692 so, pr_state(so->so_state, so->so_mode))); 4693 4694 mutex_enter(&so->so_lock); 4695 so_lock_single(so); /* Set SOLOCKED */ 4696 if (!(so->so_state & SS_ISBOUND) && so->so_family != AF_UNIX) { 4697 /* Return an all zero address except for the family */ 4698 if (so->so_family == AF_INET) 4699 so->so_laddr_len = (socklen_t)sizeof (sin_t); 4700 else if (so->so_family == AF_INET6) 4701 so->so_laddr_len = (socklen_t)sizeof (sin6_t); 4702 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 4703 bzero(so->so_laddr_sa, so->so_laddr_len); 4704 /* 4705 * Can not assume there is a sa_family for all 4706 * protocol families. 4707 */ 4708 if (so->so_family == AF_INET || so->so_family == AF_INET6) 4709 so->so_laddr_sa->sa_family = so->so_family; 4710 } 4711 #ifdef DEBUG 4712 dprintso(so, 1, ("sotpi_getsockname (local): %s\n", 4713 pr_addr(so->so_family, so->so_laddr_sa, 4714 (t_uscalar_t)so->so_laddr_len))); 4715 #endif /* DEBUG */ 4716 if (so->so_family == AF_UNIX) { 4717 /* Transport has different name space - return local info */ 4718 error = 0; 4719 goto done; 4720 } 4721 if (!(so->so_state & SS_ISBOUND)) { 4722 /* If not bound, then nothing to return. */ 4723 error = 0; 4724 goto done; 4725 } 4726 /* Allocate local buffer to use with ioctl */ 4727 addrlen = (t_uscalar_t)so->so_laddr_maxlen; 4728 mutex_exit(&so->so_lock); 4729 addr = kmem_alloc(addrlen, KM_SLEEP); 4730 4731 /* 4732 * Issue TI_GETMYNAME with signals masked. 4733 * Put the result in so_laddr_sa so that getsockname works after 4734 * a shutdown(output). 4735 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 4736 * back to the socket. 4737 */ 4738 strbuf.buf = addr; 4739 strbuf.maxlen = addrlen; 4740 strbuf.len = 0; 4741 4742 sigintr(&smask, 0); 4743 res = 0; 4744 ASSERT(CRED()); 4745 error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf, 4746 0, K_TO_K, CRED(), &res); 4747 sigunintr(&smask); 4748 4749 mutex_enter(&so->so_lock); 4750 /* 4751 * If there is an error record the error in so_error put don't fail 4752 * the getsockname. Instead fallback on the recorded 4753 * so->so_laddr_sa. 4754 */ 4755 if (error) { 4756 /* 4757 * Various stream head errors can be returned to the ioctl. 4758 * However, it is impossible to determine which ones of 4759 * these are really socket level errors that were incorrectly 4760 * consumed by the ioctl. Thus this code silently ignores the 4761 * error - to code explicitly does not reinstate the error 4762 * using soseterror(). 4763 * Experiments have shows that at least this set of 4764 * errors are reported and should not be reinstated on the 4765 * socket: 4766 * EINVAL E.g. if an I_LINK was in effect when 4767 * getsockname was called. 4768 * EPIPE The ioctl error semantics prefer the write 4769 * side error over the read side error. 4770 */ 4771 error = 0; 4772 } else if (res == 0 && strbuf.len > 0 && 4773 (so->so_state & SS_ISBOUND)) { 4774 ASSERT(strbuf.len <= (int)so->so_laddr_maxlen); 4775 so->so_laddr_len = (socklen_t)strbuf.len; 4776 bcopy(addr, so->so_laddr_sa, so->so_laddr_len); 4777 so->so_state |= SS_LADDR_VALID; 4778 } 4779 kmem_free(addr, addrlen); 4780 #ifdef DEBUG 4781 dprintso(so, 1, ("sotpi_getsockname (tp): %s\n", 4782 pr_addr(so->so_family, so->so_laddr_sa, 4783 (t_uscalar_t)so->so_laddr_len))); 4784 #endif /* DEBUG */ 4785 done: 4786 so_unlock_single(so, SOLOCKED); 4787 mutex_exit(&so->so_lock); 4788 return (error); 4789 } 4790 4791 /* 4792 * Get socket options. For SOL_SOCKET options some options are handled 4793 * by the sockfs while others use the value recorded in the sonode as a 4794 * fallback should the T_SVR4_OPTMGMT_REQ fail. 4795 * 4796 * On the return most *optlenp bytes are copied to optval. 4797 */ 4798 int 4799 sotpi_getsockopt(struct sonode *so, int level, int option_name, 4800 void *optval, socklen_t *optlenp, int flags) 4801 { 4802 struct T_optmgmt_req optmgmt_req; 4803 struct T_optmgmt_ack *optmgmt_ack; 4804 struct opthdr oh; 4805 struct opthdr *opt_res; 4806 mblk_t *mp = NULL; 4807 int error = 0; 4808 void *option = NULL; /* Set if fallback value */ 4809 t_uscalar_t maxlen = *optlenp; 4810 t_uscalar_t len; 4811 uint32_t value; 4812 4813 dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n", 4814 so, level, option_name, optval, optlenp, 4815 pr_state(so->so_state, so->so_mode))); 4816 4817 mutex_enter(&so->so_lock); 4818 so_lock_single(so); /* Set SOLOCKED */ 4819 4820 /* 4821 * Check for SOL_SOCKET options. 4822 * Certain SOL_SOCKET options are returned directly whereas 4823 * others only provide a default (fallback) value should 4824 * the T_SVR4_OPTMGMT_REQ fail. 4825 */ 4826 if (level == SOL_SOCKET) { 4827 /* Check parameters */ 4828 switch (option_name) { 4829 case SO_TYPE: 4830 case SO_ERROR: 4831 case SO_DEBUG: 4832 case SO_ACCEPTCONN: 4833 case SO_REUSEADDR: 4834 case SO_KEEPALIVE: 4835 case SO_DONTROUTE: 4836 case SO_BROADCAST: 4837 case SO_USELOOPBACK: 4838 case SO_OOBINLINE: 4839 case SO_SNDBUF: 4840 case SO_RCVBUF: 4841 #ifdef notyet 4842 case SO_SNDLOWAT: 4843 case SO_RCVLOWAT: 4844 case SO_SNDTIMEO: 4845 case SO_RCVTIMEO: 4846 #endif /* notyet */ 4847 case SO_DOMAIN: 4848 case SO_DGRAM_ERRIND: 4849 if (maxlen < (t_uscalar_t)sizeof (int32_t)) { 4850 error = EINVAL; 4851 eprintsoline(so, error); 4852 goto done2; 4853 } 4854 break; 4855 case SO_LINGER: 4856 if (maxlen < (t_uscalar_t)sizeof (struct linger)) { 4857 error = EINVAL; 4858 eprintsoline(so, error); 4859 goto done2; 4860 } 4861 break; 4862 } 4863 4864 len = (t_uscalar_t)sizeof (uint32_t); /* Default */ 4865 4866 switch (option_name) { 4867 case SO_TYPE: 4868 value = so->so_type; 4869 option = &value; 4870 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4871 4872 case SO_ERROR: 4873 value = sogeterr(so); 4874 option = &value; 4875 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4876 4877 case SO_ACCEPTCONN: 4878 if (so->so_state & SS_ACCEPTCONN) 4879 value = SO_ACCEPTCONN; 4880 else 4881 value = 0; 4882 #ifdef DEBUG 4883 if (value) { 4884 dprintso(so, 1, 4885 ("sotpi_getsockopt: 0x%x is set\n", 4886 option_name)); 4887 } else { 4888 dprintso(so, 1, 4889 ("sotpi_getsockopt: 0x%x not set\n", 4890 option_name)); 4891 } 4892 #endif /* DEBUG */ 4893 option = &value; 4894 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4895 4896 case SO_DEBUG: 4897 case SO_REUSEADDR: 4898 case SO_KEEPALIVE: 4899 case SO_DONTROUTE: 4900 case SO_BROADCAST: 4901 case SO_USELOOPBACK: 4902 case SO_OOBINLINE: 4903 case SO_DGRAM_ERRIND: 4904 value = (so->so_options & option_name); 4905 #ifdef DEBUG 4906 if (value) { 4907 dprintso(so, 1, 4908 ("sotpi_getsockopt: 0x%x is set\n", 4909 option_name)); 4910 } else { 4911 dprintso(so, 1, 4912 ("sotpi_getsockopt: 0x%x not set\n", 4913 option_name)); 4914 } 4915 #endif /* DEBUG */ 4916 option = &value; 4917 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4918 4919 /* 4920 * The following options are only returned by sockfs when the 4921 * T_SVR4_OPTMGMT_REQ fails. 4922 */ 4923 case SO_LINGER: 4924 option = &so->so_linger; 4925 len = (t_uscalar_t)sizeof (struct linger); 4926 break; 4927 case SO_SNDBUF: { 4928 ssize_t lvalue; 4929 4930 /* 4931 * If the option has not been set then get a default 4932 * value from the read queue. This value is 4933 * returned if the transport fails 4934 * the T_SVR4_OPTMGMT_REQ. 4935 */ 4936 lvalue = so->so_sndbuf; 4937 if (lvalue == 0) { 4938 mutex_exit(&so->so_lock); 4939 (void) strqget(strvp2wq(SOTOV(so))->q_next, 4940 QHIWAT, 0, &lvalue); 4941 mutex_enter(&so->so_lock); 4942 dprintso(so, 1, 4943 ("got SO_SNDBUF %ld from q\n", lvalue)); 4944 } 4945 value = (int)lvalue; 4946 option = &value; 4947 len = (t_uscalar_t)sizeof (so->so_sndbuf); 4948 break; 4949 } 4950 case SO_RCVBUF: { 4951 ssize_t lvalue; 4952 4953 /* 4954 * If the option has not been set then get a default 4955 * value from the read queue. This value is 4956 * returned if the transport fails 4957 * the T_SVR4_OPTMGMT_REQ. 4958 * 4959 * XXX If SO_RCVBUF has been set and this is an 4960 * XPG 4.2 application then do not ask the transport 4961 * since the transport might adjust the value and not 4962 * return exactly what was set by the application. 4963 * For non-XPG 4.2 application we return the value 4964 * that the transport is actually using. 4965 */ 4966 lvalue = so->so_rcvbuf; 4967 if (lvalue == 0) { 4968 mutex_exit(&so->so_lock); 4969 (void) strqget(RD(strvp2wq(SOTOV(so))), 4970 QHIWAT, 0, &lvalue); 4971 mutex_enter(&so->so_lock); 4972 dprintso(so, 1, 4973 ("got SO_RCVBUF %ld from q\n", lvalue)); 4974 } else if (flags & _SOGETSOCKOPT_XPG4_2) { 4975 value = (int)lvalue; 4976 option = &value; 4977 goto copyout; /* skip asking transport */ 4978 } 4979 value = (int)lvalue; 4980 option = &value; 4981 len = (t_uscalar_t)sizeof (so->so_rcvbuf); 4982 break; 4983 } 4984 case SO_DOMAIN: 4985 value = so->so_family; 4986 option = &value; 4987 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4988 4989 #ifdef notyet 4990 /* 4991 * We do not implement the semantics of these options 4992 * thus we shouldn't implement the options either. 4993 */ 4994 case SO_SNDLOWAT: 4995 value = so->so_sndlowat; 4996 option = &value; 4997 break; 4998 case SO_RCVLOWAT: 4999 value = so->so_rcvlowat; 5000 option = &value; 5001 break; 5002 case SO_SNDTIMEO: 5003 value = so->so_sndtimeo; 5004 option = &value; 5005 break; 5006 case SO_RCVTIMEO: 5007 value = so->so_rcvtimeo; 5008 option = &value; 5009 break; 5010 #endif /* notyet */ 5011 } 5012 } 5013 5014 mutex_exit(&so->so_lock); 5015 5016 /* Send request */ 5017 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5018 optmgmt_req.MGMT_flags = T_CHECK; 5019 optmgmt_req.OPT_length = (t_scalar_t)(sizeof (oh) + maxlen); 5020 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5021 5022 oh.level = level; 5023 oh.name = option_name; 5024 oh.len = maxlen; 5025 5026 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5027 &oh, sizeof (oh), NULL, maxlen, 0, _ALLOC_SLEEP); 5028 /* Let option management work in the presence of data flow control */ 5029 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5030 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5031 mp = NULL; 5032 mutex_enter(&so->so_lock); 5033 if (error) { 5034 eprintsoline(so, error); 5035 goto done2; 5036 } 5037 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5038 (t_uscalar_t)(sizeof (*optmgmt_ack) + sizeof (*opt_res)), &mp, 0); 5039 if (error) { 5040 if (option != NULL) { 5041 /* We have a fallback value */ 5042 error = 0; 5043 goto copyout; 5044 } 5045 eprintsoline(so, error); 5046 goto done2; 5047 } 5048 ASSERT(mp); 5049 optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr; 5050 opt_res = (struct opthdr *)sogetoff(mp, optmgmt_ack->OPT_offset, 5051 optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE); 5052 if (opt_res == NULL) { 5053 if (option != NULL) { 5054 /* We have a fallback value */ 5055 error = 0; 5056 goto copyout; 5057 } 5058 error = EPROTO; 5059 eprintsoline(so, error); 5060 goto done; 5061 } 5062 option = &opt_res[1]; 5063 5064 /* check to ensure that the option is within bounds */ 5065 if (((uintptr_t)option + opt_res->len < (uintptr_t)option) || 5066 (uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) { 5067 if (option != NULL) { 5068 /* We have a fallback value */ 5069 error = 0; 5070 goto copyout; 5071 } 5072 error = EPROTO; 5073 eprintsoline(so, error); 5074 goto done; 5075 } 5076 5077 len = opt_res->len; 5078 5079 copyout: { 5080 t_uscalar_t size = MIN(len, maxlen); 5081 bcopy(option, optval, size); 5082 bcopy(&size, optlenp, sizeof (size)); 5083 } 5084 done: 5085 freemsg(mp); 5086 done2: 5087 so_unlock_single(so, SOLOCKED); 5088 mutex_exit(&so->so_lock); 5089 return (error); 5090 } 5091 5092 /* 5093 * Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ. 5094 * SOL_SOCKET options are also recorded in the sonode. A setsockopt for 5095 * SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails - 5096 * setsockopt has to work even if the transport does not support the option. 5097 */ 5098 int 5099 sotpi_setsockopt(struct sonode *so, int level, int option_name, 5100 const void *optval, t_uscalar_t optlen) 5101 { 5102 struct T_optmgmt_req optmgmt_req; 5103 struct opthdr oh; 5104 mblk_t *mp; 5105 int error = 0; 5106 boolean_t handled = B_FALSE; 5107 5108 dprintso(so, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n", 5109 so, level, option_name, optval, optlen, 5110 pr_state(so->so_state, so->so_mode))); 5111 5112 5113 /* X/Open requires this check */ 5114 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 5115 if (xnet_check_print) 5116 printf("sockfs: X/Open setsockopt check => EINVAL\n"); 5117 return (EINVAL); 5118 } 5119 5120 /* Caller allocates aligned optval, or passes null */ 5121 ASSERT(((uintptr_t)optval & (sizeof (t_scalar_t) - 1)) == 0); 5122 /* If optval is null optlen is 0, and vice-versa */ 5123 ASSERT(optval != NULL || optlen == 0); 5124 ASSERT(optlen != 0 || optval == NULL); 5125 5126 mutex_enter(&so->so_lock); 5127 so_lock_single(so); /* Set SOLOCKED */ 5128 mutex_exit(&so->so_lock); 5129 5130 /* 5131 * For SOCKET or TCP level options, try to set it here itself 5132 * provided socket has not been popped and we know the tcp 5133 * structure (stored in so_priv). 5134 */ 5135 if ((level == SOL_SOCKET || level == IPPROTO_TCP) && 5136 (so->so_family == AF_INET || so->so_family == AF_INET6) && 5137 (so->so_version == SOV_SOCKSTREAM) && (so->so_priv != NULL)) { 5138 tcp_t *tcp = so->so_priv; 5139 boolean_t onoff; 5140 5141 #define intvalue (*(int32_t *)optval) 5142 5143 switch (level) { 5144 case SOL_SOCKET: 5145 switch (option_name) { /* Check length param */ 5146 case SO_DEBUG: 5147 case SO_REUSEADDR: 5148 case SO_DONTROUTE: 5149 case SO_BROADCAST: 5150 case SO_USELOOPBACK: 5151 case SO_OOBINLINE: 5152 case SO_DGRAM_ERRIND: 5153 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5154 error = EINVAL; 5155 eprintsoline(so, error); 5156 mutex_enter(&so->so_lock); 5157 goto done2; 5158 } 5159 ASSERT(optval); 5160 onoff = intvalue != 0; 5161 handled = B_TRUE; 5162 break; 5163 case SO_LINGER: 5164 if (optlen != 5165 (t_uscalar_t)sizeof (struct linger)) { 5166 error = EINVAL; 5167 eprintsoline(so, error); 5168 mutex_enter(&so->so_lock); 5169 goto done2; 5170 } 5171 ASSERT(optval); 5172 handled = B_TRUE; 5173 break; 5174 } 5175 5176 switch (option_name) { /* Do actions */ 5177 case SO_LINGER: { 5178 struct linger *lgr = (struct linger *)optval; 5179 5180 if (lgr->l_onoff) { 5181 tcp->tcp_linger = 1; 5182 tcp->tcp_lingertime = lgr->l_linger; 5183 so->so_linger.l_onoff = SO_LINGER; 5184 so->so_options |= SO_LINGER; 5185 } else { 5186 tcp->tcp_linger = 0; 5187 tcp->tcp_lingertime = 0; 5188 so->so_linger.l_onoff = 0; 5189 so->so_options &= ~SO_LINGER; 5190 } 5191 so->so_linger.l_linger = lgr->l_linger; 5192 handled = B_TRUE; 5193 break; 5194 } 5195 case SO_DEBUG: 5196 tcp->tcp_debug = onoff; 5197 #ifdef SOCK_TEST 5198 if (intvalue & 2) 5199 sock_test_timelimit = 10 * hz; 5200 else 5201 sock_test_timelimit = 0; 5202 5203 if (intvalue & 4) 5204 do_useracc = 0; 5205 else 5206 do_useracc = 1; 5207 #endif /* SOCK_TEST */ 5208 break; 5209 case SO_DONTROUTE: 5210 /* 5211 * SO_DONTROUTE, SO_USELOOPBACK and 5212 * SO_BROADCAST are only of interest to IP. 5213 * We track them here only so 5214 * that we can report their current value. 5215 */ 5216 tcp->tcp_dontroute = onoff; 5217 if (onoff) 5218 so->so_options |= option_name; 5219 else 5220 so->so_options &= ~option_name; 5221 break; 5222 case SO_USELOOPBACK: 5223 tcp->tcp_useloopback = onoff; 5224 if (onoff) 5225 so->so_options |= option_name; 5226 else 5227 so->so_options &= ~option_name; 5228 break; 5229 case SO_BROADCAST: 5230 tcp->tcp_broadcast = onoff; 5231 if (onoff) 5232 so->so_options |= option_name; 5233 else 5234 so->so_options &= ~option_name; 5235 break; 5236 case SO_REUSEADDR: 5237 tcp->tcp_reuseaddr = onoff; 5238 if (onoff) 5239 so->so_options |= option_name; 5240 else 5241 so->so_options &= ~option_name; 5242 break; 5243 case SO_OOBINLINE: 5244 tcp->tcp_oobinline = onoff; 5245 if (onoff) 5246 so->so_options |= option_name; 5247 else 5248 so->so_options &= ~option_name; 5249 break; 5250 case SO_DGRAM_ERRIND: 5251 tcp->tcp_dgram_errind = onoff; 5252 if (onoff) 5253 so->so_options |= option_name; 5254 else 5255 so->so_options &= ~option_name; 5256 break; 5257 } 5258 break; 5259 case IPPROTO_TCP: 5260 switch (option_name) { 5261 case TCP_NODELAY: 5262 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5263 error = EINVAL; 5264 eprintsoline(so, error); 5265 mutex_enter(&so->so_lock); 5266 goto done2; 5267 } 5268 ASSERT(optval); 5269 tcp->tcp_naglim = intvalue ? 1 : tcp->tcp_mss; 5270 handled = B_TRUE; 5271 break; 5272 } 5273 break; 5274 default: 5275 handled = B_FALSE; 5276 break; 5277 } 5278 } 5279 5280 if (handled) { 5281 mutex_enter(&so->so_lock); 5282 goto done2; 5283 } 5284 5285 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5286 optmgmt_req.MGMT_flags = T_NEGOTIATE; 5287 optmgmt_req.OPT_length = (t_scalar_t)sizeof (oh) + optlen; 5288 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5289 5290 oh.level = level; 5291 oh.name = option_name; 5292 oh.len = optlen; 5293 5294 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5295 &oh, sizeof (oh), optval, optlen, 0, _ALLOC_SLEEP); 5296 /* Let option management work in the presence of data flow control */ 5297 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5298 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5299 mp = NULL; 5300 mutex_enter(&so->so_lock); 5301 if (error) { 5302 eprintsoline(so, error); 5303 goto done; 5304 } 5305 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5306 (t_uscalar_t)sizeof (struct T_optmgmt_ack), &mp, 0); 5307 if (error) { 5308 eprintsoline(so, error); 5309 goto done; 5310 } 5311 ASSERT(mp); 5312 /* No need to verify T_optmgmt_ack */ 5313 freemsg(mp); 5314 done: 5315 /* 5316 * Check for SOL_SOCKET options and record their values. 5317 * If we know about a SOL_SOCKET parameter and the transport 5318 * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or 5319 * EPROTO) we let the setsockopt succeed. 5320 */ 5321 if (level == SOL_SOCKET) { 5322 /* Check parameters */ 5323 switch (option_name) { 5324 case SO_DEBUG: 5325 case SO_REUSEADDR: 5326 case SO_KEEPALIVE: 5327 case SO_DONTROUTE: 5328 case SO_BROADCAST: 5329 case SO_USELOOPBACK: 5330 case SO_OOBINLINE: 5331 case SO_SNDBUF: 5332 case SO_RCVBUF: 5333 #ifdef notyet 5334 case SO_SNDLOWAT: 5335 case SO_RCVLOWAT: 5336 case SO_SNDTIMEO: 5337 case SO_RCVTIMEO: 5338 #endif /* notyet */ 5339 case SO_DGRAM_ERRIND: 5340 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5341 error = EINVAL; 5342 eprintsoline(so, error); 5343 goto done2; 5344 } 5345 ASSERT(optval); 5346 handled = B_TRUE; 5347 break; 5348 case SO_LINGER: 5349 if (optlen != (t_uscalar_t)sizeof (struct linger)) { 5350 error = EINVAL; 5351 eprintsoline(so, error); 5352 goto done2; 5353 } 5354 ASSERT(optval); 5355 handled = B_TRUE; 5356 break; 5357 } 5358 5359 #define intvalue (*(int32_t *)optval) 5360 5361 switch (option_name) { 5362 case SO_TYPE: 5363 case SO_ERROR: 5364 case SO_ACCEPTCONN: 5365 /* Can't be set */ 5366 error = ENOPROTOOPT; 5367 goto done2; 5368 case SO_LINGER: { 5369 struct linger *l = (struct linger *)optval; 5370 5371 so->so_linger.l_linger = l->l_linger; 5372 if (l->l_onoff) { 5373 so->so_linger.l_onoff = SO_LINGER; 5374 so->so_options |= SO_LINGER; 5375 } else { 5376 so->so_linger.l_onoff = 0; 5377 so->so_options &= ~SO_LINGER; 5378 } 5379 break; 5380 } 5381 5382 case SO_DEBUG: 5383 #ifdef SOCK_TEST 5384 if (intvalue & 2) 5385 sock_test_timelimit = 10 * hz; 5386 else 5387 sock_test_timelimit = 0; 5388 5389 if (intvalue & 4) 5390 do_useracc = 0; 5391 else 5392 do_useracc = 1; 5393 #endif /* SOCK_TEST */ 5394 /* FALLTHRU */ 5395 case SO_REUSEADDR: 5396 case SO_KEEPALIVE: 5397 case SO_DONTROUTE: 5398 case SO_BROADCAST: 5399 case SO_USELOOPBACK: 5400 case SO_OOBINLINE: 5401 case SO_DGRAM_ERRIND: 5402 if (intvalue != 0) { 5403 dprintso(so, 1, 5404 ("sotpi_setsockopt: setting 0x%x\n", 5405 option_name)); 5406 so->so_options |= option_name; 5407 } else { 5408 dprintso(so, 1, 5409 ("sotpi_setsockopt: clearing 0x%x\n", 5410 option_name)); 5411 so->so_options &= ~option_name; 5412 } 5413 break; 5414 /* 5415 * The following options are only returned by us when the 5416 * T_SVR4_OPTMGMT_REQ fails. 5417 * XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs 5418 * since the transport might adjust the value and not 5419 * return exactly what was set by the application. 5420 */ 5421 case SO_SNDBUF: 5422 so->so_sndbuf = intvalue; 5423 break; 5424 case SO_RCVBUF: 5425 so->so_rcvbuf = intvalue; 5426 break; 5427 #ifdef notyet 5428 /* 5429 * We do not implement the semantics of these options 5430 * thus we shouldn't implement the options either. 5431 */ 5432 case SO_SNDLOWAT: 5433 so->so_sndlowat = intvalue; 5434 break; 5435 case SO_RCVLOWAT: 5436 so->so_rcvlowat = intvalue; 5437 break; 5438 case SO_SNDTIMEO: 5439 so->so_sndtimeo = intvalue; 5440 break; 5441 case SO_RCVTIMEO: 5442 so->so_rcvtimeo = intvalue; 5443 break; 5444 #endif /* notyet */ 5445 } 5446 #undef intvalue 5447 5448 if (error) { 5449 if ((error == ENOPROTOOPT || error == EPROTO || 5450 error == EINVAL) && handled) { 5451 dprintso(so, 1, 5452 ("setsockopt: ignoring error %d for 0x%x\n", 5453 error, option_name)); 5454 error = 0; 5455 } 5456 } 5457 } 5458 done2: 5459 ret: 5460 so_unlock_single(so, SOLOCKED); 5461 mutex_exit(&so->so_lock); 5462 return (error); 5463 } 5464