1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/t_lock.h> 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/buf.h> 34 #include <sys/conf.h> 35 #include <sys/cred.h> 36 #include <sys/kmem.h> 37 #include <sys/sysmacros.h> 38 #include <sys/vfs.h> 39 #include <sys/vnode.h> 40 #include <sys/debug.h> 41 #include <sys/errno.h> 42 #include <sys/time.h> 43 #include <sys/file.h> 44 #include <sys/open.h> 45 #include <sys/user.h> 46 #include <sys/termios.h> 47 #include <sys/stream.h> 48 #include <sys/strsubr.h> 49 #include <sys/strsun.h> 50 #include <sys/ddi.h> 51 #include <sys/esunddi.h> 52 #include <sys/flock.h> 53 #include <sys/modctl.h> 54 #include <sys/vtrace.h> 55 #include <sys/cmn_err.h> 56 #include <sys/pathname.h> 57 58 #include <sys/socket.h> 59 #include <sys/socketvar.h> 60 #include <sys/sockio.h> 61 #include <netinet/in.h> 62 #include <sys/un.h> 63 #include <sys/strsun.h> 64 65 #include <sys/tiuser.h> 66 #define _SUN_TPI_VERSION 2 67 
#include <sys/tihdr.h>
#include <sys/timod.h>		/* TI_GETMYNAME, TI_GETPEERNAME */

#include <c2/audit.h>

#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/tcp.h>
#include <inet/udp_impl.h>

#include <fs/sockfs/nl7c.h>
#include <sys/zone.h>

#include <inet/kssl/ksslapi.h>

/*
 * Possible failures when memory can't be allocated. The documented behavior:
 *
 *		5.5:			4.X:		XNET:
 * accept:	ENOMEM/ENOSR/EINTR	- (EINTR)	ENOMEM/ENOBUFS/ENOSR/
 *							EINTR
 *	(4.X does not document EINTR but returns it)
 * bind:	ENOSR			-		ENOBUFS/ENOSR
 * connect:	EINTR			EINTR		ENOBUFS/ENOSR/EINTR
 * getpeername:	ENOMEM/ENOSR		ENOBUFS (-)	ENOBUFS/ENOSR
 * getsockname:	ENOMEM/ENOSR		ENOBUFS (-)	ENOBUFS/ENOSR
 *	(4.X getpeername and getsockname do not fail in practice)
 * getsockopt:	ENOMEM/ENOSR		-		ENOBUFS/ENOSR
 * listen:	-			-		ENOBUFS
 * recv:	ENOMEM/ENOSR/EINTR	EINTR		ENOBUFS/ENOMEM/ENOSR/
 *							EINTR
 * send:	ENOMEM/ENOSR/EINTR	ENOBUFS/EINTR	ENOBUFS/ENOMEM/ENOSR/
 *							EINTR
 * setsockopt:	ENOMEM/ENOSR		-		ENOBUFS/ENOMEM/ENOSR
 * shutdown:	ENOMEM/ENOSR		-		ENOBUFS/ENOSR
 * socket:	ENOMEM/ENOSR		ENOBUFS		ENOBUFS/ENOMEM/ENOSR
 * socketpair:	ENOMEM/ENOSR		-		ENOBUFS/ENOMEM/ENOSR
 *
 * Resolution. When allocation fails:
 *	recv: return EINTR
 *	send: return EINTR
 *	connect, accept: EINTR
 *	bind, listen, shutdown (unbind, unix_close, disconnect): sleep
 *	socket, socketpair: ENOBUFS
 *	getpeername, getsockname: sleep
 *	getsockopt, setsockopt: sleep
 */

#ifdef SOCK_TEST
/*
 * Variables that make sockfs do something other than the standard TPI
 * for the AF_INET transports.
 *
 * In a SOCK_TEST build these are real variables; otherwise they are
 * compile-time constants (see the #else branch below).
 *
 * solisten_tpi_tcp:
 *	TCP can handle a O_T_BIND_REQ with an increased backlog even though
 *	the transport is already bound. This is needed to avoid losing the
 *	port number should listen() do a T_UNBIND_REQ followed by a
 *	O_T_BIND_REQ.
 *
 * soconnect_tpi_udp:
 *	UDP and ICMP can handle a T_CONN_REQ.
 *	This is needed to make the sequence of connect(), getsockname()
 *	return the local IP address used to send packets to the connected to
 *	destination.
 *
 * soconnect_tpi_tcp:
 *	TCP can handle a T_CONN_REQ without seeing a O_T_BIND_REQ.
 *	Set this to non-zero to send TPI conformant messages to TCP in this
 *	respect. This is a performance optimization.
 *
 * soaccept_tpi_tcp:
 *	TCP can handle a T_CONN_REQ without the acceptor being bound.
 *	This is a performance optimization that has been picked up in XTI.
 *
 * soaccept_tpi_multioptions:
 *	When inheriting SOL_SOCKET options from the listener to the accepting
 *	socket send them as a single message for AF_INET{,6}.
 */
int solisten_tpi_tcp = 0;
int soconnect_tpi_udp = 0;
int soconnect_tpi_tcp = 0;
int soaccept_tpi_tcp = 0;
int soaccept_tpi_multioptions = 1;
#else /* SOCK_TEST */
/* Same names and values as above, fixed at compile time. */
#define	soconnect_tpi_tcp	0
#define	soconnect_tpi_udp	0
#define	solisten_tpi_tcp	0
#define	soaccept_tpi_tcp	0
#define	soaccept_tpi_multioptions	1
#endif /* SOCK_TEST */

#ifdef SOCK_TEST
/* Test-only knobs defined outside this file. */
extern int do_useracc;
extern clock_t sock_test_timelimit;
#endif /* SOCK_TEST */

/*
 * Some X/Open added checks might have to be backed out to keep SunOS 4.X
 * applications working. Turn on this flag to disable these checks.
 */
int xnet_skip_checks = 0;
int xnet_check_print = 0;
int xnet_truncate_print = 0;

extern void sigintr(k_sigset_t *, int);
extern void sigunintr(k_sigset_t *);

/* NL7C address lookup/registration hooks used by the bind path. */
extern	void *nl7c_lookup_addr(void *, t_uscalar_t);
extern	void *nl7c_add_addr(void *, t_uscalar_t);
extern	void nl7c_listener_addr(void *, queue_t *);

/* Sockets acting as an in-kernel SSL proxy */
extern mblk_t	*strsock_kssl_input(vnode_t *, mblk_t *, strwakeup_t *,
		    strsigset_t *, strsigset_t *, strpollset_t *);
extern mblk_t	*strsock_kssl_output(vnode_t *, mblk_t *, strwakeup_t *,
		    strsigset_t *, strsigset_t *, strpollset_t *);

static int	sotpi_unbind(struct sonode *, int);

/* TPI sockfs sonode operations */
static int	sotpi_accept(struct sonode *, int, struct sonode **);
static int	sotpi_bind(struct sonode *, struct sockaddr *, socklen_t,
		    int);
static int	sotpi_connect(struct sonode *, const struct sockaddr *,
		    socklen_t, int, int);
static int	sotpi_listen(struct sonode *, int);
static int	sotpi_sendmsg(struct sonode *, struct nmsghdr *,
		    struct uio *);
static int	sotpi_shutdown(struct sonode *, int);
static int	sotpi_getsockname(struct sonode *);
static int	sosend_dgramcmsg(struct sonode *, struct sockaddr *, socklen_t,
		    struct uio *, void *, t_uscalar_t, int);
static int	sodgram_direct(struct sonode *, struct sockaddr *,
		    socklen_t, struct uio *, int);

/*
 * Operations vector for TPI-based sockets. The initializer is positional,
 * so the order of the entries must match the member order of sonodeops_t
 * (each entry is tagged with the member it fills).
 * sotpi_recvmsg, sotpi_getpeername, sotpi_getsockopt and sotpi_setsockopt
 * have no prototype in this part of the file; presumably they are declared
 * in one of the included headers.
 */
sonodeops_t sotpi_sonodeops = {
	sotpi_accept,		/* sop_accept		*/
	sotpi_bind,		/* sop_bind		*/
	sotpi_listen,		/* sop_listen		*/
	sotpi_connect,		/* sop_connect		*/
	sotpi_recvmsg,		/* sop_recvmsg		*/
	sotpi_sendmsg,		/* sop_sendmsg		*/
	sotpi_getpeername,	/* sop_getpeername	*/
	sotpi_getsockname,	/* sop_getsockname	*/
	sotpi_shutdown,		/* sop_shutdown		*/
	sotpi_getsockopt,	/* sop_getsockopt	*/
	sotpi_setsockopt	/* sop_setsockopt	*/
};

/*
 * Common create code for socket and
accept. If tso is set the values 219 * from that node is used instead of issuing a T_INFO_REQ. 220 * 221 * Assumes that the caller has a VN_HOLD on accessvp. 222 * The VN_RELE will occur either when sotpi_create() fails or when 223 * the returned sonode is freed. 224 */ 225 struct sonode * 226 sotpi_create(vnode_t *accessvp, int domain, int type, int protocol, int version, 227 struct sonode *tso, int *errorp) 228 { 229 struct sonode *so; 230 vnode_t *vp; 231 int flags, error; 232 233 ASSERT(accessvp != NULL); 234 vp = makesockvp(accessvp, domain, type, protocol); 235 ASSERT(vp != NULL); 236 so = VTOSO(vp); 237 238 flags = FREAD|FWRITE; 239 240 if ((type == SOCK_STREAM || type == SOCK_DGRAM) && 241 (domain == AF_INET || domain == AF_INET6) && 242 (protocol == IPPROTO_TCP || protocol == IPPROTO_UDP || 243 protocol == IPPROTO_IP)) { 244 /* Tell tcp or udp that it's talking to sockets */ 245 flags |= SO_SOCKSTR; 246 247 /* 248 * Here we indicate to socktpi_open() our attempt to 249 * make direct calls between sockfs and transport. 250 * The final decision is left to socktpi_open(). 251 */ 252 so->so_state |= SS_DIRECT; 253 254 ASSERT(so->so_type != SOCK_DGRAM || tso == NULL); 255 if (so->so_type == SOCK_STREAM && tso != NULL) { 256 if (tso->so_state & SS_DIRECT) { 257 /* 258 * Inherit SS_DIRECT from listener and pass 259 * SO_ACCEPTOR open flag to tcp, indicating 260 * that this is an accept fast-path instance. 261 */ 262 flags |= SO_ACCEPTOR; 263 } else { 264 /* 265 * SS_DIRECT is not set on listener, meaning 266 * that the listener has been converted from 267 * a socket to a stream. Ensure that the 268 * acceptor inherits these settings. 269 */ 270 so->so_state &= ~SS_DIRECT; 271 flags &= ~SO_SOCKSTR; 272 } 273 } 274 } 275 276 /* 277 * Tell local transport that it is talking to sockets. 
278 */ 279 if (so->so_family == AF_UNIX) { 280 flags |= SO_SOCKSTR; 281 } 282 283 if (error = socktpi_open(&vp, flags, CRED())) { 284 VN_RELE(vp); 285 *errorp = error; 286 return (NULL); 287 } 288 289 if (error = so_strinit(so, tso)) { 290 (void) VOP_CLOSE(vp, 0, 1, 0, CRED()); 291 VN_RELE(vp); 292 *errorp = error; 293 return (NULL); 294 } 295 296 if (version == SOV_DEFAULT) 297 version = so_default_version; 298 299 so->so_version = (short)version; 300 301 /* Initialize the kernel SSL proxy fields */ 302 so->so_kssl_type = KSSL_NO_PROXY; 303 so->so_kssl_ent = NULL; 304 so->so_kssl_ctx = NULL; 305 306 return (so); 307 } 308 309 /* 310 * Bind the socket to an unspecified address in sockfs only. 311 * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't 312 * required in all cases. 313 */ 314 static void 315 so_automatic_bind(struct sonode *so) 316 { 317 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 318 319 ASSERT(MUTEX_HELD(&so->so_lock)); 320 ASSERT(!(so->so_state & SS_ISBOUND)); 321 ASSERT(so->so_unbind_mp); 322 323 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 324 bzero(so->so_laddr_sa, so->so_laddr_len); 325 so->so_laddr_sa->sa_family = so->so_family; 326 so->so_state |= SS_ISBOUND; 327 } 328 329 330 /* 331 * bind the socket. 332 * 333 * If the socket is already bound and none of _SOBIND_SOCKBSD or _SOBIND_XPG4_2 334 * are passed in we allow rebinding. Note that for backwards compatibility 335 * even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to sobind/bind. 336 * Thus the rebinding code is currently not executed. 337 * 338 * The constraints for rebinding are: 339 * - it is a SOCK_DGRAM, or 340 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 341 * and no listen() has been done. 342 * This rebinding code was added based on some language in the XNET book 343 * about not returning EINVAL it the protocol allows rebinding. However, 344 * this language is not present in the Posix socket draft. 
Thus maybe the
 * rebinding logic should be deleted from the source.
 *
 * A null "name" can be used to unbind the socket if:
 * - it is a SOCK_DGRAM, or
 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected
 *   and no listen() has been done.
 *
 * Arguments:
 *	so	 - socket to bind.
 *	name	 - address to bind to; must be NULL for _SOBIND_REBIND and
 *		   _SOBIND_UNSPEC. For AF_INET the sa_family field of the
 *		   caller's buffer is overwritten with so_family.
 *	namelen	 - length of name in bytes.
 *	backlog	 - sent to the transport as CONIND_number; with
 *		   _SOBIND_LISTEN it is also stored in so_backlog.
 *	flags	 - _SOBIND_* flags:
 *		   _SOBIND_LOCK_HELD: the caller already holds so_lock with
 *			SOLOCKED set and both are left held on return;
 *			otherwise they are taken and released here.
 *		   _SOBIND_REBIND: reuse the address already saved in the
 *			sonode.
 *		   _SOBIND_UNSPEC: bind to an unspecified address; returns
 *			success without doing anything if already bound.
 *		   _SOBIND_LISTEN: set SS_ACCEPTCONN and so_backlog before
 *			the bind request is sent; both are put back to their
 *			entry values if the bind fails.
 *		   _SOBIND_NOXLATE: set SS_FADDR_NOXLATE (AF_UNIX only).
 *		   _SOBIND_SOCKBSD, _SOBIND_XPG4_2: see above.
 *
 * so_lock is dropped while the bind request is sent to the transport and
 * reacquired before the acknowledgement is awaited.
 *
 * Returns 0 on success, otherwise an errno value. If the failure happens
 * after the socket was marked bound (and unbind_on_err has not been cleared)
 * the socket is unbound again before returning.
 */
static int
sotpi_bindlisten(struct sonode *so, struct sockaddr *name,
    socklen_t namelen, int backlog, int flags)
{
	struct T_bind_req	bind_req;
	struct T_bind_ack	*bind_ack;
	int			error = 0;
	mblk_t			*mp;
	void			*addr;
	t_uscalar_t		addrlen;
	int			unbind_on_err = 1;
	boolean_t		clear_acceptconn_on_err = B_FALSE;
	boolean_t		restore_backlog_on_err = B_FALSE;
	int			save_so_backlog;
	t_scalar_t		PRIM_type = O_T_BIND_REQ;
	boolean_t		tcp_udp_xport;
	void			*nl7c = NULL;

	dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n",
	    so, name, namelen, backlog, flags,
	    pr_state(so->so_state, so->so_mode)));

	/*
	 * Only the socket type is tested here; the address family is
	 * checked separately wherever tcp_udp_xport is used.
	 */
	tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM;

	if (!(flags & _SOBIND_LOCK_HELD)) {
		mutex_enter(&so->so_lock);
		so_lock_single(so);	/* Set SOLOCKED */
	} else {
		ASSERT(MUTEX_HELD(&so->so_lock));
		ASSERT(so->so_flag & SOLOCKED);
	}

	/*
	 * Make sure that there is a preallocated unbind_req message
	 * before binding. This message is allocated when the socket is
	 * created but it might have been consumed.
	 */
	if (so->so_unbind_mp == NULL) {
		dprintso(so, 1, ("sobind: allocating unbind_req\n"));
		/* NOTE: holding so_lock while sleeping */
		so->so_unbind_mp =
		    soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP);
	}

	if (flags & _SOBIND_REBIND) {
		/*
		 * Called from solisten after doing an sotpi_unbind() or
		 * potentially without the unbind (latter for AF_INET{,6}).
		 */
		ASSERT(name == NULL && namelen == 0);

		if (so->so_family == AF_UNIX) {
			ASSERT(so->so_ux_bound_vp);
			addr = &so->so_ux_laddr;
			addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr);
			dprintso(so, 1,
			("sobind rebind UNIX: addrlen %d, addr 0x%p, vp %p\n",
			    addrlen,
			    ((struct so_ux_addr *)addr)->soua_vp,
			    so->so_ux_bound_vp));
		} else {
			addr = so->so_laddr_sa;
			addrlen = (t_uscalar_t)so->so_laddr_len;
		}
	} else if (flags & _SOBIND_UNSPEC) {
		ASSERT(name == NULL && namelen == 0);

		/*
		 * The caller checked SS_ISBOUND but not necessarily
		 * under so_lock
		 */
		if (so->so_state & SS_ISBOUND) {
			/* No error */
			goto done;
		}

		/* Set an initial local address */
		switch (so->so_family) {
		case AF_UNIX:
			/*
			 * Use an address with same size as struct sockaddr
			 * just like BSD.
			 */
			so->so_laddr_len =
				(socklen_t)sizeof (struct sockaddr);
			ASSERT(so->so_laddr_len <= so->so_laddr_maxlen);
			bzero(so->so_laddr_sa, so->so_laddr_len);
			so->so_laddr_sa->sa_family = so->so_family;

			/*
			 * Pass down an address with the implicit bind
			 * magic number and the rest all zeros.
			 * The transport will return a unique address.
			 */
			so->so_ux_laddr.soua_vp = NULL;
			so->so_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT;
			addr = &so->so_ux_laddr;
			addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr);
			break;

		case AF_INET:
		case AF_INET6:
			/*
			 * An unspecified bind in TPI has a NULL address.
			 * Set the address in sockfs to have the sa_family.
			 */
			so->so_laddr_len = (so->so_family == AF_INET) ?
			    (socklen_t)sizeof (sin_t) :
			    (socklen_t)sizeof (sin6_t);
			ASSERT(so->so_laddr_len <= so->so_laddr_maxlen);
			bzero(so->so_laddr_sa, so->so_laddr_len);
			so->so_laddr_sa->sa_family = so->so_family;
			addr = NULL;
			addrlen = 0;
			break;

		default:
			/*
			 * An unspecified bind in TPI has a NULL address.
			 * Set the address in sockfs to be zero length.
			 *
			 * Can not assume there is a sa_family for all
			 * protocol families. For example, AF_X25 does not
			 * have a family field.
			 */
			so->so_laddr_len = 0;	/* XXX correct? */
			/* so_laddr_len is 0 here, so this clears nothing. */
			bzero(so->so_laddr_sa, so->so_laddr_len);
			addr = NULL;
			addrlen = 0;
			break;
		}

	} else {
		if (so->so_state & SS_ISBOUND) {
			/*
			 * If it is ok to rebind the socket, first unbind
			 * with the transport. A rebind to the NULL address
			 * is interpreted as an unbind.
			 * Note that a bind to NULL in BSD does unbind the
			 * socket but it fails with EINVAL.
			 * Note that regular sockets set SOV_SOCKBSD i.e.
			 * _SOBIND_SOCKBSD gets set here hence no type of
			 * socket does currently allow rebinding.
			 *
			 * If the name is NULL just do an unbind.
			 */
			if (flags & (_SOBIND_SOCKBSD|_SOBIND_XPG4_2) &&
			    name != NULL) {
				error = EINVAL;
				/* Keep the existing binding intact. */
				unbind_on_err = 0;
				eprintsoline(so, error);
				goto done;
			}
			if ((so->so_mode & SM_CONNREQUIRED) &&
			    (so->so_state & SS_CANTREBIND)) {
				error = EINVAL;
				/* Keep the existing binding intact. */
				unbind_on_err = 0;
				eprintsoline(so, error);
				goto done;
			}
			error = sotpi_unbind(so, 0);
			if (error) {
				eprintsoline(so, error);
				goto done;
			}
			ASSERT(!(so->so_state & SS_ISBOUND));
			if (name == NULL) {
				so->so_state &=
					~(SS_ISCONNECTED|SS_ISCONNECTING);
				goto done;
			}
		}
		/* X/Open requires this check */
		if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
			if (xnet_check_print) {
				printf("sockfs: X/Open bind state check "
				    "caused EINVAL\n");
			}
			error = EINVAL;
			goto done;
		}

		/* Per-family validation of the caller supplied address. */
		switch (so->so_family) {
		case AF_UNIX:
			/*
			 * All AF_UNIX addresses are nul terminated
			 * when copied (copyin_name) in so the minimum
			 * length is 3 bytes.
			 */
			if (name == NULL ||
			    (ssize_t)namelen <= sizeof (short) + 1) {
				error = EISDIR;
				eprintsoline(so, error);
				goto done;
			}
			/*
			 * Verify so_family matches the bound family.
			 * BSD does not check this for AF_UNIX resulting
			 * in funny mknods.
			 */
			if (name->sa_family != so->so_family) {
				error = EAFNOSUPPORT;
				goto done;
			}
			break;
		case AF_INET:
			if (name == NULL) {
				error = EINVAL;
				eprintsoline(so, error);
				goto done;
			}
			if ((size_t)namelen != sizeof (sin_t)) {
				error = name->sa_family != so->so_family ?
				    EAFNOSUPPORT : EINVAL;
				eprintsoline(so, error);
				goto done;
			}
			if ((flags & _SOBIND_XPG4_2) &&
			    (name->sa_family != so->so_family)) {
				/*
				 * This check has to be made for X/Open
				 * sockets however application failures have
				 * been observed when it is applied to
				 * all sockets.
				 */
				error = EAFNOSUPPORT;
				eprintsoline(so, error);
				goto done;
			}
			/*
			 * Force a zero sa_family to match so_family.
			 *
			 * Some programs like inetd(1M) don't set the
			 * family field. Other programs leave
			 * sin_family set to garbage - SunOS 4.X does
			 * not check the family field on a bind.
			 * We use the family field that
			 * was passed in to the socket() call.
			 */
			name->sa_family = so->so_family;
			break;

		case AF_INET6: {
#ifdef DEBUG
			sin6_t *sin6 = (sin6_t *)name;
#endif /* DEBUG */

			if (name == NULL) {
				error = EINVAL;
				eprintsoline(so, error);
				goto done;
			}
			if ((size_t)namelen != sizeof (sin6_t)) {
				error = name->sa_family != so->so_family ?
				    EAFNOSUPPORT : EINVAL;
				eprintsoline(so, error);
				goto done;
			}
			if (name->sa_family != so->so_family) {
				/*
				 * With IPv6 we require the family to match
				 * unlike in IPv4.
				 */
				error = EAFNOSUPPORT;
				eprintsoline(so, error);
				goto done;
			}
#ifdef DEBUG
			/*
			 * Verify that apps don't forget to clear
			 * sin6_scope_id etc
			 */
			if (sin6->sin6_scope_id != 0 &&
			    !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
				cmn_err(CE_WARN,
				    "bind with uninitialized sin6_scope_id "
				    "(%d) on socket. Pid = %d\n",
				    (int)sin6->sin6_scope_id,
				    (int)curproc->p_pid);
			}
			if (sin6->__sin6_src_id != 0) {
				cmn_err(CE_WARN,
				    "bind with uninitialized __sin6_src_id "
				    "(%d) on socket. Pid = %d\n",
				    (int)sin6->__sin6_src_id,
				    (int)curproc->p_pid);
			}
#endif /* DEBUG */
			break;
		}
		default:
			/*
			 * Don't do any length or sa_family check to allow
			 * non-sockaddr style addresses.
			 */
			if (name == NULL) {
				error = EINVAL;
				eprintsoline(so, error);
				goto done;
			}
			break;
		}

		if (namelen > (t_uscalar_t)so->so_laddr_maxlen) {
			error = ENAMETOOLONG;
			eprintsoline(so, error);
			goto done;
		}
		/*
		 * Save local address.
		 */
		so->so_laddr_len = (socklen_t)namelen;
		ASSERT(so->so_laddr_len <= so->so_laddr_maxlen);
		bcopy(name, so->so_laddr_sa, namelen);

		addr = so->so_laddr_sa;
		addrlen = (t_uscalar_t)so->so_laddr_len;
		switch (so->so_family) {
		case AF_INET6:
		case AF_INET:
			break;
		case AF_UNIX: {
			struct sockaddr_un *soun =
				(struct sockaddr_un *)so->so_laddr_sa;
			struct vnode *vp;
			struct vattr vattr;

			ASSERT(so->so_ux_bound_vp == NULL);
			/*
			 * Create vnode for the specified path name.
			 * Keep vnode held with a reference in so_ux_bound_vp.
			 * Use the vnode pointer as the address used in the
			 * bind with the transport.
			 *
			 * NOTE(review): this comment used to say "Use the
			 * same mode as in BSD. In particular this does not
			 * observe the umask.", but the mode computed below
			 * is 0777 masked with u.u_cmask, i.e. the umask IS
			 * applied. Confirm which behavior is intended.
			 */
			/* MAXPATHLEN + soun_family + nul termination */
			if (so->so_laddr_len >
			    (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) {
				error = ENAMETOOLONG;
				eprintsoline(so, error);
				goto done;
			}
			vattr.va_type = VSOCK;
			vattr.va_mode = 0777 & ~u.u_cmask;
			vattr.va_mask = AT_TYPE|AT_MODE;
			/* NOTE: holding so_lock */
			error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr,
						EXCL, 0, &vp, CRMKNOD, 0, 0);
			if (error) {
				/* An existing path means the address is taken. */
				if (error == EEXIST)
					error = EADDRINUSE;
				eprintsoline(so, error);
				goto done;
			}
			/*
			 * Establish pointer from the underlying filesystem
			 * vnode to the socket node.
			 * so_ux_bound_vp and v_stream->sd_vnode form the
			 * cross-linkage between the underlying filesystem
			 * node and the socket node.
			 */
			ASSERT(SOTOV(so)->v_stream);
			mutex_enter(&vp->v_lock);
			vp->v_stream = SOTOV(so)->v_stream;
			so->so_ux_bound_vp = vp;
			mutex_exit(&vp->v_lock);

			/*
			 * Use the vnode pointer value as a unique address
			 * (together with the magic number to avoid conflicts
			 * with implicit binds) in the transport provider.
			 */
			so->so_ux_laddr.soua_vp = (void *)so->so_ux_bound_vp;
			so->so_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT;
			addr = &so->so_ux_laddr;
			addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr);
			dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n",
			    addrlen,
			    ((struct so_ux_addr *)addr)->soua_vp));
			break;
		}
		} /* end switch (so->so_family) */
	}

	/*
	 * set SS_ACCEPTCONN before sending down O_T_BIND_REQ since
	 * the transport can start passing up T_CONN_IND messages
	 * as soon as it receives the bind req and strsock_proto()
	 * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs.
	 */
	if (flags & _SOBIND_LISTEN) {
		/* Only undo SS_ACCEPTCONN on error if we are the one setting it. */
		if ((so->so_state & SS_ACCEPTCONN) == 0)
			clear_acceptconn_on_err = B_TRUE;
		save_so_backlog = so->so_backlog;
		restore_backlog_on_err = B_TRUE;
		so->so_state |= SS_ACCEPTCONN;
		so->so_backlog = backlog;
	}

	/*
	 * If NL7C addr(s) have been configured check for addr/port match,
	 * or if an implicit NL7C socket via AF_NCA mark socket as NL7C.
	 *
	 * NL7C supports the TCP transport only so check AF_INET and AF_INET6
	 * family sockets only. If match mark as such.
	 */
	if ((nl7c_enabled && addr != NULL &&
	    (so->so_family == AF_INET || so->so_family == AF_INET6) &&
	    (nl7c = nl7c_lookup_addr(addr, addrlen))) ||
	    so->so_nl7c_flags == NL7C_AF_NCA) {
		/*
		 * NL7C is not supported in non-global zones,
		 * we enforce this restriction here.
		 */
		if (so->so_zoneid == GLOBAL_ZONEID) {
			/* An NL7C socket, mark it */
			so->so_nl7c_flags |= NL7C_ENABLED;
		} else
			nl7c = NULL;
	}
	/*
	 * We send a T_BIND_REQ for TCP/UDP since we know it supports it,
	 * for other transports we will send in a O_T_BIND_REQ.
	 */
	if (tcp_udp_xport &&
	    (so->so_family == AF_INET || so->so_family == AF_INET6))
		PRIM_type = T_BIND_REQ;

	bind_req.PRIM_type = PRIM_type;
	bind_req.ADDR_length = addrlen;
	bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req);
	bind_req.CONIND_number = backlog;
	/* NOTE: holding so_lock while sleeping */
	mp = soallocproto2(&bind_req, sizeof (bind_req),
	    addr, addrlen, 0, _ALLOC_SLEEP);
	so->so_state &= ~SS_LADDR_VALID;

	/* Done using so_laddr_sa - can drop the lock */
	mutex_exit(&so->so_lock);

	/*
	 * Intercept the bind_req message here to check if this <address/port>
	 * was configured as an SSL proxy server, or if another endpoint was
	 * already configured to act as a proxy for us.
	 */
	if ((so->so_family == AF_INET || so->so_family == AF_INET6) &&
	    so->so_type == SOCK_STREAM) {

		if (so->so_kssl_ent != NULL) {
			kssl_release_ent(so->so_kssl_ent, so, so->so_kssl_type);
			so->so_kssl_ent = NULL;
		}

		so->so_kssl_type = kssl_check_proxy(mp, so, &so->so_kssl_ent);
		switch (so->so_kssl_type) {
		case KSSL_NO_PROXY:
			break;

		case KSSL_HAS_PROXY:
			/*
			 * Nothing is sent to the transport on this path; mp
			 * (still the message built above) is what the code
			 * after skip_transport parses as the bind ack.
			 */
			mutex_enter(&so->so_lock);
			goto skip_transport;

		case KSSL_IS_PROXY:
			break;
		}
	}

	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
	if (error) {
		eprintsoline(so, error);
		/* The done: path expects so_lock to be held. */
		mutex_enter(&so->so_lock);
		goto done;
	}

	mutex_enter(&so->so_lock);
	error = sowaitprim(so, PRIM_type, T_BIND_ACK,
	    (t_uscalar_t)sizeof (*bind_ack), &mp, 0);
	if (error) {
		eprintsoline(so, error);
		goto done;
	}
skip_transport:
	ASSERT(mp);
	/*
	 * Even if some TPI message (e.g. T_DISCON_IND) was received in
	 * strsock_proto while the lock was dropped above, the bind
	 * is allowed to complete.
	 */

	/* Mark as bound. This will be undone if we detect errors below. */
	if (flags & _SOBIND_NOXLATE) {
		ASSERT(so->so_family == AF_UNIX);
		so->so_state |= SS_FADDR_NOXLATE;
	}
	ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND));
	so->so_state |= SS_ISBOUND;
	ASSERT(so->so_unbind_mp);

	/* note that we've already set SS_ACCEPTCONN above */

	/*
	 * Recompute addrlen - an unspecified bind sent down an
	 * address of length zero but we expect the appropriate length
	 * in return.
	 */
	addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ?
	    sizeof (so->so_ux_laddr) : so->so_laddr_len);

	bind_ack = (struct T_bind_ack *)mp->b_rptr;
	/*
	 * The alignment restriction is really too strict but
	 * we want enough alignment to inspect the fields of
	 * a sockaddr_in.
	 */
	addr = sogetoff(mp, bind_ack->ADDR_offset,
	    bind_ack->ADDR_length,
	    __TPI_ALIGN_SIZE);
	if (addr == NULL) {
		freemsg(mp);
		error = EPROTO;
		eprintsoline(so, error);
		goto done;
	}
	if (!(flags & _SOBIND_UNSPEC)) {
		/*
		 * Verify that the transport didn't return something we
		 * did not want e.g. an address other than what we asked for.
		 *
		 * NOTE: These checks would go away if/when we switch to
		 * using the new TPI (in which the transport would fail
		 * the request instead of assigning a different address).
		 *
		 * NOTE2: For protocols that we don't know (i.e. any
		 * other than AF_INET6, AF_INET and AF_UNIX), we
		 * cannot know if the transport should be expected to
		 * return the same address as that requested.
		 *
		 * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send
		 * down a T_BIND_REQ. We use O_T_BIND_REQ for others.
		 *
		 * For example, in the case of netatalk it may be
		 * inappropriate for the transport to return the
		 * requested address (as it may have allocated a local
		 * port number in behaviour similar to that of an
		 * AF_INET bind request with a port number of zero).
		 *
		 * Given the definition of O_T_BIND_REQ, where the
		 * transport may bind to an address other than the
		 * requested address, it's not possible to determine
		 * whether a returned address that differs from the
		 * requested address is a reason to fail (because the
		 * requested address was not available) or succeed
		 * (because the transport allocated an appropriate
		 * address and/or port).
		 *
		 * sockfs currently requires that the transport return
		 * the requested address in the T_BIND_ACK, unless
		 * there is code here to allow for any discrepancy.
		 * Such code exists for AF_INET and AF_INET6.
		 *
		 * Netatalk chooses to return the requested address
		 * rather than the (correct) allocated address. This
		 * means that netatalk violates the TPI specification
		 * (and would not function correctly if used from a
		 * TLI application), but it does mean that it works
		 * with sockfs.
		 *
		 * As noted above, using the newer XTI bind primitive
		 * (T_BIND_REQ) in preference to O_T_BIND_REQ would
		 * allow sockfs to be more sure about whether or not
		 * the bind request had succeeded (as transports are
		 * not permitted to bind to a different address than
		 * that requested - they must return failure).
		 * Unfortunately, support for T_BIND_REQ may not be
		 * present in all transport implementations (netatalk,
		 * for example, doesn't have it), making the
		 * transition difficult.
		 */
		if (bind_ack->ADDR_length != addrlen) {
			/* Assumes that the requested address was in use */
			freemsg(mp);
			error = EADDRINUSE;
			eprintsoline(so, error);
			goto done;
		}

		switch (so->so_family) {
		case AF_INET6:
		case AF_INET: {
			sin_t *rname, *aname;

			/* rname: address returned; aname: address asked for. */
			rname = (sin_t *)addr;
			aname = (sin_t *)so->so_laddr_sa;

			/*
			 * Take advantage of the alignment
			 * of sin_port and sin6_port which fall
			 * in the same place in their data structures.
			 * Just use sin_port for either address family.
			 *
			 * This may become a problem if (heaven forbid)
			 * there's a separate ipv6port_reserved... :-P
			 *
			 * Binding to port 0 has the semantics of letting
			 * the transport bind to any port.
			 *
			 * If the transport is TCP or UDP since we had sent
			 * a T_BIND_REQ we would not get a port other than
			 * what we asked for.
			 */
			if (tcp_udp_xport) {
				/*
				 * Pick up the new port number if we bound to
				 * port 0.
				 */
				if (aname->sin_port == 0)
					aname->sin_port = rname->sin_port;
				so->so_state |= SS_LADDR_VALID;
				break;
			}
			if (aname->sin_port != 0 &&
			    aname->sin_port != rname->sin_port) {
				freemsg(mp);
				error = EADDRINUSE;
				eprintsoline(so, error);
				goto done;
			}
			/*
			 * Pick up the new port number if we bound to port 0.
			 */
			aname->sin_port = rname->sin_port;

			/*
			 * Unfortunately, addresses aren't _quite_ the same.
			 */
			if (so->so_family == AF_INET) {
				if (aname->sin_addr.s_addr !=
				    rname->sin_addr.s_addr) {
					freemsg(mp);
					error = EADDRNOTAVAIL;
					eprintsoline(so, error);
					goto done;
				}
			} else {
				sin6_t *rname6 = (sin6_t *)rname;
				sin6_t *aname6 = (sin6_t *)aname;

				if (!IN6_ARE_ADDR_EQUAL(&aname6->sin6_addr,
				    &rname6->sin6_addr)) {
					freemsg(mp);
					error = EADDRNOTAVAIL;
					eprintsoline(so, error);
					goto done;
				}
			}
			break;
		}
		case AF_UNIX:
			if (bcmp(addr, &so->so_ux_laddr, addrlen) != 0) {
				freemsg(mp);
				error = EADDRINUSE;
				eprintsoline(so, error);
				eprintso(so,
					("addrlen %d, addr 0x%x, vp %p\n",
					addrlen, *((int *)addr),
					so->so_ux_bound_vp));
				goto done;
			}
			so->so_state |= SS_LADDR_VALID;
			break;
		default:
			/*
			 * NOTE: This assumes that addresses can be
			 * byte-compared for equivalence.
			 */
			if (bcmp(addr, so->so_laddr_sa, addrlen) != 0) {
				freemsg(mp);
				error = EADDRINUSE;
				eprintsoline(so, error);
				goto done;
			}
			/*
			 * Don't mark SS_LADDR_VALID, as we cannot be
			 * sure that the returned address is the real
			 * bound address when talking to an unknown
			 * transport.
			 */
			break;
		}
	} else {
		/*
		 * Save the returned address for getsockname.
		 * Needed for unspecific bind unless transport supports
		 * the TI_GETMYNAME ioctl.
		 * Do this for AF_INET{,6} even though they do, as
		 * caching info here is much better performance than
		 * a TPI/STREAMS trip to the transport for getsockname.
		 * Any which can't for some reason _must_ _not_ set
		 * LADDR_VALID here for the caching version of getsockname
		 * to not break;
		 *
		 * NOTE(review): on this path bind_ack->ADDR_length is not
		 * compared with the number of bytes copied below; the
		 * copies rely on the transport returning a full-sized
		 * address.
		 */
		switch (so->so_family) {
		case AF_UNIX:
			/*
			 * Record the address bound with the transport
			 * for use by socketpair.
			 */
			bcopy(addr, &so->so_ux_laddr, addrlen);
			so->so_state |= SS_LADDR_VALID;
			break;
		case AF_INET:
		case AF_INET6:
			ASSERT(so->so_laddr_len <= so->so_laddr_maxlen);
			bcopy(addr, so->so_laddr_sa, so->so_laddr_len);
			so->so_state |= SS_LADDR_VALID;
			break;
		default:
			/*
			 * Don't mark SS_LADDR_VALID, as we cannot be
			 * sure that the returned address is the real
			 * bound address when talking to an unknown
			 * transport.
			 */
			break;
		}
	}

	if (nl7c == NULL && (so->so_nl7c_flags & NL7C_AF_NCA) &&
	    (so->so_nl7c_flags & NL7C_ENABLED)) {
		/*
		 * Was an AF_NCA bind() so add it to the addr list for
		 * reporting purposes.
		 */
		nl7c = nl7c_add_addr(addr, addrlen);
	}
	if (nl7c != NULL) {
		nl7c_listener_addr(nl7c, strvp2wq(SOTOV(so)));
	}

	freemsg(mp);

done:
	/* so_lock is held on every path that reaches this label. */
	if (error) {
		/* reset state & backlog to values held on entry */
		if (clear_acceptconn_on_err == B_TRUE)
			so->so_state &= ~SS_ACCEPTCONN;
		if (restore_backlog_on_err == B_TRUE)
			so->so_backlog = save_so_backlog;

		if (unbind_on_err && so->so_state & SS_ISBOUND) {
			int err;

			err = sotpi_unbind(so, 0);
			/*
			 * NOTE(review): the trace below prints the original
			 * bind error ("error"), not the unbind failure code
			 * ("err"); confirm whether that is intended.
			 */
			/* LINTED - statement has no consequent: if */
			if (err) {
				eprintsoline(so, error);
			} else {
				ASSERT(!(so->so_state & SS_ISBOUND));
			}
		}
	}
	if (!(flags & _SOBIND_LOCK_HELD)) {
		so_unlock_single(so, SOLOCKED);
		mutex_exit(&so->so_lock);
	} else {
		/* If the caller held the lock don't release it here */
		ASSERT(MUTEX_HELD(&so->so_lock));
		ASSERT(so->so_flag & SOLOCKED);
	}
	return (error);
}

/*
 * bind the socket
 *
 * Thin wrapper around sotpi_bindlisten(). When _SOBIND_SOCKETPAIR is set in
 * flags it is stripped and a backlog of 1 is requested; otherwise the
 * backlog is 0. Returns whatever sotpi_bindlisten() returns.
 */
static int
sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
    int flags)
{
	if ((flags & _SOBIND_SOCKETPAIR) == 0)
		return (sotpi_bindlisten(so, name, namelen, 0, flags));

	flags &= ~_SOBIND_SOCKETPAIR;
	return (sotpi_bindlisten(so, name, namelen, 1, flags));
}

/*
 * Unbind a socket - used when bind() fails, when bind() specifies a NULL
 * address, or when listen needs to unbind and bind.
 * If the _SOUNBIND_REBIND flag is specified the addresses are retained
 * so that a sobind can pick them up.
1144 */ 1145 static int 1146 sotpi_unbind(struct sonode *so, int flags) 1147 { 1148 struct T_unbind_req unbind_req; 1149 int error = 0; 1150 mblk_t *mp; 1151 1152 dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n", 1153 so, flags, pr_state(so->so_state, so->so_mode))); 1154 1155 ASSERT(MUTEX_HELD(&so->so_lock)); 1156 ASSERT(so->so_flag & SOLOCKED); 1157 1158 if (!(so->so_state & SS_ISBOUND)) { 1159 error = EINVAL; 1160 eprintsoline(so, error); 1161 goto done; 1162 } 1163 1164 mutex_exit(&so->so_lock); 1165 1166 /* 1167 * Flush the read and write side (except stream head read queue) 1168 * and send down T_UNBIND_REQ. 1169 */ 1170 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW); 1171 1172 unbind_req.PRIM_type = T_UNBIND_REQ; 1173 mp = soallocproto1(&unbind_req, sizeof (unbind_req), 1174 0, _ALLOC_SLEEP); 1175 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1176 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1177 mutex_enter(&so->so_lock); 1178 if (error) { 1179 eprintsoline(so, error); 1180 goto done; 1181 } 1182 1183 error = sowaitokack(so, T_UNBIND_REQ); 1184 if (error) { 1185 eprintsoline(so, error); 1186 goto done; 1187 } 1188 1189 /* 1190 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1191 * strsock_proto while the lock was dropped above, the unbind 1192 * is allowed to complete. 1193 */ 1194 if (!(flags & _SOUNBIND_REBIND)) { 1195 /* 1196 * Clear out bound address. 
1197 */ 1198 vnode_t *vp; 1199 1200 if ((vp = so->so_ux_bound_vp) != NULL) { 1201 1202 /* Undo any SSL proxy setup */ 1203 if ((so->so_family == AF_INET || 1204 so->so_family == AF_INET6) && 1205 (so->so_type == SOCK_STREAM) && 1206 (so->so_kssl_ent != NULL)) { 1207 kssl_release_ent(so->so_kssl_ent, so, 1208 so->so_kssl_type); 1209 so->so_kssl_ent = NULL; 1210 so->so_kssl_type = KSSL_NO_PROXY; 1211 } 1212 1213 so->so_ux_bound_vp = NULL; 1214 vn_rele_stream(vp); 1215 } 1216 /* Clear out address */ 1217 so->so_laddr_len = 0; 1218 } 1219 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN|SS_LADDR_VALID); 1220 done: 1221 1222 /* If the caller held the lock don't release it here */ 1223 ASSERT(MUTEX_HELD(&so->so_lock)); 1224 ASSERT(so->so_flag & SOLOCKED); 1225 1226 return (error); 1227 } 1228 1229 /* 1230 * listen on the socket. 1231 * For TPI conforming transports this has to first unbind with the transport 1232 * and then bind again using the new backlog. 1233 */ 1234 int 1235 sotpi_listen(struct sonode *so, int backlog) 1236 { 1237 int error = 0; 1238 1239 dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n", 1240 so, backlog, pr_state(so->so_state, so->so_mode))); 1241 1242 if (so->so_serv_type == T_CLTS) 1243 return (EOPNOTSUPP); 1244 1245 /* 1246 * If the socket is ready to accept connections already, then 1247 * return without doing anything. This avoids a problem where 1248 * a second listen() call fails if a connection is pending and 1249 * leaves the socket unbound. Only when we are not unbinding 1250 * with the transport can we safely increase the backlog. 1251 */ 1252 if (so->so_state & SS_ACCEPTCONN && 1253 !((so->so_family == AF_INET || so->so_family == AF_INET6) && 1254 /*CONSTCOND*/ 1255 !solisten_tpi_tcp)) 1256 return (0); 1257 1258 if (so->so_state & SS_ISCONNECTED) 1259 return (EINVAL); 1260 1261 mutex_enter(&so->so_lock); 1262 so_lock_single(so); /* Set SOLOCKED */ 1263 1264 if (backlog < 0) 1265 backlog = 0; 1266 /* 1267 * Use the same qlimit as in BSD. 
BSD checks the qlimit 1268 * before queuing the next connection implying that a 1269 * listen(sock, 0) allows one connection to be queued. 1270 * BSD also uses 1.5 times the requested backlog. 1271 * 1272 * XNS Issue 4 required a strict interpretation of the backlog. 1273 * This has been waived subsequently for Issue 4 and the change 1274 * incorporated in XNS Issue 5. So we aren't required to do 1275 * anything special for XPG apps. 1276 */ 1277 if (backlog >= (INT_MAX - 1) / 3) 1278 backlog = INT_MAX; 1279 else 1280 backlog = backlog * 3 / 2 + 1; 1281 1282 /* 1283 * If the listen doesn't change the backlog we do nothing. 1284 * This avoids an EPROTO error from the transport. 1285 */ 1286 if ((so->so_state & SS_ACCEPTCONN) && 1287 so->so_backlog == backlog) 1288 goto done; 1289 1290 if (!(so->so_state & SS_ISBOUND)) { 1291 /* 1292 * Must have been explicitly bound in the UNIX domain. 1293 */ 1294 if (so->so_family == AF_UNIX) { 1295 error = EINVAL; 1296 goto done; 1297 } 1298 error = sotpi_bindlisten(so, NULL, 0, backlog, 1299 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN); 1300 } else if (backlog > 0) { 1301 /* 1302 * AF_INET{,6} hack to avoid losing the port. 1303 * Assumes that all AF_INET{,6} transports can handle a 1304 * O_T_BIND_REQ with a non-zero CONIND_number when the TPI 1305 * has already bound thus it is possible to avoid the unbind. 
1306 */ 1307 if (!((so->so_family == AF_INET || so->so_family == AF_INET6) && 1308 /*CONSTCOND*/ 1309 !solisten_tpi_tcp)) { 1310 error = sotpi_unbind(so, _SOUNBIND_REBIND); 1311 if (error) 1312 goto done; 1313 } 1314 error = sotpi_bindlisten(so, NULL, 0, backlog, 1315 _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN); 1316 } else { 1317 so->so_state |= SS_ACCEPTCONN; 1318 so->so_backlog = backlog; 1319 } 1320 if (error) 1321 goto done; 1322 ASSERT(so->so_state & SS_ACCEPTCONN); 1323 done: 1324 so_unlock_single(so, SOLOCKED); 1325 mutex_exit(&so->so_lock); 1326 return (error); 1327 } 1328 1329 /* 1330 * Disconnect either a specified seqno or all (-1). 1331 * The former is used on listening sockets only. 1332 * 1333 * When seqno == -1 sodisconnect could call sotpi_unbind. However, 1334 * the current use of sodisconnect(seqno == -1) is only for shutdown 1335 * so there is no point (and potentially incorrect) to unbind. 1336 */ 1337 int 1338 sodisconnect(struct sonode *so, t_scalar_t seqno, int flags) 1339 { 1340 struct T_discon_req discon_req; 1341 int error = 0; 1342 mblk_t *mp; 1343 1344 dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n", 1345 so, seqno, flags, pr_state(so->so_state, so->so_mode))); 1346 1347 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1348 mutex_enter(&so->so_lock); 1349 so_lock_single(so); /* Set SOLOCKED */ 1350 } else { 1351 ASSERT(MUTEX_HELD(&so->so_lock)); 1352 ASSERT(so->so_flag & SOLOCKED); 1353 } 1354 1355 if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ACCEPTCONN))) { 1356 error = EINVAL; 1357 eprintsoline(so, error); 1358 goto done; 1359 } 1360 1361 mutex_exit(&so->so_lock); 1362 /* 1363 * Flush the write side (unless this is a listener) 1364 * and then send down a T_DISCON_REQ. 1365 * (Don't flush on listener since it could flush {O_}T_CONN_RES 1366 * and other messages.) 
1367 */ 1368 if (!(so->so_state & SS_ACCEPTCONN)) 1369 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHW); 1370 1371 discon_req.PRIM_type = T_DISCON_REQ; 1372 discon_req.SEQ_number = seqno; 1373 mp = soallocproto1(&discon_req, sizeof (discon_req), 1374 0, _ALLOC_SLEEP); 1375 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1376 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1377 mutex_enter(&so->so_lock); 1378 if (error) { 1379 eprintsoline(so, error); 1380 goto done; 1381 } 1382 1383 error = sowaitokack(so, T_DISCON_REQ); 1384 if (error) { 1385 eprintsoline(so, error); 1386 goto done; 1387 } 1388 /* 1389 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1390 * strsock_proto while the lock was dropped above, the disconnect 1391 * is allowed to complete. However, it is not possible to 1392 * assert that SS_ISCONNECTED|SS_ISCONNECTING are set. 1393 */ 1394 so->so_state &= 1395 ~(SS_ISCONNECTED|SS_ISCONNECTING|SS_LADDR_VALID|SS_FADDR_VALID); 1396 done: 1397 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1398 so_unlock_single(so, SOLOCKED); 1399 mutex_exit(&so->so_lock); 1400 } else { 1401 /* If the caller held the lock don't release it here */ 1402 ASSERT(MUTEX_HELD(&so->so_lock)); 1403 ASSERT(so->so_flag & SOLOCKED); 1404 } 1405 return (error); 1406 } 1407 1408 int 1409 sotpi_accept(struct sonode *so, int fflag, struct sonode **nsop) 1410 { 1411 struct T_conn_ind *conn_ind; 1412 struct T_conn_res *conn_res; 1413 int error = 0; 1414 mblk_t *mp, *ctxmp; 1415 struct sonode *nso; 1416 vnode_t *nvp; 1417 void *src; 1418 t_uscalar_t srclen; 1419 void *opt; 1420 t_uscalar_t optlen; 1421 t_scalar_t PRIM_type; 1422 t_scalar_t SEQ_number; 1423 1424 dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n", 1425 so, fflag, nsop, pr_state(so->so_state, so->so_mode))); 1426 1427 /* 1428 * Defer single-threading the accepting socket until 1429 * the T_CONN_IND has been received and parsed and the 1430 * new sonode has been opened. 
1431 */ 1432 1433 /* Check that we are not already connected */ 1434 if ((so->so_state & SS_ACCEPTCONN) == 0) 1435 goto conn_bad; 1436 again: 1437 if ((error = sowaitconnind(so, fflag, &mp)) != 0) 1438 goto e_bad; 1439 1440 ASSERT(mp); 1441 conn_ind = (struct T_conn_ind *)mp->b_rptr; 1442 ctxmp = mp->b_cont; 1443 1444 /* 1445 * Save SEQ_number for error paths. 1446 */ 1447 SEQ_number = conn_ind->SEQ_number; 1448 1449 srclen = conn_ind->SRC_length; 1450 src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1); 1451 if (src == NULL) { 1452 error = EPROTO; 1453 freemsg(mp); 1454 eprintsoline(so, error); 1455 goto disconnect_unlocked; 1456 } 1457 optlen = conn_ind->OPT_length; 1458 switch (so->so_family) { 1459 case AF_INET: 1460 case AF_INET6: 1461 if ((optlen == sizeof (intptr_t)) && 1462 ((so->so_state & SS_DIRECT) != 0)) { 1463 bcopy(mp->b_rptr + conn_ind->OPT_offset, 1464 &opt, conn_ind->OPT_length); 1465 } else { 1466 /* 1467 * The transport (in this case TCP) hasn't sent up 1468 * a pointer to an instance for the accept fast-path. 1469 * Disable fast-path completely because the call to 1470 * sotpi_create() below would otherwise create an 1471 * incomplete TCP instance, which would lead to 1472 * problems when sockfs sends a normal T_CONN_RES 1473 * message down the new stream. 1474 */ 1475 if (so->so_state & SS_DIRECT) { 1476 int rval; 1477 /* 1478 * For consistency we inform tcp to disable 1479 * direct interface on the listener, though 1480 * we can certainly live without doing this 1481 * because no data will ever travel upstream 1482 * on the listening socket. 
1483 */ 1484 so->so_state &= ~SS_DIRECT; 1485 (void) strioctl(SOTOV(so), _SIOCSOCKFALLBACK, 1486 0, 0, K_TO_K, CRED(), &rval); 1487 } 1488 opt = NULL; 1489 optlen = 0; 1490 } 1491 break; 1492 case AF_UNIX: 1493 default: 1494 if (optlen != 0) { 1495 opt = sogetoff(mp, conn_ind->OPT_offset, optlen, 1496 __TPI_ALIGN_SIZE); 1497 if (opt == NULL) { 1498 error = EPROTO; 1499 freemsg(mp); 1500 eprintsoline(so, error); 1501 goto disconnect_unlocked; 1502 } 1503 } 1504 if (so->so_family == AF_UNIX) { 1505 if (!(so->so_state & SS_FADDR_NOXLATE)) { 1506 src = NULL; 1507 srclen = 0; 1508 } 1509 /* Extract src address from options */ 1510 if (optlen != 0) 1511 so_getopt_srcaddr(opt, optlen, &src, &srclen); 1512 } 1513 break; 1514 } 1515 1516 /* 1517 * Create the new socket. 1518 */ 1519 VN_HOLD(so->so_accessvp); 1520 nso = sotpi_create(so->so_accessvp, so->so_family, so->so_type, 1521 so->so_protocol, so->so_version, so, &error); 1522 if (nso == NULL) { 1523 ASSERT(error != 0); 1524 /* 1525 * Accept can not fail with ENOBUFS. sotpi_create 1526 * sleeps waiting for memory until a signal is caught 1527 * so return EINTR. 1528 */ 1529 freemsg(mp); 1530 if (error == ENOBUFS) 1531 error = EINTR; 1532 goto e_disc_unl; 1533 } 1534 nvp = SOTOV(nso); 1535 1536 /* 1537 * If the transport sent up an SSL connection context, then attach 1538 * it the new socket, and set the (sd_wputdatafunc)() and 1539 * (sd_rputdatafunc)() stream head hooks to intercept and process 1540 * SSL records. 1541 */ 1542 if (ctxmp != NULL) { 1543 /* 1544 * This kssl_ctx_t is already held for us by the transport. 1545 * So, we don't need to do a kssl_hold_ctx() here. 
1546 */ 1547 nso->so_kssl_ctx = *((kssl_ctx_t *)ctxmp->b_rptr); 1548 freemsg(ctxmp); 1549 mp->b_cont = NULL; 1550 strsetrwputdatahooks(nvp, strsock_kssl_input, 1551 strsock_kssl_output); 1552 } 1553 #ifdef DEBUG 1554 /* 1555 * SO_DEBUG is used to trigger the dprint* and eprint* macros thus 1556 * it's inherited early to allow debugging of the accept code itself. 1557 */ 1558 nso->so_options |= so->so_options & SO_DEBUG; 1559 #endif /* DEBUG */ 1560 1561 /* 1562 * Save the SRC address from the T_CONN_IND 1563 * for getpeername to work on AF_UNIX and on transports that do not 1564 * support TI_GETPEERNAME. 1565 * 1566 * NOTE: AF_UNIX NUL termination is ensured by the sender's 1567 * copyin_name(). 1568 */ 1569 if (srclen > (t_uscalar_t)nso->so_faddr_maxlen) { 1570 error = EINVAL; 1571 freemsg(mp); 1572 eprintsoline(so, error); 1573 goto disconnect_vp_unlocked; 1574 } 1575 nso->so_faddr_len = (socklen_t)srclen; 1576 ASSERT(so->so_faddr_len <= so->so_faddr_maxlen); 1577 bcopy(src, nso->so_faddr_sa, srclen); 1578 nso->so_state |= SS_FADDR_VALID; 1579 1580 if ((DB_REF(mp) > 1) || MBLKSIZE(mp) < 1581 (sizeof (struct T_conn_res) + sizeof (intptr_t))) { 1582 cred_t *cr; 1583 1584 if ((cr = DB_CRED(mp)) != NULL) { 1585 crhold(cr); 1586 nso->so_peercred = cr; 1587 nso->so_cpid = DB_CPID(mp); 1588 } 1589 freemsg(mp); 1590 1591 mp = soallocproto1(NULL, sizeof (struct T_conn_res) + 1592 sizeof (intptr_t), 0, _ALLOC_INTR); 1593 if (mp == NULL) { 1594 /* 1595 * Accept can not fail with ENOBUFS. 1596 * A signal was caught so return EINTR. 
1597 */ 1598 error = EINTR; 1599 eprintsoline(so, error); 1600 goto disconnect_vp_unlocked; 1601 } 1602 conn_res = (struct T_conn_res *)mp->b_rptr; 1603 } else { 1604 nso->so_peercred = DB_CRED(mp); 1605 nso->so_cpid = DB_CPID(mp); 1606 DB_CRED(mp) = NULL; 1607 1608 mp->b_rptr = DB_BASE(mp); 1609 conn_res = (struct T_conn_res *)mp->b_rptr; 1610 mp->b_wptr = mp->b_rptr + sizeof (struct T_conn_res); 1611 } 1612 1613 /* 1614 * New socket must be bound at least in sockfs and, except for AF_INET, 1615 * (or AF_INET6) it also has to be bound in the transport provider. 1616 * After accepting the connection on nso so_laddr_sa will be set to 1617 * contain the same address as the listener's local address 1618 * so the address we bind to isn't important. 1619 */ 1620 if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) && 1621 /*CONSTCOND*/ 1622 nso->so_type == SOCK_STREAM && !soaccept_tpi_tcp) { 1623 /* 1624 * Optimization for AF_INET{,6} transports 1625 * that can handle a T_CONN_RES without being bound. 1626 */ 1627 mutex_enter(&nso->so_lock); 1628 so_automatic_bind(nso); 1629 mutex_exit(&nso->so_lock); 1630 } else { 1631 /* Perform NULL bind with the transport provider. */ 1632 if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC)) != 0) { 1633 ASSERT(error != ENOBUFS); 1634 freemsg(mp); 1635 eprintsoline(nso, error); 1636 goto disconnect_vp_unlocked; 1637 } 1638 } 1639 1640 /* 1641 * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES 1642 * so that any data arriving on the new socket will cause the 1643 * appropriate signals to be delivered for the new socket. 1644 * 1645 * No other thread (except strsock_proto and strsock_misc) 1646 * can access the new socket thus we relax the locking. 
1647 */ 1648 nso->so_pgrp = so->so_pgrp; 1649 nso->so_state |= so->so_state & (SS_ASYNC|SS_FADDR_NOXLATE); 1650 1651 if (nso->so_pgrp != 0) { 1652 if ((error = so_set_events(nso, nvp, CRED())) != 0) { 1653 eprintsoline(nso, error); 1654 error = 0; 1655 nso->so_pgrp = 0; 1656 } 1657 } 1658 1659 /* 1660 * Make note of the socket level options. TCP and IP level options 1661 * are already inherited. We could do all this after accept is 1662 * successful but doing it here simplifies code and no harm done 1663 * for error case. 1664 */ 1665 nso->so_options = so->so_options & (SO_DEBUG|SO_REUSEADDR|SO_KEEPALIVE| 1666 SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK| 1667 SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER); 1668 nso->so_sndbuf = so->so_sndbuf; 1669 nso->so_rcvbuf = so->so_rcvbuf; 1670 if (nso->so_options & SO_LINGER) 1671 nso->so_linger = so->so_linger; 1672 1673 if ((so->so_state & SS_DIRECT) != 0) { 1674 mblk_t *ack_mp; 1675 1676 ASSERT(nso->so_state & SS_DIRECT); 1677 ASSERT(opt != NULL); 1678 1679 conn_res->OPT_length = optlen; 1680 conn_res->OPT_offset = MBLKL(mp); 1681 bcopy(&opt, mp->b_wptr, optlen); 1682 mp->b_wptr += optlen; 1683 conn_res->PRIM_type = T_CONN_RES; 1684 conn_res->ACCEPTOR_id = 0; 1685 PRIM_type = T_CONN_RES; 1686 1687 /* Send down the T_CONN_RES on acceptor STREAM */ 1688 error = kstrputmsg(SOTOV(nso), mp, NULL, 1689 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1690 if (error) { 1691 mutex_enter(&so->so_lock); 1692 so_lock_single(so); 1693 eprintsoline(so, error); 1694 goto disconnect_vp; 1695 } 1696 mutex_enter(&nso->so_lock); 1697 error = sowaitprim(nso, T_CONN_RES, T_OK_ACK, 1698 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 1699 if (error) { 1700 mutex_exit(&nso->so_lock); 1701 mutex_enter(&so->so_lock); 1702 so_lock_single(so); 1703 eprintsoline(so, error); 1704 goto disconnect_vp; 1705 } 1706 if (nso->so_family == AF_INET) { 1707 sin_t *sin; 1708 1709 sin = (sin_t *)(ack_mp->b_rptr + 1710 sizeof (struct T_ok_ack)); 1711 bcopy(sin, 
nso->so_laddr_sa, sizeof (sin_t)); 1712 nso->so_laddr_len = sizeof (sin_t); 1713 } else { 1714 sin6_t *sin6; 1715 1716 sin6 = (sin6_t *)(ack_mp->b_rptr + 1717 sizeof (struct T_ok_ack)); 1718 bcopy(sin6, nso->so_laddr_sa, sizeof (sin6_t)); 1719 nso->so_laddr_len = sizeof (sin6_t); 1720 } 1721 freemsg(ack_mp); 1722 1723 nso->so_state |= SS_ISCONNECTED | SS_LADDR_VALID; 1724 nso->so_priv = opt; 1725 1726 if (so->so_nl7c_flags & NL7C_ENABLED) { 1727 /* 1728 * An NL7C marked listen()er so the new socket 1729 * inherits the listen()er's NL7C state. 1730 * 1731 * When calling NL7C to process the new socket 1732 * pass the nonblocking i/o state of the listen 1733 * socket as this is the context we are in. 1734 */ 1735 nso->so_nl7c_flags = so->so_nl7c_flags; 1736 if (nl7c_process(nso, 1737 (nso->so_state & (SS_NONBLOCK|SS_NDELAY)), 1738 (int)((tcp_t *)nso->so_priv)->tcp_mss)) { 1739 /* 1740 * NL7C has completed processing on the 1741 * socket, close the socket and back to 1742 * the top to await the next T_CONN_IND. 1743 */ 1744 mutex_exit(&nso->so_lock); 1745 (void) VOP_CLOSE(nvp, 0, 1, (offset_t)0, 1746 CRED()); 1747 VN_RELE(nvp); 1748 goto again; 1749 } 1750 /* Pass the new socket out */ 1751 } 1752 1753 mutex_exit(&nso->so_lock); 1754 1755 /* 1756 * Pass out new socket. 1757 */ 1758 if (nsop != NULL) 1759 *nsop = nso; 1760 1761 return (0); 1762 } 1763 1764 /* 1765 * Copy local address from listener. 1766 */ 1767 nso->so_laddr_len = so->so_laddr_len; 1768 ASSERT(nso->so_laddr_len <= nso->so_laddr_maxlen); 1769 bcopy(so->so_laddr_sa, nso->so_laddr_sa, nso->so_laddr_len); 1770 nso->so_state |= SS_LADDR_VALID; 1771 1772 /* 1773 * This is the non-performance case for sockets (e.g. AF_UNIX sockets) 1774 * which don't support the FireEngine accept fast-path. It is also 1775 * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd 1776 * again. 
Neither sockfs nor TCP attempt to find out if some other 1777 * random module has been inserted in between (in which case we 1778 * should follow TLI accept behaviour). We blindly assume the worst 1779 * case and revert back to old behaviour i.e. TCP will not send us 1780 * any option (eager) and the accept should happen on the listener 1781 * queue. Any queued T_conn_ind have already got their options removed 1782 * by so_sock2_stream() when "sockmod" was I_POP'd. 1783 */ 1784 /* 1785 * Fill in the {O_}T_CONN_RES before getting SOLOCKED. 1786 */ 1787 if ((nso->so_mode & SM_ACCEPTOR_ID) == 0) { 1788 #ifdef _ILP32 1789 queue_t *q; 1790 1791 /* 1792 * Find read queue in driver 1793 * Can safely do this since we "own" nso/nvp. 1794 */ 1795 q = strvp2wq(nvp)->q_next; 1796 while (SAMESTR(q)) 1797 q = q->q_next; 1798 q = RD(q); 1799 conn_res->ACCEPTOR_id = (t_uscalar_t)q; 1800 #else 1801 conn_res->ACCEPTOR_id = (t_uscalar_t)getminor(nvp->v_rdev); 1802 #endif /* _ILP32 */ 1803 conn_res->PRIM_type = O_T_CONN_RES; 1804 PRIM_type = O_T_CONN_RES; 1805 } else { 1806 conn_res->ACCEPTOR_id = nso->so_acceptor_id; 1807 conn_res->PRIM_type = T_CONN_RES; 1808 PRIM_type = T_CONN_RES; 1809 } 1810 conn_res->SEQ_number = SEQ_number; 1811 conn_res->OPT_length = 0; 1812 conn_res->OPT_offset = 0; 1813 1814 mutex_enter(&so->so_lock); 1815 so_lock_single(so); /* Set SOLOCKED */ 1816 mutex_exit(&so->so_lock); 1817 1818 error = kstrputmsg(SOTOV(so), mp, NULL, 1819 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1820 mutex_enter(&so->so_lock); 1821 if (error) { 1822 eprintsoline(so, error); 1823 goto disconnect_vp; 1824 } 1825 error = sowaitokack(so, PRIM_type); 1826 if (error) { 1827 eprintsoline(so, error); 1828 goto disconnect_vp; 1829 } 1830 so_unlock_single(so, SOLOCKED); 1831 mutex_exit(&so->so_lock); 1832 1833 nso->so_state |= SS_ISCONNECTED; 1834 1835 /* 1836 * Pass out new socket. 
1837 */ 1838 if (nsop != NULL) 1839 *nsop = nso; 1840 1841 return (0); 1842 1843 1844 eproto_disc_unl: 1845 error = EPROTO; 1846 e_disc_unl: 1847 eprintsoline(so, error); 1848 goto disconnect_unlocked; 1849 1850 pr_disc_vp_unl: 1851 eprintsoline(so, error); 1852 disconnect_vp_unlocked: 1853 (void) VOP_CLOSE(nvp, 0, 1, 0, CRED()); 1854 VN_RELE(nvp); 1855 disconnect_unlocked: 1856 (void) sodisconnect(so, SEQ_number, 0); 1857 return (error); 1858 1859 pr_disc_vp: 1860 eprintsoline(so, error); 1861 disconnect_vp: 1862 (void) sodisconnect(so, SEQ_number, _SODISCONNECT_LOCK_HELD); 1863 so_unlock_single(so, SOLOCKED); 1864 mutex_exit(&so->so_lock); 1865 (void) VOP_CLOSE(nvp, 0, 1, 0, CRED()); 1866 VN_RELE(nvp); 1867 return (error); 1868 1869 conn_bad: /* Note: SunOS 4/BSD unconditionally returns EINVAL here */ 1870 error = (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) 1871 ? EOPNOTSUPP : EINVAL; 1872 e_bad: 1873 eprintsoline(so, error); 1874 return (error); 1875 } 1876 1877 /* 1878 * connect a socket. 1879 * 1880 * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to 1881 * unconnect (by specifying a null address). 1882 */ 1883 int 1884 sotpi_connect(struct sonode *so, 1885 const struct sockaddr *name, 1886 socklen_t namelen, 1887 int fflag, 1888 int flags) 1889 { 1890 struct T_conn_req conn_req; 1891 int error = 0; 1892 mblk_t *mp; 1893 void *src; 1894 socklen_t srclen; 1895 void *addr; 1896 socklen_t addrlen; 1897 boolean_t need_unlock; 1898 1899 dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n", 1900 so, name, namelen, fflag, flags, 1901 pr_state(so->so_state, so->so_mode))); 1902 1903 /* 1904 * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to 1905 * avoid sleeping for memory with SOLOCKED held. 1906 * We know that the T_CONN_REQ can't be larger than 2 * so_faddr_maxlen 1907 * + sizeof (struct T_opthdr). 1908 * (the AF_UNIX so_ux_addr_xlate() does not make the address 1909 * exceed so_faddr_maxlen). 
1910 */ 1911 mp = soallocproto(sizeof (struct T_conn_req) + 1912 2 * so->so_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR); 1913 if (mp == NULL) { 1914 /* 1915 * Connect can not fail with ENOBUFS. A signal was 1916 * caught so return EINTR. 1917 */ 1918 error = EINTR; 1919 eprintsoline(so, error); 1920 return (error); 1921 } 1922 1923 mutex_enter(&so->so_lock); 1924 /* 1925 * Make sure that there is a preallocated unbind_req 1926 * message before any binding. This message allocated when 1927 * the socket is created but it might be have been 1928 * consumed. 1929 */ 1930 if (so->so_unbind_mp == NULL) { 1931 dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n")); 1932 /* NOTE: holding so_lock while sleeping */ 1933 so->so_unbind_mp = 1934 soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR); 1935 if (so->so_unbind_mp == NULL) { 1936 error = EINTR; 1937 need_unlock = B_FALSE; 1938 goto done; 1939 } 1940 } 1941 1942 so_lock_single(so); /* Set SOLOCKED */ 1943 need_unlock = B_TRUE; 1944 1945 /* 1946 * Can't have done a listen before connecting. 1947 */ 1948 if (so->so_state & SS_ACCEPTCONN) { 1949 error = EOPNOTSUPP; 1950 goto done; 1951 } 1952 1953 /* 1954 * Must be bound with the transport 1955 */ 1956 if (!(so->so_state & SS_ISBOUND)) { 1957 if ((so->so_family == AF_INET || so->so_family == AF_INET6) && 1958 /*CONSTCOND*/ 1959 so->so_type == SOCK_STREAM && !soconnect_tpi_tcp) { 1960 /* 1961 * Optimization for AF_INET{,6} transports 1962 * that can handle a T_CONN_REQ without being bound. 1963 */ 1964 so_automatic_bind(so); 1965 } else { 1966 error = sotpi_bind(so, NULL, 0, 1967 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD); 1968 if (error) 1969 goto done; 1970 } 1971 ASSERT(so->so_state & SS_ISBOUND); 1972 flags |= _SOCONNECT_DID_BIND; 1973 } 1974 1975 /* 1976 * Handle a connect to a name parameter of type AF_UNSPEC like a 1977 * connect to a null address. This is the portable method to 1978 * unconnect a socket. 
1979 */ 1980 if ((namelen >= sizeof (sa_family_t)) && 1981 (name->sa_family == AF_UNSPEC)) { 1982 name = NULL; 1983 namelen = 0; 1984 } 1985 1986 /* 1987 * Check that we are not already connected. 1988 * A connection-oriented socket cannot be reconnected. 1989 * A connected connection-less socket can be 1990 * - connected to a different address by a subsequent connect 1991 * - "unconnected" by a connect to the NULL address 1992 */ 1993 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) { 1994 ASSERT(!(flags & _SOCONNECT_DID_BIND)); 1995 if (so->so_mode & SM_CONNREQUIRED) { 1996 /* Connection-oriented socket */ 1997 error = so->so_state & SS_ISCONNECTED ? 1998 EISCONN : EALREADY; 1999 goto done; 2000 } 2001 /* Connection-less socket */ 2002 if (name == NULL) { 2003 /* 2004 * Remove the connected state and clear SO_DGRAM_ERRIND 2005 * since it was set when the socket was connected. 2006 * If this is UDP also send down a T_DISCON_REQ. 2007 */ 2008 int val; 2009 2010 if ((so->so_family == AF_INET || 2011 so->so_family == AF_INET6) && 2012 (so->so_type == SOCK_DGRAM || 2013 so->so_type == SOCK_RAW) && 2014 /*CONSTCOND*/ 2015 !soconnect_tpi_udp) { 2016 /* XXX What about implicitly unbinding here? */ 2017 error = sodisconnect(so, -1, 2018 _SODISCONNECT_LOCK_HELD); 2019 } else { 2020 so->so_state &= 2021 ~(SS_ISCONNECTED | SS_ISCONNECTING | 2022 SS_FADDR_VALID); 2023 so->so_faddr_len = 0; 2024 } 2025 2026 so_unlock_single(so, SOLOCKED); 2027 mutex_exit(&so->so_lock); 2028 2029 val = 0; 2030 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, 2031 &val, (t_uscalar_t)sizeof (val)); 2032 2033 mutex_enter(&so->so_lock); 2034 so_lock_single(so); /* Set SOLOCKED */ 2035 goto done; 2036 } 2037 } 2038 ASSERT(so->so_state & SS_ISBOUND); 2039 2040 if (name == NULL || namelen == 0) { 2041 error = EINVAL; 2042 goto done; 2043 } 2044 /* 2045 * Mark the socket if so_faddr_sa represents the transport level 2046 * address. 
2047 */ 2048 if (flags & _SOCONNECT_NOXLATE) { 2049 struct sockaddr_ux *soaddr_ux; 2050 2051 ASSERT(so->so_family == AF_UNIX); 2052 if (namelen != sizeof (struct sockaddr_ux)) { 2053 error = EINVAL; 2054 goto done; 2055 } 2056 soaddr_ux = (struct sockaddr_ux *)name; 2057 name = (struct sockaddr *)&soaddr_ux->sou_addr; 2058 namelen = sizeof (soaddr_ux->sou_addr); 2059 so->so_state |= SS_FADDR_NOXLATE; 2060 } 2061 2062 /* 2063 * Length and family checks. 2064 */ 2065 error = so_addr_verify(so, name, namelen); 2066 if (error) 2067 goto bad; 2068 2069 /* 2070 * Save foreign address. Needed for AF_UNIX as well as 2071 * transport providers that do not support TI_GETPEERNAME. 2072 * Also used for cached foreign address for TCP and UDP. 2073 */ 2074 if (namelen > (t_uscalar_t)so->so_faddr_maxlen) { 2075 error = EINVAL; 2076 goto done; 2077 } 2078 so->so_faddr_len = (socklen_t)namelen; 2079 ASSERT(so->so_faddr_len <= so->so_faddr_maxlen); 2080 bcopy(name, so->so_faddr_sa, namelen); 2081 so->so_state |= SS_FADDR_VALID; 2082 2083 if (so->so_family == AF_UNIX) { 2084 if (so->so_state & SS_FADDR_NOXLATE) { 2085 /* 2086 * Already have a transport internal address. Do not 2087 * pass any (transport internal) source address. 2088 */ 2089 addr = so->so_faddr_sa; 2090 addrlen = (t_uscalar_t)so->so_faddr_len; 2091 src = NULL; 2092 srclen = 0; 2093 } else { 2094 /* 2095 * Pass the sockaddr_un source address as an option 2096 * and translate the remote address. 2097 * Holding so_lock thus so_laddr_sa can not change. 
2098 */ 2099 src = so->so_laddr_sa; 2100 srclen = (t_uscalar_t)so->so_laddr_len; 2101 dprintso(so, 1, 2102 ("sotpi_connect UNIX: srclen %d, src %p\n", 2103 srclen, src)); 2104 error = so_ux_addr_xlate(so, 2105 so->so_faddr_sa, (socklen_t)so->so_faddr_len, 2106 (flags & _SOCONNECT_XPG4_2), 2107 &addr, &addrlen); 2108 if (error) 2109 goto bad; 2110 } 2111 } else { 2112 addr = so->so_faddr_sa; 2113 addrlen = (t_uscalar_t)so->so_faddr_len; 2114 src = NULL; 2115 srclen = 0; 2116 } 2117 /* 2118 * When connecting a datagram socket we issue the SO_DGRAM_ERRIND 2119 * option which asks the transport provider to send T_UDERR_IND 2120 * messages. These T_UDERR_IND messages are used to return connected 2121 * style errors (e.g. ECONNRESET) for connected datagram sockets. 2122 * 2123 * In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets) 2124 * we send down a T_CONN_REQ. This is needed to let the 2125 * transport assign a local address that is consistent with 2126 * the remote address. Applications depend on a getsockname() 2127 * after a connect() to retrieve the "source" IP address for 2128 * the connected socket. Invalidate the cached local address 2129 * to force getsockname() to enquire of the transport. 2130 */ 2131 if (!(so->so_mode & SM_CONNREQUIRED)) { 2132 /* 2133 * Datagram socket. 2134 */ 2135 int32_t val; 2136 2137 so_unlock_single(so, SOLOCKED); 2138 mutex_exit(&so->so_lock); 2139 2140 val = 1; 2141 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, 2142 &val, (t_uscalar_t)sizeof (val)); 2143 2144 mutex_enter(&so->so_lock); 2145 so_lock_single(so); /* Set SOLOCKED */ 2146 if ((so->so_family != AF_INET && so->so_family != AF_INET6) || 2147 (so->so_type != SOCK_DGRAM && so->so_type != SOCK_RAW) || 2148 soconnect_tpi_udp) { 2149 soisconnected(so); 2150 goto done; 2151 } 2152 /* 2153 * Send down T_CONN_REQ etc. 2154 * Clear fflag to avoid returning EWOULDBLOCK. 
2155 */ 2156 fflag = 0; 2157 ASSERT(so->so_family != AF_UNIX); 2158 so->so_state &= ~SS_LADDR_VALID; 2159 } else if (so->so_laddr_len != 0) { 2160 /* 2161 * If the local address or port was "any" then it may be 2162 * changed by the transport as a result of the 2163 * connect. Invalidate the cached version if we have one. 2164 */ 2165 switch (so->so_family) { 2166 case AF_INET: 2167 ASSERT(so->so_laddr_len == (socklen_t)sizeof (sin_t)); 2168 if (((sin_t *)so->so_laddr_sa)->sin_addr.s_addr == 2169 INADDR_ANY || 2170 ((sin_t *)so->so_laddr_sa)->sin_port == 0) 2171 so->so_state &= ~SS_LADDR_VALID; 2172 break; 2173 2174 case AF_INET6: 2175 ASSERT(so->so_laddr_len == (socklen_t)sizeof (sin6_t)); 2176 if (IN6_IS_ADDR_UNSPECIFIED( 2177 &((sin6_t *)so->so_laddr_sa) ->sin6_addr) || 2178 IN6_IS_ADDR_V4MAPPED_ANY( 2179 &((sin6_t *)so->so_laddr_sa)->sin6_addr) || 2180 ((sin6_t *)so->so_laddr_sa)->sin6_port == 0) 2181 so->so_state &= ~SS_LADDR_VALID; 2182 break; 2183 2184 default: 2185 break; 2186 } 2187 } 2188 2189 /* 2190 * Check for failure of an earlier call 2191 */ 2192 if (so->so_error != 0) 2193 goto so_bad; 2194 2195 /* 2196 * Send down T_CONN_REQ. Message was allocated above. 2197 */ 2198 conn_req.PRIM_type = T_CONN_REQ; 2199 conn_req.DEST_length = addrlen; 2200 conn_req.DEST_offset = (t_scalar_t)sizeof (conn_req); 2201 if (srclen == 0) { 2202 conn_req.OPT_length = 0; 2203 conn_req.OPT_offset = 0; 2204 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2205 soappendmsg(mp, addr, addrlen); 2206 } else { 2207 /* 2208 * There is a AF_UNIX sockaddr_un to include as a source 2209 * address option. 
2210 */ 2211 struct T_opthdr toh; 2212 2213 toh.level = SOL_SOCKET; 2214 toh.name = SO_SRCADDR; 2215 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 2216 toh.status = 0; 2217 conn_req.OPT_length = 2218 (t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen)); 2219 conn_req.OPT_offset = (t_scalar_t)(sizeof (conn_req) + 2220 _TPI_ALIGN_TOPT(addrlen)); 2221 2222 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2223 soappendmsg(mp, addr, addrlen); 2224 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2225 soappendmsg(mp, &toh, sizeof (toh)); 2226 soappendmsg(mp, src, srclen); 2227 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2228 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2229 } 2230 /* 2231 * Set SS_ISCONNECTING before sending down the T_CONN_REQ 2232 * in order to have the right state when the T_CONN_CON shows up. 2233 */ 2234 soisconnecting(so); 2235 mutex_exit(&so->so_lock); 2236 2237 #ifdef C2_AUDIT 2238 if (audit_active) 2239 audit_sock(T_CONN_REQ, strvp2wq(SOTOV(so)), mp, 0); 2240 #endif /* C2_AUDIT */ 2241 2242 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2243 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2244 mp = NULL; 2245 mutex_enter(&so->so_lock); 2246 if (error != 0) 2247 goto bad; 2248 2249 if ((error = sowaitokack(so, T_CONN_REQ)) != 0) 2250 goto bad; 2251 2252 /* Allow other threads to access the socket */ 2253 so_unlock_single(so, SOLOCKED); 2254 need_unlock = B_FALSE; 2255 2256 /* 2257 * Wait until we get a T_CONN_CON or an error 2258 */ 2259 if ((error = sowaitconnected(so, fflag, 0)) != 0) { 2260 so_lock_single(so); /* Set SOLOCKED */ 2261 need_unlock = B_TRUE; 2262 } 2263 2264 done: 2265 freemsg(mp); 2266 switch (error) { 2267 case EINPROGRESS: 2268 case EALREADY: 2269 case EISCONN: 2270 case EINTR: 2271 /* Non-fatal errors */ 2272 so->so_state &= ~SS_LADDR_VALID; 2273 /* FALLTHRU */ 2274 case 0: 2275 break; 2276 2277 case EHOSTUNREACH: 2278 if (flags & _SOCONNECT_XPG4_2) { 2279 /* 2280 * X/Open specification contains a requirement that 2281 * 
ENETUNREACH be returned but does not require 2282 * EHOSTUNREACH. In order to keep the test suite 2283 * happy we mess with the errno here. 2284 */ 2285 error = ENETUNREACH; 2286 } 2287 /* FALLTHRU */ 2288 2289 default: 2290 ASSERT(need_unlock); 2291 /* 2292 * Fatal errors: clear SS_ISCONNECTING in case it was set, 2293 * and invalidate local-address cache 2294 */ 2295 so->so_state &= ~(SS_ISCONNECTING | SS_LADDR_VALID); 2296 /* A discon_ind might have already unbound us */ 2297 if ((flags & _SOCONNECT_DID_BIND) && 2298 (so->so_state & SS_ISBOUND)) { 2299 int err; 2300 2301 err = sotpi_unbind(so, 0); 2302 /* LINTED - statement has no conseq */ 2303 if (err) { 2304 eprintsoline(so, err); 2305 } 2306 } 2307 break; 2308 } 2309 if (need_unlock) 2310 so_unlock_single(so, SOLOCKED); 2311 mutex_exit(&so->so_lock); 2312 return (error); 2313 2314 so_bad: error = sogeterr(so); 2315 bad: eprintsoline(so, error); 2316 goto done; 2317 } 2318 2319 int 2320 sotpi_shutdown(struct sonode *so, int how) 2321 { 2322 struct T_ordrel_req ordrel_req; 2323 mblk_t *mp; 2324 uint_t old_state, state_change; 2325 int error = 0; 2326 2327 dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n", 2328 so, how, pr_state(so->so_state, so->so_mode))); 2329 2330 mutex_enter(&so->so_lock); 2331 so_lock_single(so); /* Set SOLOCKED */ 2332 2333 /* 2334 * SunOS 4.X has no check for datagram sockets. 2335 * 5.X checks that it is connected (ENOTCONN) 2336 * X/Open requires that we check the connected state. 2337 */ 2338 if (!(so->so_state & SS_ISCONNECTED)) { 2339 if (!xnet_skip_checks) { 2340 error = ENOTCONN; 2341 if (xnet_check_print) { 2342 printf("sockfs: X/Open shutdown check " 2343 "caused ENOTCONN\n"); 2344 } 2345 } 2346 goto done; 2347 } 2348 /* 2349 * Record the current state and then perform any state changes. 2350 * Then use the difference between the old and new states to 2351 * determine which messages need to be sent. 2352 * This prevents e.g. 
duplicate T_ORDREL_REQ when there are 2353 * duplicate calls to shutdown(). 2354 */ 2355 old_state = so->so_state; 2356 2357 switch (how) { 2358 case 0: 2359 socantrcvmore(so); 2360 break; 2361 case 1: 2362 socantsendmore(so); 2363 break; 2364 case 2: 2365 socantsendmore(so); 2366 socantrcvmore(so); 2367 break; 2368 default: 2369 error = EINVAL; 2370 goto done; 2371 } 2372 2373 /* 2374 * Assumes that the SS_CANT* flags are never cleared in the above code. 2375 */ 2376 state_change = (so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) - 2377 (old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)); 2378 ASSERT((state_change & ~(SS_CANTRCVMORE|SS_CANTSENDMORE)) == 0); 2379 2380 switch (state_change) { 2381 case 0: 2382 dprintso(so, 1, 2383 ("sotpi_shutdown: nothing to send in state 0x%x\n", 2384 so->so_state)); 2385 goto done; 2386 2387 case SS_CANTRCVMORE: 2388 mutex_exit(&so->so_lock); 2389 strseteof(SOTOV(so), 1); 2390 /* 2391 * strseteof takes care of read side wakeups, 2392 * pollwakeups, and signals. 2393 */ 2394 /* 2395 * Get the read lock before flushing data to avoid problems 2396 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 2397 */ 2398 mutex_enter(&so->so_lock); 2399 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2400 mutex_exit(&so->so_lock); 2401 2402 /* Flush read side queue */ 2403 strflushrq(SOTOV(so), FLUSHALL); 2404 2405 mutex_enter(&so->so_lock); 2406 so_unlock_read(so); /* Clear SOREADLOCKED */ 2407 break; 2408 2409 case SS_CANTSENDMORE: 2410 mutex_exit(&so->so_lock); 2411 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2412 mutex_enter(&so->so_lock); 2413 break; 2414 2415 case SS_CANTSENDMORE|SS_CANTRCVMORE: 2416 mutex_exit(&so->so_lock); 2417 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2418 strseteof(SOTOV(so), 1); 2419 /* 2420 * strseteof takes care of read side wakeups, 2421 * pollwakeups, and signals. 2422 */ 2423 /* 2424 * Get the read lock before flushing data to avoid problems 2425 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 
2426 */ 2427 mutex_enter(&so->so_lock); 2428 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2429 mutex_exit(&so->so_lock); 2430 2431 /* Flush read side queue */ 2432 strflushrq(SOTOV(so), FLUSHALL); 2433 2434 mutex_enter(&so->so_lock); 2435 so_unlock_read(so); /* Clear SOREADLOCKED */ 2436 break; 2437 } 2438 2439 ASSERT(MUTEX_HELD(&so->so_lock)); 2440 2441 /* 2442 * If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them 2443 * was set due to this call and the new state has both of them set: 2444 * Send the AF_UNIX close indication 2445 * For T_COTS send a discon_ind 2446 * 2447 * If cantsend was set due to this call: 2448 * For T_COTSORD send an ordrel_ind 2449 * 2450 * Note that for T_CLTS there is no message sent here. 2451 */ 2452 if ((so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) == 2453 (SS_CANTRCVMORE|SS_CANTSENDMORE)) { 2454 /* 2455 * For SunOS 4.X compatibility we tell the other end 2456 * that we are unable to receive at this point. 2457 */ 2458 if (so->so_family == AF_UNIX && so->so_serv_type != T_CLTS) 2459 so_unix_close(so); 2460 2461 if (so->so_serv_type == T_COTS) 2462 error = sodisconnect(so, -1, _SODISCONNECT_LOCK_HELD); 2463 } 2464 if ((state_change & SS_CANTSENDMORE) && 2465 (so->so_serv_type == T_COTS_ORD)) { 2466 /* Send an orderly release */ 2467 ordrel_req.PRIM_type = T_ORDREL_REQ; 2468 2469 mutex_exit(&so->so_lock); 2470 mp = soallocproto1(&ordrel_req, sizeof (ordrel_req), 2471 0, _ALLOC_SLEEP); 2472 /* 2473 * Send down the T_ORDREL_REQ even if there is flow control. 2474 * This prevents shutdown from blocking. 2475 * Note that there is no T_OK_ACK for ordrel_req. 
2476 */ 2477 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2478 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2479 mutex_enter(&so->so_lock); 2480 if (error) { 2481 eprintsoline(so, error); 2482 goto done; 2483 } 2484 } 2485 2486 done: 2487 so_unlock_single(so, SOLOCKED); 2488 mutex_exit(&so->so_lock); 2489 return (error); 2490 } 2491 2492 /* 2493 * For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send 2494 * a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer 2495 * that we have closed. 2496 * Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length 2497 * T_UNITDATA_REQ containing the same option. 2498 * 2499 * For SOCK_DGRAM half-connections (somebody connected to this end 2500 * but this end is not connect) we don't know where to send any 2501 * SO_UNIX_CLOSE. 2502 * 2503 * We have to ignore stream head errors just in case there has been 2504 * a shutdown(output). 2505 * Ignore any flow control to try to get the message more quickly to the peer. 2506 * While locally ignoring flow control solves the problem when there 2507 * is only the loopback transport on the stream it would not provide 2508 * the correct AF_UNIX socket semantics when one or more modules have 2509 * been pushed. 
2510 */ 2511 void 2512 so_unix_close(struct sonode *so) 2513 { 2514 int error; 2515 struct T_opthdr toh; 2516 mblk_t *mp; 2517 2518 ASSERT(MUTEX_HELD(&so->so_lock)); 2519 2520 ASSERT(so->so_family == AF_UNIX); 2521 2522 if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) != 2523 (SS_ISCONNECTED|SS_ISBOUND)) 2524 return; 2525 2526 dprintso(so, 1, ("so_unix_close(%p) %s\n", 2527 so, pr_state(so->so_state, so->so_mode))); 2528 2529 toh.level = SOL_SOCKET; 2530 toh.name = SO_UNIX_CLOSE; 2531 2532 /* zero length + header */ 2533 toh.len = (t_uscalar_t)sizeof (struct T_opthdr); 2534 toh.status = 0; 2535 2536 if (so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) { 2537 struct T_optdata_req tdr; 2538 2539 tdr.PRIM_type = T_OPTDATA_REQ; 2540 tdr.DATA_flag = 0; 2541 2542 tdr.OPT_length = (t_scalar_t)sizeof (toh); 2543 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 2544 2545 /* NOTE: holding so_lock while sleeping */ 2546 mp = soallocproto2(&tdr, sizeof (tdr), 2547 &toh, sizeof (toh), 0, _ALLOC_SLEEP); 2548 } else { 2549 struct T_unitdata_req tudr; 2550 void *addr; 2551 socklen_t addrlen; 2552 void *src; 2553 socklen_t srclen; 2554 struct T_opthdr toh2; 2555 t_scalar_t size; 2556 2557 /* Connecteded DGRAM socket */ 2558 2559 /* 2560 * For AF_UNIX the destination address is translated to 2561 * an internal name and the source address is passed as 2562 * an option. 2563 */ 2564 /* 2565 * Length and family checks. 2566 */ 2567 error = so_addr_verify(so, so->so_faddr_sa, 2568 (t_uscalar_t)so->so_faddr_len); 2569 if (error) { 2570 eprintsoline(so, error); 2571 return; 2572 } 2573 if (so->so_state & SS_FADDR_NOXLATE) { 2574 /* 2575 * Already have a transport internal address. Do not 2576 * pass any (transport internal) source address. 2577 */ 2578 addr = so->so_faddr_sa; 2579 addrlen = (t_uscalar_t)so->so_faddr_len; 2580 src = NULL; 2581 srclen = 0; 2582 } else { 2583 /* 2584 * Pass the sockaddr_un source address as an option 2585 * and translate the remote address. 
2586 * Holding so_lock thus so_laddr_sa can not change. 2587 */ 2588 src = so->so_laddr_sa; 2589 srclen = (socklen_t)so->so_laddr_len; 2590 dprintso(so, 1, 2591 ("so_ux_close: srclen %d, src %p\n", 2592 srclen, src)); 2593 error = so_ux_addr_xlate(so, 2594 so->so_faddr_sa, 2595 (socklen_t)so->so_faddr_len, 0, 2596 &addr, &addrlen); 2597 if (error) { 2598 eprintsoline(so, error); 2599 return; 2600 } 2601 } 2602 tudr.PRIM_type = T_UNITDATA_REQ; 2603 tudr.DEST_length = addrlen; 2604 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 2605 if (srclen == 0) { 2606 tudr.OPT_length = (t_scalar_t)sizeof (toh); 2607 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2608 _TPI_ALIGN_TOPT(addrlen)); 2609 2610 size = tudr.OPT_offset + tudr.OPT_length; 2611 /* NOTE: holding so_lock while sleeping */ 2612 mp = soallocproto2(&tudr, sizeof (tudr), 2613 addr, addrlen, size, _ALLOC_SLEEP); 2614 mp->b_wptr += (_TPI_ALIGN_TOPT(addrlen) - addrlen); 2615 soappendmsg(mp, &toh, sizeof (toh)); 2616 } else { 2617 /* 2618 * There is a AF_UNIX sockaddr_un to include as a 2619 * source address option. 
2620 */ 2621 tudr.OPT_length = (t_scalar_t)(2 * sizeof (toh) + 2622 _TPI_ALIGN_TOPT(srclen)); 2623 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2624 _TPI_ALIGN_TOPT(addrlen)); 2625 2626 toh2.level = SOL_SOCKET; 2627 toh2.name = SO_SRCADDR; 2628 toh2.len = (t_uscalar_t)(srclen + 2629 sizeof (struct T_opthdr)); 2630 toh2.status = 0; 2631 2632 size = tudr.OPT_offset + tudr.OPT_length; 2633 2634 /* NOTE: holding so_lock while sleeping */ 2635 mp = soallocproto2(&tudr, sizeof (tudr), 2636 addr, addrlen, size, _ALLOC_SLEEP); 2637 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2638 soappendmsg(mp, &toh, sizeof (toh)); 2639 soappendmsg(mp, &toh2, sizeof (toh2)); 2640 soappendmsg(mp, src, srclen); 2641 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2642 } 2643 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2644 } 2645 mutex_exit(&so->so_lock); 2646 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2647 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2648 mutex_enter(&so->so_lock); 2649 } 2650 2651 /* 2652 * Handle recv* calls that set MSG_OOB or MSG_OOB together with MSG_PEEK. 2653 */ 2654 int 2655 sorecvoob(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, int flags) 2656 { 2657 mblk_t *mp, *nmp; 2658 int error; 2659 2660 dprintso(so, 1, ("sorecvoob(%p, %p, 0x%x)\n", so, msg, flags)); 2661 2662 /* 2663 * There is never any oob data with addresses or control since 2664 * the T_EXDATA_IND does not carry any options. 
2665 */ 2666 msg->msg_controllen = 0; 2667 msg->msg_namelen = 0; 2668 2669 mutex_enter(&so->so_lock); 2670 ASSERT(so_verify_oobstate(so)); 2671 if ((so->so_options & SO_OOBINLINE) || 2672 (so->so_state & (SS_OOBPEND|SS_HADOOBDATA)) != SS_OOBPEND) { 2673 dprintso(so, 1, ("sorecvoob: inline or data consumed\n")); 2674 mutex_exit(&so->so_lock); 2675 return (EINVAL); 2676 } 2677 if (!(so->so_state & SS_HAVEOOBDATA)) { 2678 dprintso(so, 1, ("sorecvoob: no data yet\n")); 2679 mutex_exit(&so->so_lock); 2680 return (EWOULDBLOCK); 2681 } 2682 ASSERT(so->so_oobmsg != NULL); 2683 mp = so->so_oobmsg; 2684 if (flags & MSG_PEEK) { 2685 /* 2686 * Since recv* can not return ENOBUFS we can not use dupmsg. 2687 * Instead we revert to the consolidation private 2688 * allocb_wait plus bcopy. 2689 */ 2690 mblk_t *mp1; 2691 2692 mp1 = allocb_wait(msgdsize(mp), BPRI_MED, STR_NOSIG, NULL); 2693 ASSERT(mp1); 2694 2695 while (mp != NULL) { 2696 ssize_t size; 2697 2698 size = MBLKL(mp); 2699 bcopy(mp->b_rptr, mp1->b_wptr, size); 2700 mp1->b_wptr += size; 2701 ASSERT(mp1->b_wptr <= mp1->b_datap->db_lim); 2702 mp = mp->b_cont; 2703 } 2704 mp = mp1; 2705 } else { 2706 /* 2707 * Update the state indicating that the data has been consumed. 2708 * Keep SS_OOBPEND set until data is consumed past the mark. 
2709 */ 2710 so->so_oobmsg = NULL; 2711 so->so_state ^= SS_HAVEOOBDATA|SS_HADOOBDATA; 2712 } 2713 dprintso(so, 1, 2714 ("after recvoob(%p): counts %d/%d state %s\n", 2715 so, so->so_oobsigcnt, 2716 so->so_oobcnt, pr_state(so->so_state, so->so_mode))); 2717 ASSERT(so_verify_oobstate(so)); 2718 mutex_exit(&so->so_lock); 2719 2720 error = 0; 2721 nmp = mp; 2722 while (nmp != NULL && uiop->uio_resid > 0) { 2723 ssize_t n = MBLKL(nmp); 2724 2725 n = MIN(n, uiop->uio_resid); 2726 if (n > 0) 2727 error = uiomove(nmp->b_rptr, n, 2728 UIO_READ, uiop); 2729 if (error) 2730 break; 2731 nmp = nmp->b_cont; 2732 } 2733 freemsg(mp); 2734 return (error); 2735 } 2736 2737 /* 2738 * Called by sotpi_recvmsg when reading a non-zero amount of data. 2739 * In addition, the caller typically verifies that there is some 2740 * potential state to clear by checking 2741 * if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) 2742 * before calling this routine. 2743 * Note that such a check can be made without holding so_lock since 2744 * sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg 2745 * decrements so_oobsigcnt. 2746 * 2747 * When data is read *after* the point that all pending 2748 * oob data has been consumed the oob indication is cleared. 2749 * 2750 * This logic keeps select/poll returning POLLRDBAND and 2751 * SIOCATMARK returning true until we have read past 2752 * the mark. 
2753 */ 2754 static void 2755 sorecv_update_oobstate(struct sonode *so) 2756 { 2757 mutex_enter(&so->so_lock); 2758 ASSERT(so_verify_oobstate(so)); 2759 dprintso(so, 1, 2760 ("sorecv_update_oobstate: counts %d/%d state %s\n", 2761 so->so_oobsigcnt, 2762 so->so_oobcnt, pr_state(so->so_state, so->so_mode))); 2763 if (so->so_oobsigcnt == 0) { 2764 /* No more pending oob indications */ 2765 so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK); 2766 freemsg(so->so_oobmsg); 2767 so->so_oobmsg = NULL; 2768 } 2769 ASSERT(so_verify_oobstate(so)); 2770 mutex_exit(&so->so_lock); 2771 } 2772 2773 /* 2774 * Handle recv* calls for an so which has NL7C saved recv mblk_t(s). 2775 */ 2776 static int 2777 nl7c_sorecv(struct sonode *so, mblk_t **rmp, uio_t *uiop, rval_t *rp) 2778 { 2779 int error = 0; 2780 mblk_t *tmp = NULL; 2781 mblk_t *pmp = NULL; 2782 mblk_t *nmp = so->so_nl7c_rcv_mp; 2783 2784 ASSERT(nmp != NULL); 2785 2786 while (nmp != NULL && uiop->uio_resid > 0) { 2787 ssize_t n; 2788 2789 if (DB_TYPE(nmp) == M_DATA) { 2790 /* 2791 * We have some data, uiomove up to resid bytes. 2792 */ 2793 n = MIN(MBLKL(nmp), uiop->uio_resid); 2794 if (n > 0) 2795 error = uiomove(nmp->b_rptr, n, UIO_READ, uiop); 2796 if (error) 2797 break; 2798 nmp->b_rptr += n; 2799 if (nmp->b_rptr == nmp->b_wptr) { 2800 pmp = nmp; 2801 nmp = nmp->b_cont; 2802 } 2803 } else { 2804 /* 2805 * We only handle data, save for caller to handle. 
2806 */ 2807 if (pmp != NULL) { 2808 pmp->b_cont = nmp->b_cont; 2809 } 2810 nmp->b_cont = NULL; 2811 if (*rmp == NULL) { 2812 *rmp = nmp; 2813 } else { 2814 tmp->b_next = nmp; 2815 } 2816 nmp = nmp->b_cont; 2817 tmp = nmp; 2818 } 2819 } 2820 if (pmp != NULL) { 2821 /* Free any mblk_t(s) which we have consumed */ 2822 pmp->b_cont = NULL; 2823 freemsg(so->so_nl7c_rcv_mp); 2824 } 2825 if ((so->so_nl7c_rcv_mp = nmp) == NULL) { 2826 /* Last mblk_t so return the saved rval from kstrgetmsg() */ 2827 rp->r_vals = so->so_nl7c_rcv_rval; 2828 so->so_nl7c_rcv_rval = 0; 2829 } else { 2830 /* More mblk_t(s) to process so no rval to return */ 2831 rp->r_vals = 0; 2832 } 2833 return (error); 2834 } 2835 2836 /* 2837 * Receive the next message on the queue. 2838 * If msg_controllen is non-zero when called the caller is interested in 2839 * any received control info (options). 2840 * If msg_namelen is non-zero when called the caller is interested in 2841 * any received source address. 2842 * The routine returns with msg_control and msg_name pointing to 2843 * kmem_alloc'ed memory which the caller has to free. 2844 */ 2845 int 2846 sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop) 2847 { 2848 union T_primitives *tpr; 2849 mblk_t *mp; 2850 uchar_t pri; 2851 int pflag, opflag; 2852 void *control; 2853 t_uscalar_t controllen; 2854 t_uscalar_t namelen; 2855 int so_state = so->so_state; /* Snapshot */ 2856 ssize_t saved_resid; 2857 int error; 2858 rval_t rval; 2859 int flags; 2860 clock_t timout; 2861 int first; 2862 2863 flags = msg->msg_flags; 2864 msg->msg_flags = 0; 2865 2866 dprintso(so, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n", 2867 so, msg, flags, 2868 pr_state(so->so_state, so->so_mode), so->so_error)); 2869 2870 /* 2871 * If we are not connected because we have never been connected 2872 * we return ENOTCONN. If we have been connected (but are no longer 2873 * connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return 2874 * the EOF. 
2875 * 2876 * An alternative would be to post an ENOTCONN error in stream head 2877 * (read+write) and clear it when we're connected. However, that error 2878 * would cause incorrect poll/select behavior! 2879 */ 2880 if ((so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 && 2881 (so->so_mode & SM_CONNREQUIRED)) { 2882 return (ENOTCONN); 2883 } 2884 2885 /* 2886 * Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but 2887 * after checking that the read queue is empty) and returns zero. 2888 * This implementation will sleep (in kstrgetmsg) even if uio_resid 2889 * is zero. 2890 */ 2891 2892 if (flags & MSG_OOB) { 2893 /* Check that the transport supports OOB */ 2894 if (!(so->so_mode & SM_EXDATA)) 2895 return (EOPNOTSUPP); 2896 return (sorecvoob(so, msg, uiop, flags)); 2897 } 2898 2899 /* 2900 * Set msg_controllen and msg_namelen to zero here to make it 2901 * simpler in the cases that no control or name is returned. 2902 */ 2903 controllen = msg->msg_controllen; 2904 namelen = msg->msg_namelen; 2905 msg->msg_controllen = 0; 2906 msg->msg_namelen = 0; 2907 2908 dprintso(so, 1, ("sotpi_recvmsg: namelen %d controllen %d\n", 2909 namelen, controllen)); 2910 2911 /* 2912 * If an NL7C enabled socket and not waiting for write data. 2913 */ 2914 mutex_enter(&so->so_lock); 2915 if ((so->so_nl7c_flags & (NL7C_ENABLED|NL7C_WAITWRITE)) == 2916 NL7C_ENABLED) { 2917 if (so->so_nl7c_uri) { 2918 /* 2919 * Close uri processing for a previous request. 2920 */ 2921 nl7c_close(so); 2922 } 2923 if (nl7c_process(so, 2924 (so->so_state & (SS_NONBLOCK|SS_NDELAY)), 2925 (int)((tcp_t *)so->so_priv)->tcp_mss)) { 2926 /* 2927 * NL7C has completed processing on the socket, 2928 * clear the enabled bit as no further NL7C 2929 * processing will be needed. 2930 */ 2931 so->so_nl7c_flags = 0; 2932 } 2933 } 2934 2935 /* 2936 * Only one reader is allowed at any given time. This is needed 2937 * for T_EXDATA handling and, in the future, MSG_WAITALL. 
2938 * 2939 * This is slightly different that BSD behavior in that it fails with 2940 * EWOULDBLOCK when using nonblocking io. In BSD the read queue access 2941 * is single-threaded using sblock(), which is dropped while waiting 2942 * for data to appear. The difference shows up e.g. if one 2943 * file descriptor does not have O_NONBLOCK but a dup'ed file descriptor 2944 * does use nonblocking io and different threads are reading each 2945 * file descriptor. In BSD there would never be an EWOULDBLOCK error 2946 * in this case as long as the read queue doesn't get empty. 2947 * In this implementation the thread using nonblocking io can 2948 * get an EWOULDBLOCK error due to the blocking thread executing 2949 * e.g. in the uiomove in kstrgetmsg. 2950 * This difference is not believed to be significant. 2951 */ 2952 error = so_lock_read_intr(so, uiop->uio_fmode); /* Set SOREADLOCKED */ 2953 mutex_exit(&so->so_lock); 2954 if (error) 2955 return (error); 2956 2957 /* 2958 * Tell kstrgetmsg to not inspect the stream head errors until all 2959 * queued data has been consumed. 2960 * Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set. 2961 * Also, If uio_fmode indicates nonblocking kstrgetmsg will not block. 2962 * 2963 * MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and 2964 * to T_OPTDATA_IND that do not contain any user-visible control msg. 2965 * Note that MSG_WAITALL set with MSG_PEEK is a noop. 2966 */ 2967 pflag = MSG_ANY | MSG_DELAYERROR; 2968 if (flags & MSG_PEEK) { 2969 pflag |= MSG_IPEEK; 2970 flags &= ~MSG_WAITALL; 2971 } 2972 if (so->so_mode & SM_ATOMIC) 2973 pflag |= MSG_DISCARDTAIL; 2974 2975 if (flags & MSG_DONTWAIT) 2976 timout = 0; 2977 else 2978 timout = -1; 2979 opflag = pflag; 2980 first = 1; 2981 2982 /* 2983 * If so saved NL7C rcv mblk_t(s) uiomove them first 2984 * else get'm from the streamhead. 
2985 */ 2986 retry: 2987 saved_resid = uiop->uio_resid; 2988 pri = 0; 2989 mp = NULL; 2990 if (so->so_nl7c_rcv_mp != NULL) { 2991 error = nl7c_sorecv(so, &mp, uiop, &rval); 2992 } else { 2993 error = kstrgetmsg(SOTOV(so), &mp, uiop, &pri, &pflag, 2994 timout, &rval); 2995 } 2996 if (error) { 2997 switch (error) { 2998 case EINTR: 2999 case EWOULDBLOCK: 3000 if (!first) 3001 error = 0; 3002 break; 3003 case ETIME: 3004 /* Returned from kstrgetmsg when timeout expires */ 3005 if (!first) 3006 error = 0; 3007 else 3008 error = EWOULDBLOCK; 3009 break; 3010 default: 3011 eprintsoline(so, error); 3012 break; 3013 } 3014 mutex_enter(&so->so_lock); 3015 so_unlock_read(so); /* Clear SOREADLOCKED */ 3016 mutex_exit(&so->so_lock); 3017 return (error); 3018 } 3019 /* 3020 * For datagrams the MOREDATA flag is used to set MSG_TRUNC. 3021 * For non-datagrams MOREDATA is used to set MSG_EOR. 3022 */ 3023 ASSERT(!(rval.r_val1 & MORECTL)); 3024 if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC)) 3025 msg->msg_flags |= MSG_TRUNC; 3026 3027 if (mp == NULL) { 3028 dprintso(so, 1, ("sotpi_recvmsg: got M_DATA\n")); 3029 /* 3030 * 4.3BSD and 4.4BSD clears the mark when peeking across it. 3031 * The draft Posix socket spec states that the mark should 3032 * not be cleared when peeking. We follow the latter. 3033 */ 3034 if ((so->so_state & 3035 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3036 (uiop->uio_resid != saved_resid) && 3037 !(flags & MSG_PEEK)) { 3038 sorecv_update_oobstate(so); 3039 } 3040 3041 mutex_enter(&so->so_lock); 3042 /* Set MSG_EOR based on MOREDATA */ 3043 if (!(rval.r_val1 & MOREDATA)) { 3044 if (so->so_state & SS_SAVEDEOR) { 3045 msg->msg_flags |= MSG_EOR; 3046 so->so_state &= ~SS_SAVEDEOR; 3047 } 3048 } 3049 /* 3050 * If some data was received (i.e. not EOF) and the 3051 * read/recv* has not been satisfied wait for some more. 
3052 */ 3053 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3054 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3055 mutex_exit(&so->so_lock); 3056 first = 0; 3057 pflag = opflag | MSG_NOMARK; 3058 goto retry; 3059 } 3060 so_unlock_read(so); /* Clear SOREADLOCKED */ 3061 mutex_exit(&so->so_lock); 3062 return (0); 3063 } 3064 3065 /* strsock_proto has already verified length and alignment */ 3066 tpr = (union T_primitives *)mp->b_rptr; 3067 dprintso(so, 1, ("sotpi_recvmsg: type %d\n", tpr->type)); 3068 3069 switch (tpr->type) { 3070 case T_DATA_IND: { 3071 if ((so->so_state & 3072 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3073 (uiop->uio_resid != saved_resid) && 3074 !(flags & MSG_PEEK)) { 3075 sorecv_update_oobstate(so); 3076 } 3077 3078 /* 3079 * Set msg_flags to MSG_EOR based on 3080 * MORE_flag and MOREDATA. 3081 */ 3082 mutex_enter(&so->so_lock); 3083 so->so_state &= ~SS_SAVEDEOR; 3084 if (!(tpr->data_ind.MORE_flag & 1)) { 3085 if (!(rval.r_val1 & MOREDATA)) 3086 msg->msg_flags |= MSG_EOR; 3087 else 3088 so->so_state |= SS_SAVEDEOR; 3089 } 3090 freemsg(mp); 3091 /* 3092 * If some data was received (i.e. not EOF) and the 3093 * read/recv* has not been satisfied wait for some more. 
3094 */ 3095 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3096 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3097 mutex_exit(&so->so_lock); 3098 first = 0; 3099 pflag = opflag | MSG_NOMARK; 3100 goto retry; 3101 } 3102 so_unlock_read(so); /* Clear SOREADLOCKED */ 3103 mutex_exit(&so->so_lock); 3104 return (0); 3105 } 3106 case T_UNITDATA_IND: { 3107 void *addr; 3108 t_uscalar_t addrlen; 3109 void *abuf; 3110 t_uscalar_t optlen; 3111 void *opt; 3112 3113 if ((so->so_state & 3114 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3115 (uiop->uio_resid != saved_resid) && 3116 !(flags & MSG_PEEK)) { 3117 sorecv_update_oobstate(so); 3118 } 3119 3120 if (namelen != 0) { 3121 /* Caller wants source address */ 3122 addrlen = tpr->unitdata_ind.SRC_length; 3123 addr = sogetoff(mp, 3124 tpr->unitdata_ind.SRC_offset, 3125 addrlen, 1); 3126 if (addr == NULL) { 3127 freemsg(mp); 3128 error = EPROTO; 3129 eprintsoline(so, error); 3130 goto err; 3131 } 3132 if (so->so_family == AF_UNIX) { 3133 /* 3134 * Can not use the transport level address. 3135 * If there is a SO_SRCADDR option carrying 3136 * the socket level address it will be 3137 * extracted below. 3138 */ 3139 addr = NULL; 3140 addrlen = 0; 3141 } 3142 } 3143 optlen = tpr->unitdata_ind.OPT_length; 3144 if (optlen != 0) { 3145 t_uscalar_t ncontrollen; 3146 3147 /* 3148 * Extract any source address option. 3149 * Determine how large cmsg buffer is needed. 
3150 */ 3151 opt = sogetoff(mp, 3152 tpr->unitdata_ind.OPT_offset, 3153 optlen, __TPI_ALIGN_SIZE); 3154 3155 if (opt == NULL) { 3156 freemsg(mp); 3157 error = EPROTO; 3158 eprintsoline(so, error); 3159 goto err; 3160 } 3161 if (so->so_family == AF_UNIX) 3162 so_getopt_srcaddr(opt, optlen, &addr, &addrlen); 3163 ncontrollen = so_cmsglen(mp, opt, optlen, 3164 !(flags & MSG_XPG4_2)); 3165 if (controllen != 0) 3166 controllen = ncontrollen; 3167 else if (ncontrollen != 0) 3168 msg->msg_flags |= MSG_CTRUNC; 3169 } else { 3170 controllen = 0; 3171 } 3172 3173 if (namelen != 0) { 3174 /* 3175 * Return address to caller. 3176 * Caller handles truncation if length 3177 * exceeds msg_namelen. 3178 * NOTE: AF_UNIX NUL termination is ensured by 3179 * the sender's copyin_name(). 3180 */ 3181 abuf = kmem_alloc(addrlen, KM_SLEEP); 3182 3183 bcopy(addr, abuf, addrlen); 3184 msg->msg_name = abuf; 3185 msg->msg_namelen = addrlen; 3186 } 3187 3188 if (controllen != 0) { 3189 /* 3190 * Return control msg to caller. 3191 * Caller handles truncation if length 3192 * exceeds msg_controllen. 
3193 */ 3194 control = kmem_alloc(controllen, KM_SLEEP); 3195 3196 error = so_opt2cmsg(mp, opt, optlen, 3197 !(flags & MSG_XPG4_2), 3198 control, controllen); 3199 if (error) { 3200 freemsg(mp); 3201 if (msg->msg_namelen != 0) 3202 kmem_free(msg->msg_name, 3203 msg->msg_namelen); 3204 kmem_free(control, controllen); 3205 eprintsoline(so, error); 3206 goto err; 3207 } 3208 msg->msg_control = control; 3209 msg->msg_controllen = controllen; 3210 } 3211 3212 freemsg(mp); 3213 mutex_enter(&so->so_lock); 3214 so_unlock_read(so); /* Clear SOREADLOCKED */ 3215 mutex_exit(&so->so_lock); 3216 return (0); 3217 } 3218 case T_OPTDATA_IND: { 3219 struct T_optdata_req *tdr; 3220 void *opt; 3221 t_uscalar_t optlen; 3222 3223 if ((so->so_state & 3224 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3225 (uiop->uio_resid != saved_resid) && 3226 !(flags & MSG_PEEK)) { 3227 sorecv_update_oobstate(so); 3228 } 3229 3230 tdr = (struct T_optdata_req *)mp->b_rptr; 3231 optlen = tdr->OPT_length; 3232 if (optlen != 0) { 3233 t_uscalar_t ncontrollen; 3234 /* 3235 * Determine how large cmsg buffer is needed. 3236 */ 3237 opt = sogetoff(mp, 3238 tpr->optdata_ind.OPT_offset, 3239 optlen, __TPI_ALIGN_SIZE); 3240 3241 if (opt == NULL) { 3242 freemsg(mp); 3243 error = EPROTO; 3244 eprintsoline(so, error); 3245 goto err; 3246 } 3247 3248 ncontrollen = so_cmsglen(mp, opt, optlen, 3249 !(flags & MSG_XPG4_2)); 3250 if (controllen != 0) 3251 controllen = ncontrollen; 3252 else if (ncontrollen != 0) 3253 msg->msg_flags |= MSG_CTRUNC; 3254 } else { 3255 controllen = 0; 3256 } 3257 3258 if (controllen != 0) { 3259 /* 3260 * Return control msg to caller. 3261 * Caller handles truncation if length 3262 * exceeds msg_controllen. 
3263 */ 3264 control = kmem_alloc(controllen, KM_SLEEP); 3265 3266 error = so_opt2cmsg(mp, opt, optlen, 3267 !(flags & MSG_XPG4_2), 3268 control, controllen); 3269 if (error) { 3270 freemsg(mp); 3271 kmem_free(control, controllen); 3272 eprintsoline(so, error); 3273 goto err; 3274 } 3275 msg->msg_control = control; 3276 msg->msg_controllen = controllen; 3277 } 3278 3279 /* 3280 * Set msg_flags to MSG_EOR based on 3281 * DATA_flag and MOREDATA. 3282 */ 3283 mutex_enter(&so->so_lock); 3284 so->so_state &= ~SS_SAVEDEOR; 3285 if (!(tpr->data_ind.MORE_flag & 1)) { 3286 if (!(rval.r_val1 & MOREDATA)) 3287 msg->msg_flags |= MSG_EOR; 3288 else 3289 so->so_state |= SS_SAVEDEOR; 3290 } 3291 freemsg(mp); 3292 /* 3293 * If some data was received (i.e. not EOF) and the 3294 * read/recv* has not been satisfied wait for some more. 3295 * Not possible to wait if control info was received. 3296 */ 3297 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3298 controllen == 0 && 3299 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3300 mutex_exit(&so->so_lock); 3301 first = 0; 3302 pflag = opflag | MSG_NOMARK; 3303 goto retry; 3304 } 3305 so_unlock_read(so); /* Clear SOREADLOCKED */ 3306 mutex_exit(&so->so_lock); 3307 return (0); 3308 } 3309 case T_EXDATA_IND: { 3310 dprintso(so, 1, 3311 ("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld " 3312 "state %s\n", 3313 so->so_oobsigcnt, so->so_oobcnt, 3314 saved_resid - uiop->uio_resid, 3315 pr_state(so->so_state, so->so_mode))); 3316 /* 3317 * kstrgetmsg handles MSGMARK so there is nothing to 3318 * inspect in the T_EXDATA_IND. 3319 * strsock_proto makes the stream head queue the T_EXDATA_IND 3320 * as a separate message with no M_DATA component. Furthermore, 3321 * the stream head does not consolidate M_DATA messages onto 3322 * an MSGMARK'ed message ensuring that the T_EXDATA_IND 3323 * remains a message by itself. 
This is needed since MSGMARK 3324 * marks both the whole message as well as the last byte 3325 * of the message. 3326 */ 3327 freemsg(mp); 3328 ASSERT(uiop->uio_resid == saved_resid); /* No data */ 3329 if (flags & MSG_PEEK) { 3330 /* 3331 * Even though we are peeking we consume the 3332 * T_EXDATA_IND thereby moving the mark information 3333 * to SS_RCVATMARK. Then the oob code below will 3334 * retry the peeking kstrgetmsg. 3335 * Note that the stream head read queue is 3336 * never flushed without holding SOREADLOCKED 3337 * thus the T_EXDATA_IND can not disappear 3338 * underneath us. 3339 */ 3340 dprintso(so, 1, 3341 ("sotpi_recvmsg: consume EXDATA_IND " 3342 "counts %d/%d state %s\n", 3343 so->so_oobsigcnt, 3344 so->so_oobcnt, 3345 pr_state(so->so_state, so->so_mode))); 3346 3347 pflag = MSG_ANY | MSG_DELAYERROR; 3348 if (so->so_mode & SM_ATOMIC) 3349 pflag |= MSG_DISCARDTAIL; 3350 3351 pri = 0; 3352 mp = NULL; 3353 3354 error = kstrgetmsg(SOTOV(so), &mp, uiop, 3355 &pri, &pflag, (clock_t)-1, &rval); 3356 ASSERT(uiop->uio_resid == saved_resid); 3357 3358 if (error) { 3359 #ifdef SOCK_DEBUG 3360 if (error != EWOULDBLOCK && error != EINTR) { 3361 eprintsoline(so, error); 3362 } 3363 #endif /* SOCK_DEBUG */ 3364 mutex_enter(&so->so_lock); 3365 so_unlock_read(so); /* Clear SOREADLOCKED */ 3366 mutex_exit(&so->so_lock); 3367 return (error); 3368 } 3369 ASSERT(mp); 3370 tpr = (union T_primitives *)mp->b_rptr; 3371 ASSERT(tpr->type == T_EXDATA_IND); 3372 freemsg(mp); 3373 } /* end "if (flags & MSG_PEEK)" */ 3374 3375 /* 3376 * Decrement the number of queued and pending oob. 3377 * 3378 * SS_RCVATMARK is cleared when we read past a mark. 3379 * SS_HAVEOOBDATA is cleared when we've read past the 3380 * last mark. 3381 * SS_OOBPEND is cleared if we've read past the last 3382 * mark and no (new) SIGURG has been posted. 
3383 */ 3384 mutex_enter(&so->so_lock); 3385 ASSERT(so_verify_oobstate(so)); 3386 ASSERT(so->so_oobsigcnt >= so->so_oobcnt); 3387 ASSERT(so->so_oobsigcnt > 0); 3388 so->so_oobsigcnt--; 3389 ASSERT(so->so_oobcnt > 0); 3390 so->so_oobcnt--; 3391 /* 3392 * Since the T_EXDATA_IND has been removed from the stream 3393 * head, but we have not read data past the mark, 3394 * sockfs needs to track that the socket is still at the mark. 3395 * 3396 * Since no data was received call kstrgetmsg again to wait 3397 * for data. 3398 */ 3399 so->so_state |= SS_RCVATMARK; 3400 mutex_exit(&so->so_lock); 3401 dprintso(so, 1, 3402 ("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n", 3403 so->so_oobsigcnt, so->so_oobcnt, 3404 pr_state(so->so_state, so->so_mode))); 3405 pflag = opflag; 3406 goto retry; 3407 } 3408 default: 3409 ASSERT(0); 3410 freemsg(mp); 3411 error = EPROTO; 3412 eprintsoline(so, error); 3413 goto err; 3414 } 3415 /* NOTREACHED */ 3416 err: 3417 mutex_enter(&so->so_lock); 3418 so_unlock_read(so); /* Clear SOREADLOCKED */ 3419 mutex_exit(&so->so_lock); 3420 return (error); 3421 } 3422 3423 /* 3424 * Sending data with options on a datagram socket. 3425 * Assumes caller has verified that SS_ISBOUND etc. are set. 
3426 */ 3427 static int 3428 sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3429 struct uio *uiop, void *control, t_uscalar_t controllen, int flags) 3430 { 3431 struct T_unitdata_req tudr; 3432 mblk_t *mp; 3433 int error; 3434 void *addr; 3435 socklen_t addrlen; 3436 void *src; 3437 socklen_t srclen; 3438 ssize_t len; 3439 int size; 3440 struct T_opthdr toh; 3441 struct fdbuf *fdbuf; 3442 t_uscalar_t optlen; 3443 void *fds; 3444 int fdlen; 3445 3446 ASSERT(name && namelen); 3447 ASSERT(control && controllen); 3448 3449 len = uiop->uio_resid; 3450 if (len > (ssize_t)so->so_tidu_size) { 3451 return (EMSGSIZE); 3452 } 3453 3454 /* 3455 * For AF_UNIX the destination address is translated to an internal 3456 * name and the source address is passed as an option. 3457 * Also, file descriptors are passed as file pointers in an 3458 * option. 3459 */ 3460 3461 /* 3462 * Length and family checks. 3463 */ 3464 error = so_addr_verify(so, name, namelen); 3465 if (error) { 3466 eprintsoline(so, error); 3467 return (error); 3468 } 3469 if (so->so_family == AF_UNIX) { 3470 if (so->so_state & SS_FADDR_NOXLATE) { 3471 /* 3472 * Already have a transport internal address. Do not 3473 * pass any (transport internal) source address. 3474 */ 3475 addr = name; 3476 addrlen = namelen; 3477 src = NULL; 3478 srclen = 0; 3479 } else { 3480 /* 3481 * Pass the sockaddr_un source address as an option 3482 * and translate the remote address. 3483 * 3484 * Note that this code does not prevent so_laddr_sa 3485 * from changing while it is being used. Thus 3486 * if an unbind+bind occurs concurrently with this 3487 * send the peer might see a partially new and a 3488 * partially old "from" address. 
3489 */ 3490 src = so->so_laddr_sa; 3491 srclen = (t_uscalar_t)so->so_laddr_len; 3492 dprintso(so, 1, 3493 ("sosend_dgramcmsg UNIX: srclen %d, src %p\n", 3494 srclen, src)); 3495 error = so_ux_addr_xlate(so, name, namelen, 3496 (flags & MSG_XPG4_2), 3497 &addr, &addrlen); 3498 if (error) { 3499 eprintsoline(so, error); 3500 return (error); 3501 } 3502 } 3503 } else { 3504 addr = name; 3505 addrlen = namelen; 3506 src = NULL; 3507 srclen = 0; 3508 } 3509 optlen = so_optlen(control, controllen, 3510 !(flags & MSG_XPG4_2)); 3511 tudr.PRIM_type = T_UNITDATA_REQ; 3512 tudr.DEST_length = addrlen; 3513 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3514 if (srclen != 0) 3515 tudr.OPT_length = (t_scalar_t)(optlen + sizeof (toh) + 3516 _TPI_ALIGN_TOPT(srclen)); 3517 else 3518 tudr.OPT_length = optlen; 3519 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3520 _TPI_ALIGN_TOPT(addrlen)); 3521 3522 size = tudr.OPT_offset + tudr.OPT_length; 3523 3524 /* 3525 * File descriptors only when SM_FDPASSING set. 3526 */ 3527 error = so_getfdopt(control, controllen, 3528 !(flags & MSG_XPG4_2), &fds, &fdlen); 3529 if (error) 3530 return (error); 3531 if (fdlen != -1) { 3532 if (!(so->so_mode & SM_FDPASSING)) 3533 return (EOPNOTSUPP); 3534 3535 error = fdbuf_create(fds, fdlen, &fdbuf); 3536 if (error) 3537 return (error); 3538 mp = fdbuf_allocmsg(size, fdbuf); 3539 } else { 3540 mp = soallocproto(size, _ALLOC_INTR); 3541 if (mp == NULL) { 3542 /* 3543 * Caught a signal waiting for memory. 3544 * Let send* return EINTR. 
3545 */ 3546 return (EINTR); 3547 } 3548 } 3549 soappendmsg(mp, &tudr, sizeof (tudr)); 3550 soappendmsg(mp, addr, addrlen); 3551 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3552 3553 if (fdlen != -1) { 3554 ASSERT(fdbuf != NULL); 3555 toh.level = SOL_SOCKET; 3556 toh.name = SO_FILEP; 3557 toh.len = fdbuf->fd_size + 3558 (t_uscalar_t)sizeof (struct T_opthdr); 3559 toh.status = 0; 3560 soappendmsg(mp, &toh, sizeof (toh)); 3561 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3562 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3563 } 3564 if (srclen != 0) { 3565 /* 3566 * There is a AF_UNIX sockaddr_un to include as a source 3567 * address option. 3568 */ 3569 toh.level = SOL_SOCKET; 3570 toh.name = SO_SRCADDR; 3571 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3572 toh.status = 0; 3573 soappendmsg(mp, &toh, sizeof (toh)); 3574 soappendmsg(mp, src, srclen); 3575 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3576 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3577 } 3578 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3579 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3580 /* At most 3 bytes left in the message */ 3581 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3582 ASSERT(MBLKL(mp) <= (ssize_t)size); 3583 3584 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3585 #ifdef C2_AUDIT 3586 if (audit_active) 3587 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3588 #endif /* C2_AUDIT */ 3589 3590 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3591 #ifdef SOCK_DEBUG 3592 if (error) { 3593 eprintsoline(so, error); 3594 } 3595 #endif /* SOCK_DEBUG */ 3596 return (error); 3597 } 3598 3599 /* 3600 * Sending data with options on a connected stream socket. 3601 * Assumes caller has verified that SS_ISCONNECTED is set. 
3602 */ 3603 static int 3604 sosend_svccmsg(struct sonode *so, 3605 struct uio *uiop, 3606 int more, 3607 void *control, 3608 t_uscalar_t controllen, 3609 int flags) 3610 { 3611 struct T_optdata_req tdr; 3612 mblk_t *mp; 3613 int error; 3614 ssize_t iosize; 3615 int first = 1; 3616 int size; 3617 struct fdbuf *fdbuf; 3618 t_uscalar_t optlen; 3619 void *fds; 3620 int fdlen; 3621 struct T_opthdr toh; 3622 3623 dprintso(so, 1, 3624 ("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid)); 3625 3626 /* 3627 * Has to be bound and connected. However, since no locks are 3628 * held the state could have changed after sotpi_sendmsg checked it 3629 * thus it is not possible to ASSERT on the state. 3630 */ 3631 3632 /* Options on connection-oriented only when SM_OPTDATA set. */ 3633 if (!(so->so_mode & SM_OPTDATA)) 3634 return (EOPNOTSUPP); 3635 3636 do { 3637 /* 3638 * Set the MORE flag if uio_resid does not fit in this 3639 * message or if the caller passed in "more". 3640 * Error for transports with zero tidu_size. 3641 */ 3642 tdr.PRIM_type = T_OPTDATA_REQ; 3643 iosize = so->so_tidu_size; 3644 if (iosize <= 0) 3645 return (EMSGSIZE); 3646 if (uiop->uio_resid > iosize) { 3647 tdr.DATA_flag = 1; 3648 } else { 3649 if (more) 3650 tdr.DATA_flag = 1; 3651 else 3652 tdr.DATA_flag = 0; 3653 iosize = uiop->uio_resid; 3654 } 3655 dprintso(so, 1, ("sosend_svccmsg: sending %d, %ld bytes\n", 3656 tdr.DATA_flag, iosize)); 3657 3658 optlen = so_optlen(control, controllen, !(flags & MSG_XPG4_2)); 3659 tdr.OPT_length = optlen; 3660 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 3661 3662 size = (int)sizeof (tdr) + optlen; 3663 /* 3664 * File descriptors only when SM_FDPASSING set. 
3665 */ 3666 error = so_getfdopt(control, controllen, 3667 !(flags & MSG_XPG4_2), &fds, &fdlen); 3668 if (error) 3669 return (error); 3670 if (fdlen != -1) { 3671 if (!(so->so_mode & SM_FDPASSING)) 3672 return (EOPNOTSUPP); 3673 3674 error = fdbuf_create(fds, fdlen, &fdbuf); 3675 if (error) 3676 return (error); 3677 mp = fdbuf_allocmsg(size, fdbuf); 3678 } else { 3679 mp = soallocproto(size, _ALLOC_INTR); 3680 if (mp == NULL) { 3681 /* 3682 * Caught a signal waiting for memory. 3683 * Let send* return EINTR. 3684 */ 3685 return (first ? EINTR : 0); 3686 } 3687 } 3688 soappendmsg(mp, &tdr, sizeof (tdr)); 3689 3690 if (fdlen != -1) { 3691 ASSERT(fdbuf != NULL); 3692 toh.level = SOL_SOCKET; 3693 toh.name = SO_FILEP; 3694 toh.len = fdbuf->fd_size + 3695 (t_uscalar_t)sizeof (struct T_opthdr); 3696 toh.status = 0; 3697 soappendmsg(mp, &toh, sizeof (toh)); 3698 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3699 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3700 } 3701 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3702 /* At most 3 bytes left in the message */ 3703 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3704 ASSERT(MBLKL(mp) <= (ssize_t)size); 3705 3706 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3707 3708 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 3709 0, MSG_BAND, 0); 3710 if (error) { 3711 if (!first && error == EWOULDBLOCK) 3712 return (0); 3713 eprintsoline(so, error); 3714 return (error); 3715 } 3716 control = NULL; 3717 first = 0; 3718 if (uiop->uio_resid > 0) { 3719 /* 3720 * Recheck for fatal errors. Fail write even though 3721 * some data have been written. This is consistent 3722 * with strwrite semantics and BSD sockets semantics. 
3723 */ 3724 if (so->so_state & SS_CANTSENDMORE) { 3725 tsignal(curthread, SIGPIPE); 3726 eprintsoline(so, error); 3727 return (EPIPE); 3728 } 3729 if (so->so_error != 0) { 3730 mutex_enter(&so->so_lock); 3731 error = sogeterr(so); 3732 mutex_exit(&so->so_lock); 3733 if (error != 0) { 3734 eprintsoline(so, error); 3735 return (error); 3736 } 3737 } 3738 } 3739 } while (uiop->uio_resid > 0); 3740 return (0); 3741 } 3742 3743 /* 3744 * Sending data on a datagram socket. 3745 * Assumes caller has verified that SS_ISBOUND etc. are set. 3746 * 3747 * For AF_UNIX the destination address is translated to an internal 3748 * name and the source address is passed as an option. 3749 */ 3750 int 3751 sosend_dgram(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3752 struct uio *uiop, int flags) 3753 { 3754 struct T_unitdata_req tudr; 3755 mblk_t *mp; 3756 int error; 3757 void *addr; 3758 socklen_t addrlen; 3759 void *src; 3760 socklen_t srclen; 3761 ssize_t len; 3762 3763 ASSERT(name != NULL && namelen != 0); 3764 3765 len = uiop->uio_resid; 3766 if (len > so->so_tidu_size) { 3767 error = EMSGSIZE; 3768 goto done; 3769 } 3770 3771 /* Length and family checks */ 3772 error = so_addr_verify(so, name, namelen); 3773 if (error != 0) 3774 goto done; 3775 3776 if (so->so_state & SS_DIRECT) 3777 return (sodgram_direct(so, name, namelen, uiop, flags)); 3778 3779 if (so->so_family == AF_UNIX) { 3780 if (so->so_state & SS_FADDR_NOXLATE) { 3781 /* 3782 * Already have a transport internal address. Do not 3783 * pass any (transport internal) source address. 3784 */ 3785 addr = name; 3786 addrlen = namelen; 3787 src = NULL; 3788 srclen = 0; 3789 } else { 3790 /* 3791 * Pass the sockaddr_un source address as an option 3792 * and translate the remote address. 3793 * 3794 * Note that this code does not prevent so_laddr_sa 3795 * from changing while it is being used. 
Thus 3796 * if an unbind+bind occurs concurrently with this 3797 * send the peer might see a partially new and a 3798 * partially old "from" address. 3799 */ 3800 src = so->so_laddr_sa; 3801 srclen = (socklen_t)so->so_laddr_len; 3802 dprintso(so, 1, 3803 ("sosend_dgram UNIX: srclen %d, src %p\n", 3804 srclen, src)); 3805 error = so_ux_addr_xlate(so, name, namelen, 3806 (flags & MSG_XPG4_2), 3807 &addr, &addrlen); 3808 if (error) { 3809 eprintsoline(so, error); 3810 goto done; 3811 } 3812 } 3813 } else { 3814 addr = name; 3815 addrlen = namelen; 3816 src = NULL; 3817 srclen = 0; 3818 } 3819 tudr.PRIM_type = T_UNITDATA_REQ; 3820 tudr.DEST_length = addrlen; 3821 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3822 if (srclen == 0) { 3823 tudr.OPT_length = 0; 3824 tudr.OPT_offset = 0; 3825 3826 mp = soallocproto2(&tudr, sizeof (tudr), 3827 addr, addrlen, 0, _ALLOC_INTR); 3828 if (mp == NULL) { 3829 /* 3830 * Caught a signal waiting for memory. 3831 * Let send* return EINTR. 3832 */ 3833 error = EINTR; 3834 goto done; 3835 } 3836 } else { 3837 /* 3838 * There is a AF_UNIX sockaddr_un to include as a source 3839 * address option. 3840 */ 3841 struct T_opthdr toh; 3842 ssize_t size; 3843 3844 tudr.OPT_length = (t_scalar_t)(sizeof (toh) + 3845 _TPI_ALIGN_TOPT(srclen)); 3846 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3847 _TPI_ALIGN_TOPT(addrlen)); 3848 3849 toh.level = SOL_SOCKET; 3850 toh.name = SO_SRCADDR; 3851 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3852 toh.status = 0; 3853 3854 size = tudr.OPT_offset + tudr.OPT_length; 3855 mp = soallocproto2(&tudr, sizeof (tudr), 3856 addr, addrlen, size, _ALLOC_INTR); 3857 if (mp == NULL) { 3858 /* 3859 * Caught a signal waiting for memory. 3860 * Let send* return EINTR. 
3861 */ 3862 error = EINTR; 3863 goto done; 3864 } 3865 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3866 soappendmsg(mp, &toh, sizeof (toh)); 3867 soappendmsg(mp, src, srclen); 3868 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3869 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3870 } 3871 3872 #ifdef C2_AUDIT 3873 if (audit_active) 3874 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3875 #endif /* C2_AUDIT */ 3876 3877 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3878 done: 3879 #ifdef SOCK_DEBUG 3880 if (error) { 3881 eprintsoline(so, error); 3882 } 3883 #endif /* SOCK_DEBUG */ 3884 return (error); 3885 } 3886 3887 /* 3888 * Sending data on a connected stream socket. 3889 * Assumes caller has verified that SS_ISCONNECTED is set. 3890 */ 3891 int 3892 sosend_svc(struct sonode *so, 3893 struct uio *uiop, 3894 t_scalar_t prim, 3895 int more, 3896 int sflag) 3897 { 3898 struct T_data_req tdr; 3899 mblk_t *mp; 3900 int error; 3901 ssize_t iosize; 3902 int first = 1; 3903 3904 dprintso(so, 1, 3905 ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n", 3906 so, uiop->uio_resid, prim, sflag)); 3907 3908 /* 3909 * Has to be bound and connected. However, since no locks are 3910 * held the state could have changed after sotpi_sendmsg checked it 3911 * thus it is not possible to ASSERT on the state. 3912 */ 3913 3914 do { 3915 /* 3916 * Set the MORE flag if uio_resid does not fit in this 3917 * message or if the caller passed in "more". 3918 * Error for transports with zero tidu_size. 
3919 */ 3920 tdr.PRIM_type = prim; 3921 iosize = so->so_tidu_size; 3922 if (iosize <= 0) 3923 return (EMSGSIZE); 3924 if (uiop->uio_resid > iosize) { 3925 tdr.MORE_flag = 1; 3926 } else { 3927 if (more) 3928 tdr.MORE_flag = 1; 3929 else 3930 tdr.MORE_flag = 0; 3931 iosize = uiop->uio_resid; 3932 } 3933 dprintso(so, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n", 3934 prim, tdr.MORE_flag, iosize)); 3935 mp = soallocproto1(&tdr, sizeof (tdr), 0, _ALLOC_INTR); 3936 if (mp == NULL) { 3937 /* 3938 * Caught a signal waiting for memory. 3939 * Let send* return EINTR. 3940 */ 3941 if (first) 3942 return (EINTR); 3943 else 3944 return (0); 3945 } 3946 3947 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 3948 0, sflag | MSG_BAND, 0); 3949 if (error) { 3950 if (!first && error == EWOULDBLOCK) 3951 return (0); 3952 eprintsoline(so, error); 3953 return (error); 3954 } 3955 first = 0; 3956 if (uiop->uio_resid > 0) { 3957 /* 3958 * Recheck for fatal errors. Fail write even though 3959 * some data have been written. This is consistent 3960 * with strwrite semantics and BSD sockets semantics. 3961 */ 3962 if (so->so_state & SS_CANTSENDMORE) { 3963 tsignal(curthread, SIGPIPE); 3964 eprintsoline(so, error); 3965 return (EPIPE); 3966 } 3967 if (so->so_error != 0) { 3968 mutex_enter(&so->so_lock); 3969 error = sogeterr(so); 3970 mutex_exit(&so->so_lock); 3971 if (error != 0) { 3972 eprintsoline(so, error); 3973 return (error); 3974 } 3975 } 3976 } 3977 } while (uiop->uio_resid > 0); 3978 return (0); 3979 } 3980 3981 /* 3982 * Check the state for errors and call the appropriate send function. 3983 * 3984 * If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set) 3985 * this function issues a setsockopt to toggle SO_DONTROUTE before and 3986 * after sending the message. 
3987 */ 3988 static int 3989 sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop) 3990 { 3991 int so_state; 3992 int so_mode; 3993 int error; 3994 struct sockaddr *name; 3995 t_uscalar_t namelen; 3996 int dontroute; 3997 int flags; 3998 3999 dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n", 4000 so, msg, msg->msg_flags, 4001 pr_state(so->so_state, so->so_mode), so->so_error)); 4002 4003 mutex_enter(&so->so_lock); 4004 so_state = so->so_state; 4005 4006 if (so_state & SS_CANTSENDMORE) { 4007 mutex_exit(&so->so_lock); 4008 tsignal(curthread, SIGPIPE); 4009 return (EPIPE); 4010 } 4011 4012 if (so->so_error != 0) { 4013 error = sogeterr(so); 4014 if (error != 0) { 4015 mutex_exit(&so->so_lock); 4016 return (error); 4017 } 4018 } 4019 4020 name = (struct sockaddr *)msg->msg_name; 4021 namelen = msg->msg_namelen; 4022 4023 so_mode = so->so_mode; 4024 4025 if (name == NULL) { 4026 if (!(so_state & SS_ISCONNECTED)) { 4027 mutex_exit(&so->so_lock); 4028 if (so_mode & SM_CONNREQUIRED) 4029 return (ENOTCONN); 4030 else 4031 return (EDESTADDRREQ); 4032 } 4033 if (so_mode & SM_CONNREQUIRED) { 4034 name = NULL; 4035 namelen = 0; 4036 } else { 4037 /* 4038 * Note that this code does not prevent so_faddr_sa 4039 * from changing while it is being used. Thus 4040 * if an "unconnect"+connect occurs concurrently with 4041 * this send the datagram might be delivered to a 4042 * garbaled address. 4043 */ 4044 ASSERT(so->so_faddr_sa); 4045 name = so->so_faddr_sa; 4046 namelen = (t_uscalar_t)so->so_faddr_len; 4047 } 4048 } else { 4049 if (!(so_state & SS_ISCONNECTED) && 4050 (so_mode & SM_CONNREQUIRED)) { 4051 /* Required but not connected */ 4052 mutex_exit(&so->so_lock); 4053 return (ENOTCONN); 4054 } 4055 /* 4056 * Ignore the address on connection-oriented sockets. 4057 * Just like BSD this code does not generate an error for 4058 * TCP (a CONNREQUIRED socket) when sending to an address 4059 * passed in with sendto/sendmsg. 
Instead the data is 4060 * delivered on the connection as if no address had been 4061 * supplied. 4062 */ 4063 if ((so_state & SS_ISCONNECTED) && 4064 !(so_mode & SM_CONNREQUIRED)) { 4065 mutex_exit(&so->so_lock); 4066 return (EISCONN); 4067 } 4068 if (!(so_state & SS_ISBOUND)) { 4069 so_lock_single(so); /* Set SOLOCKED */ 4070 error = sotpi_bind(so, NULL, 0, 4071 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD); 4072 so_unlock_single(so, SOLOCKED); 4073 if (error) { 4074 mutex_exit(&so->so_lock); 4075 eprintsoline(so, error); 4076 return (error); 4077 } 4078 } 4079 /* 4080 * Handle delayed datagram errors. These are only queued 4081 * when the application sets SO_DGRAM_ERRIND. 4082 * Return the error if we are sending to the address 4083 * that was returned in the last T_UDERROR_IND. 4084 * If sending to some other address discard the delayed 4085 * error indication. 4086 */ 4087 if (so->so_delayed_error) { 4088 struct T_uderror_ind *tudi; 4089 void *addr; 4090 t_uscalar_t addrlen; 4091 boolean_t match = B_FALSE; 4092 4093 ASSERT(so->so_eaddr_mp); 4094 error = so->so_delayed_error; 4095 so->so_delayed_error = 0; 4096 tudi = (struct T_uderror_ind *)so->so_eaddr_mp->b_rptr; 4097 addrlen = tudi->DEST_length; 4098 addr = sogetoff(so->so_eaddr_mp, 4099 tudi->DEST_offset, 4100 addrlen, 1); 4101 ASSERT(addr); /* Checked by strsock_proto */ 4102 switch (so->so_family) { 4103 case AF_INET: { 4104 /* Compare just IP address and port */ 4105 sin_t *sin1 = (sin_t *)name; 4106 sin_t *sin2 = (sin_t *)addr; 4107 4108 if (addrlen == sizeof (sin_t) && 4109 namelen == addrlen && 4110 sin1->sin_port == sin2->sin_port && 4111 sin1->sin_addr.s_addr == 4112 sin2->sin_addr.s_addr) 4113 match = B_TRUE; 4114 break; 4115 } 4116 case AF_INET6: { 4117 /* Compare just IP address and port. 
Not flow */ 4118 sin6_t *sin1 = (sin6_t *)name; 4119 sin6_t *sin2 = (sin6_t *)addr; 4120 4121 if (addrlen == sizeof (sin6_t) && 4122 namelen == addrlen && 4123 sin1->sin6_port == sin2->sin6_port && 4124 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 4125 &sin2->sin6_addr)) 4126 match = B_TRUE; 4127 break; 4128 } 4129 case AF_UNIX: 4130 default: 4131 if (namelen == addrlen && 4132 bcmp(name, addr, namelen) == 0) 4133 match = B_TRUE; 4134 } 4135 if (match) { 4136 freemsg(so->so_eaddr_mp); 4137 so->so_eaddr_mp = NULL; 4138 mutex_exit(&so->so_lock); 4139 #ifdef DEBUG 4140 dprintso(so, 0, 4141 ("sockfs delayed error %d for %s\n", 4142 error, 4143 pr_addr(so->so_family, name, namelen))); 4144 #endif /* DEBUG */ 4145 return (error); 4146 } 4147 freemsg(so->so_eaddr_mp); 4148 so->so_eaddr_mp = NULL; 4149 } 4150 } 4151 mutex_exit(&so->so_lock); 4152 4153 flags = msg->msg_flags; 4154 dontroute = 0; 4155 if ((flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE)) { 4156 uint32_t val; 4157 4158 val = 1; 4159 error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4160 &val, (t_uscalar_t)sizeof (val)); 4161 if (error) 4162 return (error); 4163 dontroute = 1; 4164 } 4165 4166 if ((flags & MSG_OOB) && !(so_mode & SM_EXDATA)) { 4167 error = EOPNOTSUPP; 4168 goto done; 4169 } 4170 if (msg->msg_controllen != 0) { 4171 if (!(so_mode & SM_CONNREQUIRED)) { 4172 error = sosend_dgramcmsg(so, name, namelen, uiop, 4173 msg->msg_control, msg->msg_controllen, flags); 4174 } else { 4175 if (flags & MSG_OOB) { 4176 /* Can't generate T_EXDATA_REQ with options */ 4177 error = EOPNOTSUPP; 4178 goto done; 4179 } 4180 error = sosend_svccmsg(so, uiop, 4181 !(flags & MSG_EOR), 4182 msg->msg_control, msg->msg_controllen, 4183 flags); 4184 } 4185 goto done; 4186 } 4187 4188 if (!(so_mode & SM_CONNREQUIRED)) { 4189 /* 4190 * If there is no SO_DONTROUTE to turn off return immediately 4191 * from send_dgram. This can allow tail-call optimizations. 
4192 */ 4193 if (!dontroute) { 4194 return (sosend_dgram(so, name, namelen, uiop, flags)); 4195 } 4196 error = sosend_dgram(so, name, namelen, uiop, flags); 4197 } else { 4198 t_scalar_t prim; 4199 int sflag; 4200 4201 /* Ignore msg_name in the connected state */ 4202 if (flags & MSG_OOB) { 4203 prim = T_EXDATA_REQ; 4204 /* 4205 * Send down T_EXDATA_REQ even if there is flow 4206 * control for data. 4207 */ 4208 sflag = MSG_IGNFLOW; 4209 } else { 4210 if (so_mode & SM_BYTESTREAM) { 4211 /* Byte stream transport - use write */ 4212 4213 dprintso(so, 1, ("sotpi_sendmsg: write\n")); 4214 /* 4215 * If there is no SO_DONTROUTE to turn off, 4216 * SS_DIRECT is on, and there is no flow 4217 * control, we can take the fast path. 4218 */ 4219 if (!dontroute && 4220 (so_state & SS_DIRECT) && 4221 canputnext(SOTOV(so)->v_stream->sd_wrq)) { 4222 return (sostream_direct(so, uiop, 4223 NULL, CRED())); 4224 } 4225 error = strwrite(SOTOV(so), uiop, CRED()); 4226 goto done; 4227 } 4228 prim = T_DATA_REQ; 4229 sflag = 0; 4230 } 4231 /* 4232 * If there is no SO_DONTROUTE to turn off return immediately 4233 * from sosend_svc. This can allow tail-call optimizations. 4234 */ 4235 if (!dontroute) 4236 return (sosend_svc(so, uiop, prim, 4237 !(flags & MSG_EOR), sflag)); 4238 error = sosend_svc(so, uiop, prim, 4239 !(flags & MSG_EOR), sflag); 4240 } 4241 ASSERT(dontroute); 4242 done: 4243 if (dontroute) { 4244 uint32_t val; 4245 4246 val = 0; 4247 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4248 &val, (t_uscalar_t)sizeof (val)); 4249 } 4250 return (error); 4251 } 4252 4253 /* 4254 * Sending data on a datagram socket. 4255 * Assumes caller has verified that SS_ISBOUND etc. are set. 
4256 */ 4257 /* ARGSUSED */ 4258 static int 4259 sodgram_direct(struct sonode *so, struct sockaddr *name, 4260 socklen_t namelen, struct uio *uiop, int flags) 4261 { 4262 struct T_unitdata_req tudr; 4263 mblk_t *mp; 4264 int error = 0; 4265 void *addr; 4266 socklen_t addrlen; 4267 ssize_t len; 4268 struct stdata *stp = SOTOV(so)->v_stream; 4269 int so_state; 4270 queue_t *udp_wq; 4271 4272 ASSERT(name != NULL && namelen != 0); 4273 ASSERT(!(so->so_mode & SM_CONNREQUIRED)); 4274 ASSERT(!(so->so_mode & SM_EXDATA)); 4275 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 4276 ASSERT(SOTOV(so)->v_type == VSOCK); 4277 4278 /* Caller checked for proper length */ 4279 len = uiop->uio_resid; 4280 ASSERT(len <= so->so_tidu_size); 4281 4282 /* Length and family checks have been done by caller */ 4283 ASSERT(name->sa_family == so->so_family); 4284 ASSERT(so->so_family == AF_INET || 4285 (namelen == (socklen_t)sizeof (struct sockaddr_in6))); 4286 ASSERT(so->so_family == AF_INET6 || 4287 (namelen == (socklen_t)sizeof (struct sockaddr_in))); 4288 4289 addr = name; 4290 addrlen = namelen; 4291 4292 if (stp->sd_sidp != NULL && 4293 (error = straccess(stp, JCWRITE)) != 0) 4294 goto done; 4295 4296 so_state = so->so_state; 4297 4298 /* 4299 * For UDP we don't break up the copyin into smaller pieces 4300 * as in the TCP case. That means if ENOMEM is returned by 4301 * mcopyinuio() then the uio vector has not been modified at 4302 * all and we fallback to either strwrite() or kstrputmsg() 4303 * below. Note also that we never generate priority messages 4304 * from here. 
4305 */ 4306 udp_wq = stp->sd_wrq->q_next; 4307 if (canput(udp_wq) && 4308 (mp = mcopyinuio(stp, uiop, -1, -1, &error)) != NULL) { 4309 ASSERT(DB_TYPE(mp) == M_DATA); 4310 ASSERT(uiop->uio_resid == 0); 4311 #ifdef C2_AUDIT 4312 if (audit_active) 4313 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4314 #endif /* C2_AUDIT */ 4315 udp_wput_data(udp_wq, mp, addr, addrlen); 4316 return (0); 4317 } 4318 if (error != 0 && error != ENOMEM) 4319 return (error); 4320 4321 /* 4322 * For connected, let strwrite() handle the blocking case. 4323 * Otherwise we fall thru and use kstrputmsg(). 4324 */ 4325 if (so_state & SS_ISCONNECTED) 4326 return (strwrite(SOTOV(so), uiop, CRED())); 4327 4328 tudr.PRIM_type = T_UNITDATA_REQ; 4329 tudr.DEST_length = addrlen; 4330 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4331 tudr.OPT_length = 0; 4332 tudr.OPT_offset = 0; 4333 4334 mp = soallocproto2(&tudr, sizeof (tudr), addr, addrlen, 0, _ALLOC_INTR); 4335 if (mp == NULL) { 4336 /* 4337 * Caught a signal waiting for memory. 4338 * Let send* return EINTR. 
4339 */ 4340 error = EINTR; 4341 goto done; 4342 } 4343 4344 #ifdef C2_AUDIT 4345 if (audit_active) 4346 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4347 #endif /* C2_AUDIT */ 4348 4349 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4350 done: 4351 #ifdef SOCK_DEBUG 4352 if (error != 0) { 4353 eprintsoline(so, error); 4354 } 4355 #endif /* SOCK_DEBUG */ 4356 return (error); 4357 } 4358 4359 int 4360 sostream_direct(struct sonode *so, struct uio *uiop, mblk_t *mp, cred_t *cr) 4361 { 4362 struct stdata *stp = SOTOV(so)->v_stream; 4363 ssize_t iosize, rmax, maxblk; 4364 queue_t *tcp_wq = stp->sd_wrq->q_next; 4365 mblk_t *newmp; 4366 int error = 0, wflag = 0; 4367 4368 ASSERT(so->so_mode & SM_BYTESTREAM); 4369 ASSERT(SOTOV(so)->v_type == VSOCK); 4370 4371 if (stp->sd_sidp != NULL && 4372 (error = straccess(stp, JCWRITE)) != 0) 4373 return (error); 4374 4375 if (uiop == NULL) { 4376 /* 4377 * kstrwritemp() should have checked sd_flag and 4378 * flow-control before coming here. If we end up 4379 * here it means that we can simply pass down the 4380 * data to tcp. 
4381 */ 4382 ASSERT(mp != NULL); 4383 if (stp->sd_wputdatafunc != NULL) { 4384 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4385 NULL, NULL, NULL); 4386 if (newmp == NULL) { 4387 /* The caller will free mp */ 4388 return (ECOMM); 4389 } 4390 mp = newmp; 4391 } 4392 tcp_wput(tcp_wq, mp); 4393 return (0); 4394 } 4395 4396 /* Fallback to strwrite() to do proper error handling */ 4397 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY)) 4398 return (strwrite(SOTOV(so), uiop, cr)); 4399 4400 rmax = stp->sd_qn_maxpsz; 4401 ASSERT(rmax >= 0 || rmax == INFPSZ); 4402 if (rmax == 0 || uiop->uio_resid <= 0) 4403 return (0); 4404 4405 if (rmax == INFPSZ) 4406 rmax = uiop->uio_resid; 4407 4408 maxblk = stp->sd_maxblk; 4409 4410 for (;;) { 4411 iosize = MIN(uiop->uio_resid, rmax); 4412 4413 mp = mcopyinuio(stp, uiop, iosize, maxblk, &error); 4414 if (mp == NULL) { 4415 /* 4416 * Fallback to strwrite() for ENOMEM; if this 4417 * is our first time in this routine and the uio 4418 * vector has not been modified, we will end up 4419 * calling strwrite() without any flag set. 4420 */ 4421 if (error == ENOMEM) 4422 goto slow_send; 4423 else 4424 return (error); 4425 } 4426 ASSERT(uiop->uio_resid >= 0); 4427 /* 4428 * If mp is non-NULL and ENOMEM is set, it means that 4429 * mcopyinuio() was able to break down some of the user 4430 * data into one or more mblks. Send the partial data 4431 * to tcp and let the rest be handled in strwrite(). 
4432 */ 4433 ASSERT(error == 0 || error == ENOMEM); 4434 if (stp->sd_wputdatafunc != NULL) { 4435 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4436 NULL, NULL, NULL); 4437 if (newmp == NULL) { 4438 /* The caller will free mp */ 4439 return (ECOMM); 4440 } 4441 mp = newmp; 4442 } 4443 tcp_wput(tcp_wq, mp); 4444 4445 wflag |= NOINTR; 4446 4447 if (uiop->uio_resid == 0) { /* No more data; we're done */ 4448 ASSERT(error == 0); 4449 break; 4450 } else if (error == ENOMEM || !canput(tcp_wq) || (stp->sd_flag & 4451 (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))) { 4452 slow_send: 4453 /* 4454 * We were able to send down partial data using 4455 * the direct call interface, but are now relying 4456 * on strwrite() to handle the non-fastpath cases. 4457 * If the socket is blocking we will sleep in 4458 * strwaitq() until write is permitted, otherwise, 4459 * we will need to return the amount of bytes 4460 * written so far back to the app. This is the 4461 * reason why we pass NOINTR flag to strwrite() 4462 * for non-blocking socket, because we don't want 4463 * to return EAGAIN when portion of the user data 4464 * has actually been sent down. 4465 */ 4466 return (strwrite_common(SOTOV(so), uiop, cr, wflag)); 4467 } 4468 } 4469 return (0); 4470 } 4471 4472 /* 4473 * Update so_faddr by asking the transport (unless AF_UNIX). 
4474 */ 4475 int 4476 sotpi_getpeername(struct sonode *so) 4477 { 4478 struct strbuf strbuf; 4479 int error = 0, res; 4480 void *addr; 4481 t_uscalar_t addrlen; 4482 k_sigset_t smask; 4483 4484 dprintso(so, 1, ("sotpi_getpeername(%p) %s\n", 4485 so, pr_state(so->so_state, so->so_mode))); 4486 4487 mutex_enter(&so->so_lock); 4488 so_lock_single(so); /* Set SOLOCKED */ 4489 if (!(so->so_state & SS_ISCONNECTED)) { 4490 error = ENOTCONN; 4491 goto done; 4492 } 4493 /* Added this check for X/Open */ 4494 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 4495 error = EINVAL; 4496 if (xnet_check_print) { 4497 printf("sockfs: X/Open getpeername check => EINVAL\n"); 4498 } 4499 goto done; 4500 } 4501 #ifdef DEBUG 4502 dprintso(so, 1, ("sotpi_getpeername (local): %s\n", 4503 pr_addr(so->so_family, so->so_faddr_sa, 4504 (t_uscalar_t)so->so_faddr_len))); 4505 #endif /* DEBUG */ 4506 4507 if (so->so_family == AF_UNIX || so->so_family == AF_NCA) { 4508 /* Transport has different name space - return local info */ 4509 error = 0; 4510 goto done; 4511 } 4512 4513 ASSERT(so->so_faddr_sa); 4514 /* Allocate local buffer to use with ioctl */ 4515 addrlen = (t_uscalar_t)so->so_faddr_maxlen; 4516 mutex_exit(&so->so_lock); 4517 addr = kmem_alloc(addrlen, KM_SLEEP); 4518 4519 /* 4520 * Issue TI_GETPEERNAME with signals masked. 4521 * Put the result in so_faddr_sa so that getpeername works after 4522 * a shutdown(output). 4523 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 4524 * back to the socket. 4525 */ 4526 strbuf.buf = addr; 4527 strbuf.maxlen = addrlen; 4528 strbuf.len = 0; 4529 4530 sigintr(&smask, 0); 4531 res = 0; 4532 ASSERT(CRED()); 4533 error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf, 4534 0, K_TO_K, CRED(), &res); 4535 sigunintr(&smask); 4536 4537 mutex_enter(&so->so_lock); 4538 /* 4539 * If there is an error record the error in so_error put don't fail 4540 * the getpeername. 
Instead fallback on the recorded 4541 * so->so_faddr_sa. 4542 */ 4543 if (error) { 4544 /* 4545 * Various stream head errors can be returned to the ioctl. 4546 * However, it is impossible to determine which ones of 4547 * these are really socket level errors that were incorrectly 4548 * consumed by the ioctl. Thus this code silently ignores the 4549 * error - to code explicitly does not reinstate the error 4550 * using soseterror(). 4551 * Experiments have shows that at least this set of 4552 * errors are reported and should not be reinstated on the 4553 * socket: 4554 * EINVAL E.g. if an I_LINK was in effect when 4555 * getpeername was called. 4556 * EPIPE The ioctl error semantics prefer the write 4557 * side error over the read side error. 4558 * ENOTCONN The transport just got disconnected but 4559 * sockfs had not yet seen the T_DISCON_IND 4560 * when issuing the ioctl. 4561 */ 4562 error = 0; 4563 } else if (res == 0 && strbuf.len > 0 && 4564 (so->so_state & SS_ISCONNECTED)) { 4565 ASSERT(strbuf.len <= (int)so->so_faddr_maxlen); 4566 so->so_faddr_len = (socklen_t)strbuf.len; 4567 bcopy(addr, so->so_faddr_sa, so->so_faddr_len); 4568 so->so_state |= SS_FADDR_VALID; 4569 } 4570 kmem_free(addr, addrlen); 4571 #ifdef DEBUG 4572 dprintso(so, 1, ("sotpi_getpeername (tp): %s\n", 4573 pr_addr(so->so_family, so->so_faddr_sa, 4574 (t_uscalar_t)so->so_faddr_len))); 4575 #endif /* DEBUG */ 4576 done: 4577 so_unlock_single(so, SOLOCKED); 4578 mutex_exit(&so->so_lock); 4579 return (error); 4580 } 4581 4582 /* 4583 * Update so_laddr by asking the transport (unless AF_UNIX). 
4584 */ 4585 int 4586 sotpi_getsockname(struct sonode *so) 4587 { 4588 struct strbuf strbuf; 4589 int error = 0, res; 4590 void *addr; 4591 t_uscalar_t addrlen; 4592 k_sigset_t smask; 4593 4594 dprintso(so, 1, ("sotpi_getsockname(%p) %s\n", 4595 so, pr_state(so->so_state, so->so_mode))); 4596 4597 mutex_enter(&so->so_lock); 4598 so_lock_single(so); /* Set SOLOCKED */ 4599 if (!(so->so_state & SS_ISBOUND) && so->so_family != AF_UNIX) { 4600 /* Return an all zero address except for the family */ 4601 if (so->so_family == AF_INET) 4602 so->so_laddr_len = (socklen_t)sizeof (sin_t); 4603 else if (so->so_family == AF_INET6) 4604 so->so_laddr_len = (socklen_t)sizeof (sin6_t); 4605 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 4606 bzero(so->so_laddr_sa, so->so_laddr_len); 4607 /* 4608 * Can not assume there is a sa_family for all 4609 * protocol families. 4610 */ 4611 if (so->so_family == AF_INET || so->so_family == AF_INET6) 4612 so->so_laddr_sa->sa_family = so->so_family; 4613 } 4614 #ifdef DEBUG 4615 dprintso(so, 1, ("sotpi_getsockname (local): %s\n", 4616 pr_addr(so->so_family, so->so_laddr_sa, 4617 (t_uscalar_t)so->so_laddr_len))); 4618 #endif /* DEBUG */ 4619 if (so->so_family == AF_UNIX) { 4620 /* Transport has different name space - return local info */ 4621 error = 0; 4622 goto done; 4623 } 4624 /* Allocate local buffer to use with ioctl */ 4625 addrlen = (t_uscalar_t)so->so_laddr_maxlen; 4626 mutex_exit(&so->so_lock); 4627 addr = kmem_alloc(addrlen, KM_SLEEP); 4628 4629 /* 4630 * Issue TI_GETMYNAME with signals masked. 4631 * Put the result in so_laddr_sa so that getsockname works after 4632 * a shutdown(output). 4633 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 4634 * back to the socket. 
4635 */ 4636 strbuf.buf = addr; 4637 strbuf.maxlen = addrlen; 4638 strbuf.len = 0; 4639 4640 sigintr(&smask, 0); 4641 res = 0; 4642 ASSERT(CRED()); 4643 error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf, 4644 0, K_TO_K, CRED(), &res); 4645 sigunintr(&smask); 4646 4647 mutex_enter(&so->so_lock); 4648 /* 4649 * If there is an error record the error in so_error put don't fail 4650 * the getsockname. Instead fallback on the recorded 4651 * so->so_laddr_sa. 4652 */ 4653 if (error) { 4654 /* 4655 * Various stream head errors can be returned to the ioctl. 4656 * However, it is impossible to determine which ones of 4657 * these are really socket level errors that were incorrectly 4658 * consumed by the ioctl. Thus this code silently ignores the 4659 * error - to code explicitly does not reinstate the error 4660 * using soseterror(). 4661 * Experiments have shows that at least this set of 4662 * errors are reported and should not be reinstated on the 4663 * socket: 4664 * EINVAL E.g. if an I_LINK was in effect when 4665 * getsockname was called. 4666 * EPIPE The ioctl error semantics prefer the write 4667 * side error over the read side error. 4668 */ 4669 error = 0; 4670 } else if (res == 0 && strbuf.len > 0 && 4671 (so->so_state & SS_ISBOUND)) { 4672 ASSERT(strbuf.len <= (int)so->so_laddr_maxlen); 4673 so->so_laddr_len = (socklen_t)strbuf.len; 4674 bcopy(addr, so->so_laddr_sa, so->so_laddr_len); 4675 so->so_state |= SS_LADDR_VALID; 4676 } 4677 kmem_free(addr, addrlen); 4678 #ifdef DEBUG 4679 dprintso(so, 1, ("sotpi_getsockname (tp): %s\n", 4680 pr_addr(so->so_family, so->so_laddr_sa, 4681 (t_uscalar_t)so->so_laddr_len))); 4682 #endif /* DEBUG */ 4683 done: 4684 so_unlock_single(so, SOLOCKED); 4685 mutex_exit(&so->so_lock); 4686 return (error); 4687 } 4688 4689 /* 4690 * Get socket options. For SOL_SOCKET options some options are handled 4691 * by the sockfs while others use the value recorded in the sonode as a 4692 * fallback should the T_SVR4_OPTMGMT_REQ fail. 
4693 * 4694 * On the return most *optlenp bytes are copied to optval. 4695 */ 4696 int 4697 sotpi_getsockopt(struct sonode *so, int level, int option_name, 4698 void *optval, socklen_t *optlenp, int flags) 4699 { 4700 struct T_optmgmt_req optmgmt_req; 4701 struct T_optmgmt_ack *optmgmt_ack; 4702 struct opthdr oh; 4703 struct opthdr *opt_res; 4704 mblk_t *mp = NULL; 4705 int error = 0; 4706 void *option = NULL; /* Set if fallback value */ 4707 t_uscalar_t maxlen = *optlenp; 4708 t_uscalar_t len; 4709 uint32_t value; 4710 4711 dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n", 4712 so, level, option_name, optval, optlenp, 4713 pr_state(so->so_state, so->so_mode))); 4714 4715 mutex_enter(&so->so_lock); 4716 so_lock_single(so); /* Set SOLOCKED */ 4717 4718 /* 4719 * Check for SOL_SOCKET options. 4720 * Certain SOL_SOCKET options are returned directly whereas 4721 * others only provide a default (fallback) value should 4722 * the T_SVR4_OPTMGMT_REQ fail. 4723 */ 4724 if (level == SOL_SOCKET) { 4725 /* Check parameters */ 4726 switch (option_name) { 4727 case SO_TYPE: 4728 case SO_ERROR: 4729 case SO_DEBUG: 4730 case SO_ACCEPTCONN: 4731 case SO_REUSEADDR: 4732 case SO_KEEPALIVE: 4733 case SO_DONTROUTE: 4734 case SO_BROADCAST: 4735 case SO_USELOOPBACK: 4736 case SO_OOBINLINE: 4737 case SO_SNDBUF: 4738 case SO_RCVBUF: 4739 #ifdef notyet 4740 case SO_SNDLOWAT: 4741 case SO_RCVLOWAT: 4742 case SO_SNDTIMEO: 4743 case SO_RCVTIMEO: 4744 #endif /* notyet */ 4745 case SO_DGRAM_ERRIND: 4746 if (maxlen < (t_uscalar_t)sizeof (int32_t)) { 4747 error = EINVAL; 4748 eprintsoline(so, error); 4749 goto done2; 4750 } 4751 break; 4752 case SO_LINGER: 4753 if (maxlen < (t_uscalar_t)sizeof (struct linger)) { 4754 error = EINVAL; 4755 eprintsoline(so, error); 4756 goto done2; 4757 } 4758 break; 4759 } 4760 4761 len = (t_uscalar_t)sizeof (uint32_t); /* Default */ 4762 4763 switch (option_name) { 4764 case SO_TYPE: 4765 value = so->so_type; 4766 option = &value; 4767 goto 
copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4768 4769 case SO_ERROR: 4770 value = sogeterr(so); 4771 option = &value; 4772 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4773 4774 case SO_ACCEPTCONN: 4775 if (so->so_state & SS_ACCEPTCONN) 4776 value = SO_ACCEPTCONN; 4777 else 4778 value = 0; 4779 #ifdef DEBUG 4780 if (value) { 4781 dprintso(so, 1, 4782 ("sotpi_getsockopt: 0x%x is set\n", 4783 option_name)); 4784 } else { 4785 dprintso(so, 1, 4786 ("sotpi_getsockopt: 0x%x not set\n", 4787 option_name)); 4788 } 4789 #endif /* DEBUG */ 4790 option = &value; 4791 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4792 4793 case SO_DEBUG: 4794 case SO_REUSEADDR: 4795 case SO_KEEPALIVE: 4796 case SO_DONTROUTE: 4797 case SO_BROADCAST: 4798 case SO_USELOOPBACK: 4799 case SO_OOBINLINE: 4800 case SO_DGRAM_ERRIND: 4801 value = (so->so_options & option_name); 4802 #ifdef DEBUG 4803 if (value) { 4804 dprintso(so, 1, 4805 ("sotpi_getsockopt: 0x%x is set\n", 4806 option_name)); 4807 } else { 4808 dprintso(so, 1, 4809 ("sotpi_getsockopt: 0x%x not set\n", 4810 option_name)); 4811 } 4812 #endif /* DEBUG */ 4813 option = &value; 4814 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4815 4816 /* 4817 * The following options are only returned by sockfs when the 4818 * T_SVR4_OPTMGMT_REQ fails. 4819 */ 4820 case SO_LINGER: 4821 option = &so->so_linger; 4822 len = (t_uscalar_t)sizeof (struct linger); 4823 break; 4824 case SO_SNDBUF: { 4825 ssize_t lvalue; 4826 4827 /* 4828 * If the option has not been set then get a default 4829 * value from the read queue. This value is 4830 * returned if the transport fails 4831 * the T_SVR4_OPTMGMT_REQ. 
4832 */ 4833 lvalue = so->so_sndbuf; 4834 if (lvalue == 0) { 4835 mutex_exit(&so->so_lock); 4836 (void) strqget(strvp2wq(SOTOV(so))->q_next, 4837 QHIWAT, 0, &lvalue); 4838 mutex_enter(&so->so_lock); 4839 dprintso(so, 1, 4840 ("got SO_SNDBUF %ld from q\n", lvalue)); 4841 } 4842 value = (int)lvalue; 4843 option = &value; 4844 len = (t_uscalar_t)sizeof (so->so_sndbuf); 4845 break; 4846 } 4847 case SO_RCVBUF: { 4848 ssize_t lvalue; 4849 4850 /* 4851 * If the option has not been set then get a default 4852 * value from the read queue. This value is 4853 * returned if the transport fails 4854 * the T_SVR4_OPTMGMT_REQ. 4855 * 4856 * XXX If SO_RCVBUF has been set and this is an 4857 * XPG 4.2 application then do not ask the transport 4858 * since the transport might adjust the value and not 4859 * return exactly what was set by the application. 4860 * For non-XPG 4.2 application we return the value 4861 * that the transport is actually using. 4862 */ 4863 lvalue = so->so_rcvbuf; 4864 if (lvalue == 0) { 4865 mutex_exit(&so->so_lock); 4866 (void) strqget(RD(strvp2wq(SOTOV(so))), 4867 QHIWAT, 0, &lvalue); 4868 mutex_enter(&so->so_lock); 4869 dprintso(so, 1, 4870 ("got SO_RCVBUF %ld from q\n", lvalue)); 4871 } else if (flags & _SOGETSOCKOPT_XPG4_2) { 4872 value = (int)lvalue; 4873 option = &value; 4874 goto copyout; /* skip asking transport */ 4875 } 4876 value = (int)lvalue; 4877 option = &value; 4878 len = (t_uscalar_t)sizeof (so->so_rcvbuf); 4879 break; 4880 } 4881 #ifdef notyet 4882 /* 4883 * We do not implement the semantics of these options 4884 * thus we shouldn't implement the options either. 
4885 */ 4886 case SO_SNDLOWAT: 4887 value = so->so_sndlowat; 4888 option = &value; 4889 break; 4890 case SO_RCVLOWAT: 4891 value = so->so_rcvlowat; 4892 option = &value; 4893 break; 4894 case SO_SNDTIMEO: 4895 value = so->so_sndtimeo; 4896 option = &value; 4897 break; 4898 case SO_RCVTIMEO: 4899 value = so->so_rcvtimeo; 4900 option = &value; 4901 break; 4902 #endif /* notyet */ 4903 } 4904 } 4905 4906 if (so->so_family == AF_NCA) { 4907 goto done2; 4908 } 4909 4910 mutex_exit(&so->so_lock); 4911 4912 /* Send request */ 4913 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 4914 optmgmt_req.MGMT_flags = T_CHECK; 4915 optmgmt_req.OPT_length = (t_scalar_t)(sizeof (oh) + maxlen); 4916 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 4917 4918 oh.level = level; 4919 oh.name = option_name; 4920 oh.len = maxlen; 4921 4922 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 4923 &oh, sizeof (oh), NULL, maxlen, 0, _ALLOC_SLEEP); 4924 /* Let option management work in the presence of data flow control */ 4925 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 4926 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 4927 mp = NULL; 4928 mutex_enter(&so->so_lock); 4929 if (error) { 4930 eprintsoline(so, error); 4931 goto done2; 4932 } 4933 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 4934 (t_uscalar_t)(sizeof (*optmgmt_ack) + sizeof (*opt_res)), &mp, 0); 4935 if (error) { 4936 if (option != NULL) { 4937 /* We have a fallback value */ 4938 error = 0; 4939 goto copyout; 4940 } 4941 eprintsoline(so, error); 4942 goto done2; 4943 } 4944 ASSERT(mp); 4945 optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr; 4946 opt_res = (struct opthdr *)sogetoff(mp, optmgmt_ack->OPT_offset, 4947 optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE); 4948 if (opt_res == NULL) { 4949 if (option != NULL) { 4950 /* We have a fallback value */ 4951 error = 0; 4952 goto copyout; 4953 } 4954 error = EPROTO; 4955 eprintsoline(so, error); 4956 goto done; 4957 } 4958 option = &opt_res[1]; 4959 4960 /* check 
to ensure that the option is within bounds */ 4961 if (((uintptr_t)option + opt_res->len < (uintptr_t)option) || 4962 (uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) { 4963 if (option != NULL) { 4964 /* We have a fallback value */ 4965 error = 0; 4966 goto copyout; 4967 } 4968 error = EPROTO; 4969 eprintsoline(so, error); 4970 goto done; 4971 } 4972 4973 len = opt_res->len; 4974 4975 copyout: { 4976 t_uscalar_t size = MIN(len, maxlen); 4977 bcopy(option, optval, size); 4978 bcopy(&size, optlenp, sizeof (size)); 4979 } 4980 done: 4981 freemsg(mp); 4982 done2: 4983 so_unlock_single(so, SOLOCKED); 4984 mutex_exit(&so->so_lock); 4985 return (error); 4986 } 4987 4988 /* 4989 * Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ. 4990 * SOL_SOCKET options are also recorded in the sonode. A setsockopt for 4991 * SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails - 4992 * setsockopt has to work even if the transport does not support the option. 
4993 */ 4994 int 4995 sotpi_setsockopt(struct sonode *so, int level, int option_name, 4996 const void *optval, t_uscalar_t optlen) 4997 { 4998 struct T_optmgmt_req optmgmt_req; 4999 struct opthdr oh; 5000 mblk_t *mp; 5001 int error = 0; 5002 boolean_t handled = B_FALSE; 5003 5004 dprintso(so, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n", 5005 so, level, option_name, optval, optlen, 5006 pr_state(so->so_state, so->so_mode))); 5007 5008 5009 /* X/Open requires this check */ 5010 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 5011 if (xnet_check_print) 5012 printf("sockfs: X/Open setsockopt check => EINVAL\n"); 5013 return (EINVAL); 5014 } 5015 5016 /* Caller allocates aligned optval, or passes null */ 5017 ASSERT(((uintptr_t)optval & (sizeof (t_scalar_t) - 1)) == 0); 5018 /* If optval is null optlen is 0, and vice-versa */ 5019 ASSERT(optval != NULL || optlen == 0); 5020 ASSERT(optlen != 0 || optval == NULL); 5021 5022 mutex_enter(&so->so_lock); 5023 so_lock_single(so); /* Set SOLOCKED */ 5024 mutex_exit(&so->so_lock); 5025 5026 if (so->so_family == AF_NCA) { 5027 /* Ignore any flow control problems with the transport. */ 5028 mutex_enter(&so->so_lock); 5029 goto done; 5030 } 5031 5032 /* 5033 * For SOCKET or TCP level options, try to set it here itself 5034 * provided socket has not been popped and we know the tcp 5035 * structure (stored in so_priv). 
5036 */ 5037 if ((level == SOL_SOCKET || level == IPPROTO_TCP) && 5038 (so->so_family == AF_INET || so->so_family == AF_INET6) && 5039 (so->so_version == SOV_SOCKSTREAM) && (so->so_priv != NULL)) { 5040 tcp_t *tcp = so->so_priv; 5041 boolean_t onoff; 5042 5043 #define intvalue (*(int32_t *)optval) 5044 5045 switch (level) { 5046 case SOL_SOCKET: 5047 switch (option_name) { /* Check length param */ 5048 case SO_DEBUG: 5049 case SO_REUSEADDR: 5050 case SO_DONTROUTE: 5051 case SO_BROADCAST: 5052 case SO_USELOOPBACK: 5053 case SO_OOBINLINE: 5054 case SO_DGRAM_ERRIND: 5055 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5056 error = EINVAL; 5057 eprintsoline(so, error); 5058 mutex_enter(&so->so_lock); 5059 goto done2; 5060 } 5061 ASSERT(optval); 5062 onoff = intvalue != 0; 5063 handled = B_TRUE; 5064 break; 5065 case SO_LINGER: 5066 if (optlen != 5067 (t_uscalar_t)sizeof (struct linger)) { 5068 error = EINVAL; 5069 eprintsoline(so, error); 5070 mutex_enter(&so->so_lock); 5071 goto done2; 5072 } 5073 ASSERT(optval); 5074 handled = B_TRUE; 5075 break; 5076 } 5077 5078 switch (option_name) { /* Do actions */ 5079 case SO_LINGER: { 5080 struct linger *lgr = (struct linger *)optval; 5081 5082 if (lgr->l_onoff) { 5083 tcp->tcp_linger = 1; 5084 tcp->tcp_lingertime = lgr->l_linger; 5085 so->so_linger.l_onoff = SO_LINGER; 5086 so->so_options |= SO_LINGER; 5087 } else { 5088 tcp->tcp_linger = 0; 5089 tcp->tcp_lingertime = 0; 5090 so->so_linger.l_onoff = 0; 5091 so->so_options &= ~SO_LINGER; 5092 } 5093 so->so_linger.l_linger = lgr->l_linger; 5094 handled = B_TRUE; 5095 break; 5096 } 5097 case SO_DEBUG: 5098 tcp->tcp_debug = onoff; 5099 #ifdef SOCK_TEST 5100 if (intvalue & 2) 5101 sock_test_timelimit = 10 * hz; 5102 else 5103 sock_test_timelimit = 0; 5104 5105 if (intvalue & 4) 5106 do_useracc = 0; 5107 else 5108 do_useracc = 1; 5109 #endif /* SOCK_TEST */ 5110 break; 5111 case SO_DONTROUTE: 5112 /* 5113 * SO_DONTROUTE, SO_USELOOPBACK and 5114 * SO_BROADCAST are only of interest to 
IP. 5115 * We track them here only so 5116 * that we can report their current value. 5117 */ 5118 tcp->tcp_dontroute = onoff; 5119 if (onoff) 5120 so->so_options |= option_name; 5121 else 5122 so->so_options &= ~option_name; 5123 break; 5124 case SO_USELOOPBACK: 5125 tcp->tcp_useloopback = onoff; 5126 if (onoff) 5127 so->so_options |= option_name; 5128 else 5129 so->so_options &= ~option_name; 5130 break; 5131 case SO_BROADCAST: 5132 tcp->tcp_broadcast = onoff; 5133 if (onoff) 5134 so->so_options |= option_name; 5135 else 5136 so->so_options &= ~option_name; 5137 break; 5138 case SO_REUSEADDR: 5139 tcp->tcp_reuseaddr = onoff; 5140 if (onoff) 5141 so->so_options |= option_name; 5142 else 5143 so->so_options &= ~option_name; 5144 break; 5145 case SO_OOBINLINE: 5146 tcp->tcp_oobinline = onoff; 5147 if (onoff) 5148 so->so_options |= option_name; 5149 else 5150 so->so_options &= ~option_name; 5151 break; 5152 case SO_DGRAM_ERRIND: 5153 tcp->tcp_dgram_errind = onoff; 5154 if (onoff) 5155 so->so_options |= option_name; 5156 else 5157 so->so_options &= ~option_name; 5158 break; 5159 } 5160 break; 5161 case IPPROTO_TCP: 5162 switch (option_name) { 5163 case TCP_NODELAY: 5164 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5165 error = EINVAL; 5166 eprintsoline(so, error); 5167 mutex_enter(&so->so_lock); 5168 goto done2; 5169 } 5170 ASSERT(optval); 5171 tcp->tcp_naglim = intvalue ? 
1 : tcp->tcp_mss; 5172 handled = B_TRUE; 5173 break; 5174 } 5175 break; 5176 default: 5177 handled = B_FALSE; 5178 break; 5179 } 5180 } 5181 5182 if (handled) { 5183 mutex_enter(&so->so_lock); 5184 goto done2; 5185 } 5186 5187 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5188 optmgmt_req.MGMT_flags = T_NEGOTIATE; 5189 optmgmt_req.OPT_length = (t_scalar_t)sizeof (oh) + optlen; 5190 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5191 5192 oh.level = level; 5193 oh.name = option_name; 5194 oh.len = optlen; 5195 5196 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5197 &oh, sizeof (oh), optval, optlen, 0, _ALLOC_SLEEP); 5198 /* Let option management work in the presence of data flow control */ 5199 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5200 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5201 mp = NULL; 5202 mutex_enter(&so->so_lock); 5203 if (error) { 5204 eprintsoline(so, error); 5205 goto done; 5206 } 5207 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5208 (t_uscalar_t)sizeof (struct T_optmgmt_ack), &mp, 0); 5209 if (error) { 5210 eprintsoline(so, error); 5211 goto done; 5212 } 5213 ASSERT(mp); 5214 /* No need to verify T_optmgmt_ack */ 5215 freemsg(mp); 5216 done: 5217 /* 5218 * Check for SOL_SOCKET options and record their values. 5219 * If we know about a SOL_SOCKET parameter and the transport 5220 * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or 5221 * EPROTO) we let the setsockopt succeed. 
5222 */ 5223 if (level == SOL_SOCKET) { 5224 /* Check parameters */ 5225 switch (option_name) { 5226 case SO_DEBUG: 5227 case SO_REUSEADDR: 5228 case SO_KEEPALIVE: 5229 case SO_DONTROUTE: 5230 case SO_BROADCAST: 5231 case SO_USELOOPBACK: 5232 case SO_OOBINLINE: 5233 case SO_SNDBUF: 5234 case SO_RCVBUF: 5235 #ifdef notyet 5236 case SO_SNDLOWAT: 5237 case SO_RCVLOWAT: 5238 case SO_SNDTIMEO: 5239 case SO_RCVTIMEO: 5240 #endif /* notyet */ 5241 case SO_DGRAM_ERRIND: 5242 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5243 error = EINVAL; 5244 eprintsoline(so, error); 5245 goto done2; 5246 } 5247 ASSERT(optval); 5248 handled = B_TRUE; 5249 break; 5250 case SO_LINGER: 5251 if (optlen != (t_uscalar_t)sizeof (struct linger)) { 5252 error = EINVAL; 5253 eprintsoline(so, error); 5254 goto done2; 5255 } 5256 ASSERT(optval); 5257 handled = B_TRUE; 5258 break; 5259 } 5260 5261 #define intvalue (*(int32_t *)optval) 5262 5263 switch (option_name) { 5264 case SO_TYPE: 5265 case SO_ERROR: 5266 case SO_ACCEPTCONN: 5267 /* Can't be set */ 5268 error = ENOPROTOOPT; 5269 goto done2; 5270 case SO_LINGER: { 5271 struct linger *l = (struct linger *)optval; 5272 5273 so->so_linger.l_linger = l->l_linger; 5274 if (l->l_onoff) { 5275 so->so_linger.l_onoff = SO_LINGER; 5276 so->so_options |= SO_LINGER; 5277 } else { 5278 so->so_linger.l_onoff = 0; 5279 so->so_options &= ~SO_LINGER; 5280 } 5281 break; 5282 } 5283 5284 case SO_DEBUG: 5285 #ifdef SOCK_TEST 5286 if (intvalue & 2) 5287 sock_test_timelimit = 10 * hz; 5288 else 5289 sock_test_timelimit = 0; 5290 5291 if (intvalue & 4) 5292 do_useracc = 0; 5293 else 5294 do_useracc = 1; 5295 #endif /* SOCK_TEST */ 5296 /* FALLTHRU */ 5297 case SO_REUSEADDR: 5298 case SO_KEEPALIVE: 5299 case SO_DONTROUTE: 5300 case SO_BROADCAST: 5301 case SO_USELOOPBACK: 5302 case SO_OOBINLINE: 5303 case SO_DGRAM_ERRIND: 5304 if (intvalue != 0) { 5305 dprintso(so, 1, 5306 ("sotpi_setsockopt: setting 0x%x\n", 5307 option_name)); 5308 so->so_options |= option_name; 5309 
} else { 5310 dprintso(so, 1, 5311 ("sotpi_setsockopt: clearing 0x%x\n", 5312 option_name)); 5313 so->so_options &= ~option_name; 5314 } 5315 break; 5316 /* 5317 * The following options are only returned by us when the 5318 * T_SVR4_OPTMGMT_REQ fails. 5319 * XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs 5320 * since the transport might adjust the value and not 5321 * return exactly what was set by the application. 5322 */ 5323 case SO_SNDBUF: 5324 so->so_sndbuf = intvalue; 5325 break; 5326 case SO_RCVBUF: 5327 so->so_rcvbuf = intvalue; 5328 break; 5329 #ifdef notyet 5330 /* 5331 * We do not implement the semantics of these options 5332 * thus we shouldn't implement the options either. 5333 */ 5334 case SO_SNDLOWAT: 5335 so->so_sndlowat = intvalue; 5336 break; 5337 case SO_RCVLOWAT: 5338 so->so_rcvlowat = intvalue; 5339 break; 5340 case SO_SNDTIMEO: 5341 so->so_sndtimeo = intvalue; 5342 break; 5343 case SO_RCVTIMEO: 5344 so->so_rcvtimeo = intvalue; 5345 break; 5346 #endif /* notyet */ 5347 } 5348 #undef intvalue 5349 5350 if (error) { 5351 if ((error == ENOPROTOOPT || error == EPROTO || 5352 error == EINVAL) && handled) { 5353 dprintso(so, 1, 5354 ("setsockopt: ignoring error %d for 0x%x\n", 5355 error, option_name)); 5356 error = 0; 5357 } 5358 } 5359 } 5360 done2: 5361 ret: 5362 so_unlock_single(so, SOLOCKED); 5363 mutex_exit(&so->so_lock); 5364 return (error); 5365 } 5366