1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/t_lock.h> 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/buf.h> 34 #include <sys/conf.h> 35 #include <sys/cred.h> 36 #include <sys/kmem.h> 37 #include <sys/sysmacros.h> 38 #include <sys/vfs.h> 39 #include <sys/vnode.h> 40 #include <sys/debug.h> 41 #include <sys/errno.h> 42 #include <sys/time.h> 43 #include <sys/file.h> 44 #include <sys/open.h> 45 #include <sys/user.h> 46 #include <sys/termios.h> 47 #include <sys/stream.h> 48 #include <sys/strsubr.h> 49 #include <sys/strsun.h> 50 #include <sys/ddi.h> 51 #include <sys/esunddi.h> 52 #include <sys/flock.h> 53 #include <sys/modctl.h> 54 #include <sys/vtrace.h> 55 #include <sys/cmn_err.h> 56 #include <sys/pathname.h> 57 58 #include <sys/socket.h> 59 #include <sys/socketvar.h> 60 #include <sys/sockio.h> 61 #include <netinet/in.h> 62 #include <sys/un.h> 63 #include <sys/strsun.h> 64 65 #include <sys/tiuser.h> 66 #define _SUN_TPI_VERSION 2 67 #include <sys/tihdr.h> 
#include <sys/timod.h>		/* TI_GETMYNAME, TI_GETPEERNAME */

#include <c2/audit.h>

#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/tcp.h>
#include <inet/udp_impl.h>

#include <sys/zone.h>

#include <fs/sockfs/nl7c.h>
#include <fs/sockfs/nl7curi.h>

#include <inet/kssl/ksslapi.h>

/*
 * Possible failures when memory can't be allocated. The documented behavior:
 *
 *		5.5:			4.X:		XNET:
 * accept:	ENOMEM/ENOSR/EINTR	- (EINTR)	ENOMEM/ENOBUFS/ENOSR/
 *							EINTR
 *	(4.X does not document EINTR but returns it)
 * bind:	ENOSR			-		ENOBUFS/ENOSR
 * connect:	EINTR			EINTR		ENOBUFS/ENOSR/EINTR
 * getpeername:	ENOMEM/ENOSR		ENOBUFS (-)	ENOBUFS/ENOSR
 * getsockname:	ENOMEM/ENOSR		ENOBUFS (-)	ENOBUFS/ENOSR
 *	(4.X getpeername and getsockname do not fail in practice)
 * getsockopt:	ENOMEM/ENOSR		-		ENOBUFS/ENOSR
 * listen:	-			-		ENOBUFS
 * recv:	ENOMEM/ENOSR/EINTR	EINTR		ENOBUFS/ENOMEM/ENOSR/
 *							EINTR
 * send:	ENOMEM/ENOSR/EINTR	ENOBUFS/EINTR	ENOBUFS/ENOMEM/ENOSR/
 *							EINTR
 * setsockopt:	ENOMEM/ENOSR		-		ENOBUFS/ENOMEM/ENOSR
 * shutdown:	ENOMEM/ENOSR		-		ENOBUFS/ENOSR
 * socket:	ENOMEM/ENOSR		ENOBUFS		ENOBUFS/ENOMEM/ENOSR
 * socketpair:	ENOMEM/ENOSR		-		ENOBUFS/ENOMEM/ENOSR
 *
 * Resolution. When allocation fails:
 *	recv: return EINTR
 *	send: return EINTR
 *	connect, accept: EINTR
 *	bind, listen, shutdown (unbind, unix_close, disconnect): sleep
 *	socket, socketpair: ENOBUFS
 *	getpeername, getsockname: sleep
 *	getsockopt, setsockopt: sleep
 */

#ifdef SOCK_TEST
/*
 * Variables that make sockfs do something other than the standard TPI
 * for the AF_INET transports.
 *
 * solisten_tpi_tcp:
 *	TCP can handle a O_T_BIND_REQ with an increased backlog even though
 *	the transport is already bound. This is needed to avoid losing the
 *	port number should listen() do a T_UNBIND_REQ followed by a
 *	O_T_BIND_REQ.
 *
 * soconnect_tpi_udp:
 *	UDP and ICMP can handle a T_CONN_REQ.
 *	This is needed to make the sequence of connect(), getsockname()
 *	return the local IP address used to send packets to the connected to
 *	destination.
 *
 * soconnect_tpi_tcp:
 *	TCP can handle a T_CONN_REQ without seeing a O_T_BIND_REQ.
 *	Set this to non-zero to send TPI conformant messages to TCP in this
 *	respect. This is a performance optimization.
 *
 * soaccept_tpi_tcp:
 *	TCP can handle a T_CONN_REQ without the acceptor being bound.
 *	This is a performance optimization that has been picked up in XTI.
 *
 * soaccept_tpi_multioptions:
 *	When inheriting SOL_SOCKET options from the listener to the accepting
 *	socket send them as a single message for AF_INET{,6}.
 *
 * With SOCK_TEST defined these are real variables; without it the
 * same names are compile-time constants (see the #else branch below)
 * carrying the same values as the initializers here.
 */
int solisten_tpi_tcp = 0;
int soconnect_tpi_udp = 0;
int soconnect_tpi_tcp = 0;
int soaccept_tpi_tcp = 0;
int soaccept_tpi_multioptions = 1;
#else /* SOCK_TEST */
#define	soconnect_tpi_tcp	0
#define	soconnect_tpi_udp	0
#define	solisten_tpi_tcp	0
#define	soaccept_tpi_tcp	0
#define	soaccept_tpi_multioptions	1
#endif /* SOCK_TEST */

#ifdef SOCK_TEST
/* Test hooks defined elsewhere; their use is not visible in this part. */
extern int do_useracc;
extern clock_t sock_test_timelimit;
#endif /* SOCK_TEST */

/*
 * Some X/Open added checks might have to be backed out to keep SunOS 4.X
 * applications working. Turn on this flag to disable these checks.
 *
 * xnet_check_print: when non-zero, a message is printed when one of the
 * X/Open checks causes a failure (see the SS_CANTSENDMORE check in
 * sotpi_bindlisten()).
 *
 * xnet_truncate_print: not referenced in this part of the file;
 * presumably enables a similar diagnostic for truncation - TODO confirm.
 */
int xnet_skip_checks = 0;
int xnet_check_print = 0;
int xnet_truncate_print = 0;

extern void sigintr(k_sigset_t *, int);
extern void sigunintr(k_sigset_t *);

/*
 * NL7C address list helpers. sotpi_bindlisten() uses them to look up or
 * add the bound address and to register the listener sonode.
 */
extern	void *nl7c_lookup_addr(void *, t_uscalar_t);
extern	void *nl7c_add_addr(void *, t_uscalar_t);
extern	void nl7c_listener_addr(void *, struct sonode *);

/* Sockets acting as an in-kernel SSL proxy */
extern mblk_t	*strsock_kssl_input(vnode_t *, mblk_t *, strwakeup_t *,
		    strsigset_t *, strsigset_t *, strpollset_t *);
extern mblk_t	*strsock_kssl_output(vnode_t *, mblk_t *, strwakeup_t *,
		    strsigset_t *, strsigset_t *, strpollset_t *);

static int	sotpi_unbind(struct sonode *, int);

/* TPI sockfs sonode operations */
static int	sotpi_accept(struct sonode *, int, struct sonode **);
static int	sotpi_bind(struct sonode *, struct sockaddr *, socklen_t,
		    int);
static int	sotpi_connect(struct sonode *, const struct sockaddr *,
		    socklen_t, int, int);
static int	sotpi_listen(struct sonode *, int);
static int	sotpi_sendmsg(struct sonode *, struct nmsghdr *,
		    struct uio *);
static int	sotpi_shutdown(struct sonode *, int);
static int	sotpi_getsockname(struct sonode *);
static int	sosend_dgramcmsg(struct sonode *, struct sockaddr *, socklen_t,
		    struct uio *, void *, t_uscalar_t, int);
static int	sodgram_direct(struct sonode *, struct sockaddr *,
		    socklen_t, struct uio *, int);

/*
 * Operations vector for TPI based sockets. The initializers are
 * positional, so their order must match the member order of sonodeops_t
 * as indicated by the per-entry comments.
 *
 * sotpi_recvmsg, sotpi_getpeername, sotpi_getsockopt and sotpi_setsockopt
 * have no prototype in this part of the file; presumably they are declared
 * in one of the included headers - TODO confirm.
 */
sonodeops_t sotpi_sonodeops = {
	sotpi_accept,		/* sop_accept		*/
	sotpi_bind,		/* sop_bind		*/
	sotpi_listen,		/* sop_listen		*/
	sotpi_connect,		/* sop_connect		*/
	sotpi_recvmsg,		/* sop_recvmsg		*/
	sotpi_sendmsg,		/* sop_sendmsg		*/
	sotpi_getpeername,	/* sop_getpeername	*/
	sotpi_getsockname,	/* sop_getsockname	*/
	sotpi_shutdown,		/* sop_shutdown		*/
	sotpi_getsockopt,	/* sop_getsockopt	*/
	sotpi_setsockopt	/* sop_setsockopt	*/
};

/*
 * Common create code for socket
and accept. If tso is set the values 221 * from that node is used instead of issuing a T_INFO_REQ. 222 * 223 * Assumes that the caller has a VN_HOLD on accessvp. 224 * The VN_RELE will occur either when sotpi_create() fails or when 225 * the returned sonode is freed. 226 */ 227 struct sonode * 228 sotpi_create(vnode_t *accessvp, int domain, int type, int protocol, int version, 229 struct sonode *tso, int *errorp) 230 { 231 struct sonode *so; 232 vnode_t *vp; 233 int flags, error; 234 235 ASSERT(accessvp != NULL); 236 vp = makesockvp(accessvp, domain, type, protocol); 237 ASSERT(vp != NULL); 238 so = VTOSO(vp); 239 240 flags = FREAD|FWRITE; 241 242 if ((type == SOCK_STREAM || type == SOCK_DGRAM) && 243 (domain == AF_INET || domain == AF_INET6) && 244 (protocol == IPPROTO_TCP || protocol == IPPROTO_UDP || 245 protocol == IPPROTO_IP)) { 246 /* Tell tcp or udp that it's talking to sockets */ 247 flags |= SO_SOCKSTR; 248 249 /* 250 * Here we indicate to socktpi_open() our attempt to 251 * make direct calls between sockfs and transport. 252 * The final decision is left to socktpi_open(). 253 */ 254 so->so_state |= SS_DIRECT; 255 256 ASSERT(so->so_type != SOCK_DGRAM || tso == NULL); 257 if (so->so_type == SOCK_STREAM && tso != NULL) { 258 if (tso->so_state & SS_DIRECT) { 259 /* 260 * Inherit SS_DIRECT from listener and pass 261 * SO_ACCEPTOR open flag to tcp, indicating 262 * that this is an accept fast-path instance. 263 */ 264 flags |= SO_ACCEPTOR; 265 } else { 266 /* 267 * SS_DIRECT is not set on listener, meaning 268 * that the listener has been converted from 269 * a socket to a stream. Ensure that the 270 * acceptor inherits these settings. 271 */ 272 so->so_state &= ~SS_DIRECT; 273 flags &= ~SO_SOCKSTR; 274 } 275 } 276 } 277 278 /* 279 * Tell local transport that it is talking to sockets. 
280 */ 281 if (so->so_family == AF_UNIX) { 282 flags |= SO_SOCKSTR; 283 } 284 285 /* Initialize the kernel SSL proxy fields */ 286 so->so_kssl_type = KSSL_NO_PROXY; 287 so->so_kssl_ent = NULL; 288 so->so_kssl_ctx = NULL; 289 290 if (error = socktpi_open(&vp, flags, CRED())) { 291 VN_RELE(vp); 292 *errorp = error; 293 return (NULL); 294 } 295 296 if (error = so_strinit(so, tso)) { 297 (void) VOP_CLOSE(vp, 0, 1, 0, CRED()); 298 VN_RELE(vp); 299 *errorp = error; 300 return (NULL); 301 } 302 303 if (version == SOV_DEFAULT) 304 version = so_default_version; 305 306 so->so_version = (short)version; 307 308 return (so); 309 } 310 311 /* 312 * Bind the socket to an unspecified address in sockfs only. 313 * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't 314 * required in all cases. 315 */ 316 static void 317 so_automatic_bind(struct sonode *so) 318 { 319 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 320 321 ASSERT(MUTEX_HELD(&so->so_lock)); 322 ASSERT(!(so->so_state & SS_ISBOUND)); 323 ASSERT(so->so_unbind_mp); 324 325 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 326 bzero(so->so_laddr_sa, so->so_laddr_len); 327 so->so_laddr_sa->sa_family = so->so_family; 328 so->so_state |= SS_ISBOUND; 329 } 330 331 332 /* 333 * bind the socket. 334 * 335 * If the socket is already bound and none of _SOBIND_SOCKBSD or _SOBIND_XPG4_2 336 * are passed in we allow rebinding. Note that for backwards compatibility 337 * even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to sobind/bind. 338 * Thus the rebinding code is currently not executed. 339 * 340 * The constraints for rebinding are: 341 * - it is a SOCK_DGRAM, or 342 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 343 * and no listen() has been done. 344 * This rebinding code was added based on some language in the XNET book 345 * about not returning EINVAL it the protocol allows rebinding. However, 346 * this language is not present in the Posix socket draft. 
Thus maybe the
 * rebinding logic should be deleted from the source.
 *
 * A null "name" can be used to unbind the socket if:
 * - it is a SOCK_DGRAM, or
 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected
 *   and no listen() has been done.
 *
 * Arguments:
 *	so	- socket to bind.
 *	name	- address to bind to; must be NULL (with namelen 0) when
 *		  _SOBIND_REBIND or _SOBIND_UNSPEC is set. The AF_INET case
 *		  overwrites name->sa_family with so_family.
 *	namelen	- length of name in bytes.
 *	backlog	- sent to the transport as CONIND_number; also stored in
 *		  so_backlog when _SOBIND_LISTEN is set.
 *	flags	- _SOBIND_* flags. The ones examined here are:
 *		  _SOBIND_LOCK_HELD: caller already holds so_lock and
 *			SOLOCKED; neither is released on return.
 *		  _SOBIND_REBIND: re-send the saved local address.
 *		  _SOBIND_UNSPEC: bind to an unspecified address; a no-op if
 *			the socket is already bound.
 *		  _SOBIND_SOCKBSD, _SOBIND_XPG4_2: disallow rebinding; the
 *			latter also enforces the AF_INET sa_family check.
 *		  _SOBIND_LISTEN: set SS_ACCEPTCONN and so_backlog before
 *			the request is sent (undone on error).
 *		  _SOBIND_NOXLATE: set SS_FADDR_NOXLATE (AF_UNIX only).
 *
 * Returns 0 on success or an errno value. On most failures a socket that
 * is (or was left) bound is unbound again with sotpi_unbind(); the two
 * early "already bound" EINVAL cases leave the existing binding alone.
 *
 * so_lock is dropped while the bind request is sent to the transport and
 * reacquired before waiting for the acknowledgement.
 */
static int
sotpi_bindlisten(struct sonode *so, struct sockaddr *name,
    socklen_t namelen, int backlog, int flags)
{
	struct T_bind_req	bind_req;
	struct T_bind_ack	*bind_ack;
	int			error = 0;
	mblk_t			*mp;
	void			*addr;
	t_uscalar_t		addrlen;
	int			unbind_on_err = 1;
	boolean_t		clear_acceptconn_on_err = B_FALSE;
	boolean_t		restore_backlog_on_err = B_FALSE;
	int			save_so_backlog;
	t_scalar_t		PRIM_type = O_T_BIND_REQ;
	boolean_t		tcp_udp_xport;
	void			*nl7c = NULL;

	dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n",
	    so, name, namelen, backlog, flags,
	    pr_state(so->so_state, so->so_mode)));

	/*
	 * Only the socket type is tested here; the address family is
	 * checked separately wherever tcp_udp_xport is used.
	 */
	tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM;

	if (!(flags & _SOBIND_LOCK_HELD)) {
		mutex_enter(&so->so_lock);
		so_lock_single(so);	/* Set SOLOCKED */
	} else {
		ASSERT(MUTEX_HELD(&so->so_lock));
		ASSERT(so->so_flag & SOLOCKED);
	}

	/*
	 * Make sure that there is a preallocated unbind_req message
	 * before binding. This message is allocated when the socket is
	 * created but it might have been consumed.
	 */
	if (so->so_unbind_mp == NULL) {
		dprintso(so, 1, ("sobind: allocating unbind_req\n"));
		/* NOTE: holding so_lock while sleeping */
		so->so_unbind_mp =
		    soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP);
	}

	/*
	 * Step 1: work out the address (addr/addrlen) to pass to the
	 * transport. There are three cases: rebind of the saved address,
	 * unspecified bind, and explicit bind to "name".
	 */
	if (flags & _SOBIND_REBIND) {
		/*
		 * Called from solisten after doing an sotpi_unbind() or
		 * potentially without the unbind (latter for AF_INET{,6}).
		 */
		ASSERT(name == NULL && namelen == 0);

		if (so->so_family == AF_UNIX) {
			ASSERT(so->so_ux_bound_vp);
			addr = &so->so_ux_laddr;
			addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr);
			dprintso(so, 1,
			("sobind rebind UNIX: addrlen %d, addr 0x%p, vp %p\n",
			    addrlen,
			    ((struct so_ux_addr *)addr)->soua_vp,
			    so->so_ux_bound_vp));
		} else {
			addr = so->so_laddr_sa;
			addrlen = (t_uscalar_t)so->so_laddr_len;
		}
	} else if (flags & _SOBIND_UNSPEC) {
		ASSERT(name == NULL && namelen == 0);

		/*
		 * The caller checked SS_ISBOUND but not necessarily
		 * under so_lock
		 */
		if (so->so_state & SS_ISBOUND) {
			/* No error */
			goto done;
		}

		/* Set an initial local address */
		switch (so->so_family) {
		case AF_UNIX:
			/*
			 * Use an address with same size as struct sockaddr
			 * just like BSD.
			 */
			so->so_laddr_len =
				(socklen_t)sizeof (struct sockaddr);
			ASSERT(so->so_laddr_len <= so->so_laddr_maxlen);
			bzero(so->so_laddr_sa, so->so_laddr_len);
			so->so_laddr_sa->sa_family = so->so_family;

			/*
			 * Pass down an address with the implicit bind
			 * magic number and the rest all zeros.
			 * The transport will return a unique address.
			 */
			so->so_ux_laddr.soua_vp = NULL;
			so->so_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT;
			addr = &so->so_ux_laddr;
			addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr);
			break;

		case AF_INET:
		case AF_INET6:
			/*
			 * An unspecified bind in TPI has a NULL address.
			 * Set the address in sockfs to have the sa_family.
			 */
			so->so_laddr_len = (so->so_family == AF_INET) ?
			    (socklen_t)sizeof (sin_t) :
			    (socklen_t)sizeof (sin6_t);
			ASSERT(so->so_laddr_len <= so->so_laddr_maxlen);
			bzero(so->so_laddr_sa, so->so_laddr_len);
			so->so_laddr_sa->sa_family = so->so_family;
			addr = NULL;
			addrlen = 0;
			break;

		default:
			/*
			 * An unspecified bind in TPI has a NULL address.
			 * Set the address in sockfs to be zero length.
			 *
			 * Can not assume there is a sa_family for all
			 * protocol families. For example, AF_X25 does not
			 * have a family field.
			 */
			bzero(so->so_laddr_sa, so->so_laddr_len);
			so->so_laddr_len = 0;	/* XXX correct? */
			addr = NULL;
			addrlen = 0;
			break;
		}

	} else {
		if (so->so_state & SS_ISBOUND) {
			/*
			 * If it is ok to rebind the socket, first unbind
			 * with the transport. A rebind to the NULL address
			 * is interpreted as an unbind.
			 * Note that a bind to NULL in BSD does unbind the
			 * socket but it fails with EINVAL.
			 * Note that regular sockets set SOV_SOCKBSD i.e.
			 * _SOBIND_SOCKBSD gets set here hence no type of
			 * socket does currently allow rebinding.
			 *
			 * If the name is NULL just do an unbind.
			 */
			if (flags & (_SOBIND_SOCKBSD|_SOBIND_XPG4_2) &&
			    name != NULL) {
				error = EINVAL;
				/* Keep the existing binding intact */
				unbind_on_err = 0;
				eprintsoline(so, error);
				goto done;
			}
			if ((so->so_mode & SM_CONNREQUIRED) &&
			    (so->so_state & SS_CANTREBIND)) {
				error = EINVAL;
				/* Keep the existing binding intact */
				unbind_on_err = 0;
				eprintsoline(so, error);
				goto done;
			}
			error = sotpi_unbind(so, 0);
			if (error) {
				eprintsoline(so, error);
				goto done;
			}
			ASSERT(!(so->so_state & SS_ISBOUND));
			if (name == NULL) {
				so->so_state &=
					~(SS_ISCONNECTED|SS_ISCONNECTING);
				goto done;
			}
		}
		/* X/Open requires this check */
		if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
			if (xnet_check_print) {
				printf("sockfs: X/Open bind state check "
				    "caused EINVAL\n");
			}
			error = EINVAL;
			goto done;
		}

		/* Per-family validation of the caller supplied address */
		switch (so->so_family) {
		case AF_UNIX:
			/*
			 * All AF_UNIX addresses are nul terminated
			 * when copied (copyin_name) in so the minimum
			 * length is 3 bytes.
			 */
			if (name == NULL ||
			    (ssize_t)namelen <= sizeof (short) + 1) {
				error = EISDIR;
				eprintsoline(so, error);
				goto done;
			}
			/*
			 * Verify so_family matches the bound family.
			 * BSD does not check this for AF_UNIX resulting
			 * in funny mknods.
			 */
			if (name->sa_family != so->so_family) {
				error = EAFNOSUPPORT;
				goto done;
			}
			break;
		case AF_INET:
			if (name == NULL) {
				error = EINVAL;
				eprintsoline(so, error);
				goto done;
			}
			if ((size_t)namelen != sizeof (sin_t)) {
				error = name->sa_family != so->so_family ?
				    EAFNOSUPPORT : EINVAL;
				eprintsoline(so, error);
				goto done;
			}
			if ((flags & _SOBIND_XPG4_2) &&
			    (name->sa_family != so->so_family)) {
				/*
				 * This check has to be made for X/Open
				 * sockets however application failures have
				 * been observed when it is applied to
				 * all sockets.
				 */
				error = EAFNOSUPPORT;
				eprintsoline(so, error);
				goto done;
			}
			/*
			 * Force a zero sa_family to match so_family.
			 *
			 * Some programs like inetd(1M) don't set the
			 * family field. Other programs leave
			 * sin_family set to garbage - SunOS 4.X does
			 * not check the family field on a bind.
			 * We use the family field that
			 * was passed in to the socket() call.
			 */
			name->sa_family = so->so_family;
			break;

		case AF_INET6: {
#ifdef DEBUG
			sin6_t *sin6 = (sin6_t *)name;
#endif /* DEBUG */

			if (name == NULL) {
				error = EINVAL;
				eprintsoline(so, error);
				goto done;
			}
			if ((size_t)namelen != sizeof (sin6_t)) {
				error = name->sa_family != so->so_family ?
				    EAFNOSUPPORT : EINVAL;
				eprintsoline(so, error);
				goto done;
			}
			if (name->sa_family != so->so_family) {
				/*
				 * With IPv6 we require the family to match
				 * unlike in IPv4.
				 */
				error = EAFNOSUPPORT;
				eprintsoline(so, error);
				goto done;
			}
#ifdef DEBUG
			/*
			 * Verify that apps don't forget to clear
			 * sin6_scope_id etc
			 */
			if (sin6->sin6_scope_id != 0 &&
			    !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
				zcmn_err(getzoneid(), CE_WARN,
				    "bind with uninitialized sin6_scope_id "
				    "(%d) on socket. Pid = %d\n",
				    (int)sin6->sin6_scope_id,
				    (int)curproc->p_pid);
			}
			if (sin6->__sin6_src_id != 0) {
				zcmn_err(getzoneid(), CE_WARN,
				    "bind with uninitialized __sin6_src_id "
				    "(%d) on socket. Pid = %d\n",
				    (int)sin6->__sin6_src_id,
				    (int)curproc->p_pid);
			}
#endif /* DEBUG */
			break;
		}
		default:
			/*
			 * Don't do any length or sa_family check to allow
			 * non-sockaddr style addresses.
			 */
			if (name == NULL) {
				error = EINVAL;
				eprintsoline(so, error);
				goto done;
			}
			break;
		}

		if (namelen > (t_uscalar_t)so->so_laddr_maxlen) {
			error = ENAMETOOLONG;
			eprintsoline(so, error);
			goto done;
		}
		/*
		 * Save local address.
		 */
		so->so_laddr_len = (socklen_t)namelen;
		ASSERT(so->so_laddr_len <= so->so_laddr_maxlen);
		bcopy(name, so->so_laddr_sa, namelen);

		addr = so->so_laddr_sa;
		addrlen = (t_uscalar_t)so->so_laddr_len;
		switch (so->so_family) {
		case AF_INET6:
		case AF_INET:
			break;
		case AF_UNIX: {
			struct sockaddr_un *soun =
				(struct sockaddr_un *)so->so_laddr_sa;
			struct vnode *vp;
			struct vattr vattr;

			ASSERT(so->so_ux_bound_vp == NULL);
			/*
			 * Create vnode for the specified path name.
			 * Keep vnode held with a reference in so_ux_bound_vp.
			 * Use the vnode pointer as the address used in the
			 * bind with the transport.
			 *
			 * Use the same mode as in BSD. In particular this does
			 * not observe the umask.
			 */
			/*
			 * NOTE(review): the assignment to va_mode below
			 * masks 0777 with ~u.u_cmask, so the statement above
			 * that the umask is not observed looks stale -
			 * confirm which one is intended.
			 */
			/* MAXPATHLEN + soun_family + nul termination */
			if (so->so_laddr_len >
			    (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) {
				error = ENAMETOOLONG;
				eprintsoline(so, error);
				goto done;
			}
			vattr.va_type = VSOCK;
			vattr.va_mode = 0777 & ~u.u_cmask;
			vattr.va_mask = AT_TYPE|AT_MODE;
			/* NOTE: holding so_lock */
			error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr,
						EXCL, 0, &vp, CRMKNOD, 0, 0);
			if (error) {
				/* An existing path means the address is taken */
				if (error == EEXIST)
					error = EADDRINUSE;
				eprintsoline(so, error);
				goto done;
			}
			/*
			 * Establish pointer from the underlying filesystem
			 * vnode to the socket node.
			 * so_ux_bound_vp and v_stream->sd_vnode form the
			 * cross-linkage between the underlying filesystem
			 * node and the socket node.
			 */
			/*
			 * NOTE(review): if a later step in this function
			 * fails before SS_ISBOUND is set, nothing visible
			 * here clears so_ux_bound_vp or releases vp -
			 * verify that this is handled elsewhere.
			 */
			ASSERT(SOTOV(so)->v_stream);
			mutex_enter(&vp->v_lock);
			vp->v_stream = SOTOV(so)->v_stream;
			so->so_ux_bound_vp = vp;
			mutex_exit(&vp->v_lock);

			/*
			 * Use the vnode pointer value as a unique address
			 * (together with the magic number to avoid conflicts
			 * with implicit binds) in the transport provider.
			 */
			so->so_ux_laddr.soua_vp = (void *)so->so_ux_bound_vp;
			so->so_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT;
			addr = &so->so_ux_laddr;
			addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr);
			dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n",
			    addrlen,
			    ((struct so_ux_addr *)addr)->soua_vp));
			break;
		}
		} /* end switch (so->so_family) */
	}

	/*
	 * set SS_ACCEPTCONN before sending down O_T_BIND_REQ since
	 * the transport can start passing up T_CONN_IND messages
	 * as soon as it receives the bind req and strsock_proto()
	 * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs.
	 */
	if (flags & _SOBIND_LISTEN) {
		/* Only undo SS_ACCEPTCONN on error if we were the one to set it */
		if ((so->so_state & SS_ACCEPTCONN) == 0)
			clear_acceptconn_on_err = B_TRUE;
		save_so_backlog = so->so_backlog;
		restore_backlog_on_err = B_TRUE;
		so->so_state |= SS_ACCEPTCONN;
		so->so_backlog = backlog;
	}

	/*
	 * If NL7C addr(s) have been configured check for addr/port match,
	 * or if an implicit NL7C socket via AF_NCA mark socket as NL7C.
	 *
	 * NL7C supports the TCP transport only so check AF_INET and AF_INET6
	 * family sockets only. If match mark as such.
	 */
	if (nl7c_enabled && ((addr != NULL &&
	    (so->so_family == AF_INET || so->so_family == AF_INET6) &&
	    (nl7c = nl7c_lookup_addr(addr, addrlen))) ||
	    so->so_nl7c_flags == NL7C_AF_NCA)) {
		/*
		 * NL7C is not supported in non-global zones,
		 * we enforce this restriction here.
		 */
		if (so->so_zoneid == GLOBAL_ZONEID) {
			/* An NL7C socket, mark it */
			so->so_nl7c_flags |= NL7C_ENABLED;
			if (nl7c == NULL) {
				/*
				 * Was an AF_NCA bind() so add it to the
				 * addr list for reporting purposes.
				 */
				nl7c = nl7c_add_addr(addr, addrlen);
			}
		} else
			nl7c = NULL;
	}
	/*
	 * We send a T_BIND_REQ for TCP/UDP since we know it supports it,
	 * for other transports we will send in a O_T_BIND_REQ.
	 */
	if (tcp_udp_xport &&
	    (so->so_family == AF_INET || so->so_family == AF_INET6))
		PRIM_type = T_BIND_REQ;

	/* Build the bind request: fixed header followed by the address */
	bind_req.PRIM_type = PRIM_type;
	bind_req.ADDR_length = addrlen;
	bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req);
	bind_req.CONIND_number = backlog;
	/* NOTE: holding so_lock while sleeping */
	mp = soallocproto2(&bind_req, sizeof (bind_req),
	    addr, addrlen, 0, _ALLOC_SLEEP);
	/* Cleared here; set again below once the ack has been checked */
	so->so_state &= ~SS_LADDR_VALID;

	/* Done using so_laddr_sa - can drop the lock */
	mutex_exit(&so->so_lock);

	/*
	 * Intercept the bind_req message here to check if this <address/port>
	 * was configured as an SSL proxy server, or if another endpoint was
	 * already configured to act as a proxy for us.
	 *
	 * Note, only if NL7C not enabled for this socket.
	 */
	if (nl7c == NULL &&
	    (so->so_family == AF_INET || so->so_family == AF_INET6) &&
	    so->so_type == SOCK_STREAM) {

		/* Drop any kssl entry left over from an earlier bind */
		if (so->so_kssl_ent != NULL) {
			kssl_release_ent(so->so_kssl_ent, so, so->so_kssl_type);
			so->so_kssl_ent = NULL;
		}

		so->so_kssl_type = kssl_check_proxy(mp, so, &so->so_kssl_ent);
		switch (so->so_kssl_type) {
		case KSSL_NO_PROXY:
			break;

		case KSSL_HAS_PROXY:
			/*
			 * The request is not sent to the transport in this
			 * case; retake the lock and continue as if acked.
			 */
			mutex_enter(&so->so_lock);
			goto skip_transport;

		case KSSL_IS_PROXY:
			break;
		}
	}

	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
	if (error) {
		eprintsoline(so, error);
		/* The done path expects so_lock to be held */
		mutex_enter(&so->so_lock);
		goto done;
	}

	mutex_enter(&so->so_lock);
	/* On success mp is replaced by the T_BIND_ACK message */
	error = sowaitprim(so, PRIM_type, T_BIND_ACK,
	    (t_uscalar_t)sizeof (*bind_ack), &mp, 0);
	if (error) {
		eprintsoline(so, error);
		goto done;
	}
skip_transport:
	/*
	 * NOTE(review): when reached via KSSL_HAS_PROXY, mp is still the
	 * message built by soallocproto2() above and it is read below
	 * through a struct T_bind_ack pointer - presumably the request and
	 * ack layouts match; confirm.
	 */
	ASSERT(mp);
	/*
	 * Even if some TPI message (e.g. T_DISCON_IND) was received in
	 * strsock_proto while the lock was dropped above, the bind
	 * is allowed to complete.
	 */

	/* Mark as bound. This will be undone if we detect errors below. */
	if (flags & _SOBIND_NOXLATE) {
		ASSERT(so->so_family == AF_UNIX);
		so->so_state |= SS_FADDR_NOXLATE;
	}
	ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND));
	so->so_state |= SS_ISBOUND;
	ASSERT(so->so_unbind_mp);

	/* note that we've already set SS_ACCEPTCONN above */

	/*
	 * Recompute addrlen - an unspecified bind sent down an
	 * address of length zero but we expect the appropriate length
	 * in return.
	 */
	addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ?
	    sizeof (so->so_ux_laddr) : so->so_laddr_len);

	bind_ack = (struct T_bind_ack *)mp->b_rptr;
	/*
	 * The alignment restriction is really too strict but
	 * we want enough alignment to inspect the fields of
	 * a sockaddr_in.
	 */
	addr = sogetoff(mp, bind_ack->ADDR_offset,
			bind_ack->ADDR_length,
			__TPI_ALIGN_SIZE);
	if (addr == NULL) {
		freemsg(mp);
		error = EPROTO;
		eprintsoline(so, error);
		goto done;
	}
	if (!(flags & _SOBIND_UNSPEC)) {
		/*
		 * Verify that the transport didn't return something we
		 * did not want e.g. an address other than what we asked for.
		 *
		 * NOTE: These checks would go away if/when we switch to
		 * using the new TPI (in which the transport would fail
		 * the request instead of assigning a different address).
		 *
		 * NOTE2: For protocols that we don't know (i.e. any
		 * other than AF_INET6, AF_INET and AF_UNIX), we
		 * cannot know if the transport should be expected to
		 * return the same address as that requested.
		 *
		 * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send
		 * down a T_BIND_REQ. We use O_T_BIND_REQ for others.
		 *
		 * For example, in the case of netatalk it may be
		 * inappropriate for the transport to return the
		 * requested address (as it may have allocated a local
		 * port number in behaviour similar to that of an
		 * AF_INET bind request with a port number of zero).
		 *
		 * Given the definition of O_T_BIND_REQ, where the
		 * transport may bind to an address other than the
		 * requested address, it's not possible to determine
		 * whether a returned address that differs from the
		 * requested address is a reason to fail (because the
		 * requested address was not available) or succeed
		 * (because the transport allocated an appropriate
		 * address and/or port).
		 *
		 * sockfs currently requires that the transport return
		 * the requested address in the T_BIND_ACK, unless
		 * there is code here to allow for any discrepancy.
		 * Such code exists for AF_INET and AF_INET6.
		 *
		 * Netatalk chooses to return the requested address
		 * rather than the (correct) allocated address. This
		 * means that netatalk violates the TPI specification
		 * (and would not function correctly if used from a
		 * TLI application), but it does mean that it works
		 * with sockfs.
		 *
		 * As noted above, using the newer XTI bind primitive
		 * (T_BIND_REQ) in preference to O_T_BIND_REQ would
		 * allow sockfs to be more sure about whether or not
		 * the bind request had succeeded (as transports are
		 * not permitted to bind to a different address than
		 * that requested - they must return failure).
		 * Unfortunately, support for T_BIND_REQ may not be
		 * present in all transport implementations (netatalk,
		 * for example, doesn't have it), making the
		 * transition difficult.
		 */
		if (bind_ack->ADDR_length != addrlen) {
			/* Assumes that the requested address was in use */
			freemsg(mp);
			error = EADDRINUSE;
			eprintsoline(so, error);
			goto done;
		}

		switch (so->so_family) {
		case AF_INET6:
		case AF_INET: {
			sin_t *rname, *aname;

			/* rname: address returned; aname: address asked for */
			rname = (sin_t *)addr;
			aname = (sin_t *)so->so_laddr_sa;

			/*
			 * Take advantage of the alignment
			 * of sin_port and sin6_port which fall
			 * in the same place in their data structures.
			 * Just use sin_port for either address family.
			 *
			 * This may become a problem if (heaven forbid)
			 * there's a separate ipv6port_reserved... :-P
			 *
			 * Binding to port 0 has the semantics of letting
			 * the transport bind to any port.
			 *
			 * If the transport is TCP or UDP since we had sent
			 * a T_BIND_REQ we would not get a port other than
			 * what we asked for.
			 */
			if (tcp_udp_xport) {
				/*
				 * Pick up the new port number if we bound to
				 * port 0.
				 */
				if (aname->sin_port == 0)
					aname->sin_port = rname->sin_port;
				so->so_state |= SS_LADDR_VALID;
				break;
			}
			if (aname->sin_port != 0 &&
			    aname->sin_port != rname->sin_port) {
				freemsg(mp);
				error = EADDRINUSE;
				eprintsoline(so, error);
				goto done;
			}
			/*
			 * Pick up the new port number if we bound to port 0.
			 */
			aname->sin_port = rname->sin_port;

			/*
			 * Unfortunately, addresses aren't _quite_ the same.
			 */
			if (so->so_family == AF_INET) {
				if (aname->sin_addr.s_addr !=
				    rname->sin_addr.s_addr) {
					freemsg(mp);
					error = EADDRNOTAVAIL;
					eprintsoline(so, error);
					goto done;
				}
			} else {
				sin6_t *rname6 = (sin6_t *)rname;
				sin6_t *aname6 = (sin6_t *)aname;

				if (!IN6_ARE_ADDR_EQUAL(&aname6->sin6_addr,
				    &rname6->sin6_addr)) {
					freemsg(mp);
					error = EADDRNOTAVAIL;
					eprintsoline(so, error);
					goto done;
				}
			}
			break;
		}
		case AF_UNIX:
			if (bcmp(addr, &so->so_ux_laddr, addrlen) != 0) {
				freemsg(mp);
				error = EADDRINUSE;
				eprintsoline(so, error);
				eprintso(so,
					("addrlen %d, addr 0x%x, vp %p\n",
					addrlen, *((int *)addr),
					so->so_ux_bound_vp));
				goto done;
			}
			so->so_state |= SS_LADDR_VALID;
			break;
		default:
			/*
			 * NOTE: This assumes that addresses can be
			 * byte-compared for equivalence.
			 */
			if (bcmp(addr, so->so_laddr_sa, addrlen) != 0) {
				freemsg(mp);
				error = EADDRINUSE;
				eprintsoline(so, error);
				goto done;
			}
			/*
			 * Don't mark SS_LADDR_VALID, as we cannot be
			 * sure that the returned address is the real
			 * bound address when talking to an unknown
			 * transport.
			 */
			break;
		}
	} else {
		/*
		 * Save for returned address for getsockname.
		 * Needed for unspecific bind unless transport supports
		 * the TI_GETMYNAME ioctl.
		 * Do this for AF_INET{,6} even though they do, as
		 * caching info here is much better performance than
		 * a TPI/STREAMS trip to the transport for getsockname.
		 * Any which can't for some reason _must_ _not_ set
		 * LADDR_VALID here for the caching version of getsockname
		 * to not break;
		 */
		switch (so->so_family) {
		case AF_UNIX:
			/*
			 * Record the address bound with the transport
			 * for use by socketpair.
			 */
			bcopy(addr, &so->so_ux_laddr, addrlen);
			so->so_state |= SS_LADDR_VALID;
			break;
		case AF_INET:
		case AF_INET6:
			ASSERT(so->so_laddr_len <= so->so_laddr_maxlen);
			bcopy(addr, so->so_laddr_sa, so->so_laddr_len);
			so->so_state |= SS_LADDR_VALID;
			break;
		default:
			/*
			 * Don't mark SS_LADDR_VALID, as we cannot be
			 * sure that the returned address is the real
			 * bound address when talking to an unknown
			 * transport.
			 */
			break;
		}
	}

	if (nl7c != NULL) {
		/* Register listen()er sonode pointer with NL7C */
		nl7c_listener_addr(nl7c, so);
	}

	freemsg(mp);

done:
	/* so_lock is held on every path that reaches this label */
	if (error) {
		/* reset state & backlog to values held on entry */
		if (clear_acceptconn_on_err == B_TRUE)
			so->so_state &= ~SS_ACCEPTCONN;
		if (restore_backlog_on_err == B_TRUE)
			so->so_backlog = save_so_backlog;

		if (unbind_on_err && so->so_state & SS_ISBOUND) {
			int err;

			err = sotpi_unbind(so, 0);
			/*
			 * NOTE(review): the message below logs "error" (the
			 * bind failure) rather than "err" (the unbind
			 * failure) - confirm whether that is intended.
			 */
			/* LINTED - statement has no consequent: if */
			if (err) {
				eprintsoline(so, error);
			} else {
				ASSERT(!(so->so_state & SS_ISBOUND));
			}
		}
	}
	if (!(flags & _SOBIND_LOCK_HELD)) {
		so_unlock_single(so, SOLOCKED);
		mutex_exit(&so->so_lock);
	} else {
		/* If the caller held the lock don't release it here */
		ASSERT(MUTEX_HELD(&so->so_lock));
		ASSERT(so->so_flag & SOLOCKED);
	}
	return (error);
}

/*
 * bind the socket
 *
 * Thin wrapper around sotpi_bindlisten(). Normally the bind is done with
 * a backlog of 0. When _SOBIND_SOCKETPAIR is set in flags, that flag is
 * stripped and the bind is done with a backlog of 1 instead.
 * Returns whatever sotpi_bindlisten() returns.
 */
static int
sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
    int flags)
{
	if ((flags & _SOBIND_SOCKETPAIR) == 0)
		return (sotpi_bindlisten(so, name, namelen, 0, flags));

	flags &= ~_SOBIND_SOCKETPAIR;
	return (sotpi_bindlisten(so, name, namelen, 1, flags));
}

/*
 * Unbind a socket - used when bind() fails, when bind() specifies a NULL
 * address, or when listen needs to
unbind and bind. 1147 * If the _SOUNBIND_REBIND flag is specified the addresses are retained 1148 * so that a sobind can pick them up. 1149 */ 1150 static int 1151 sotpi_unbind(struct sonode *so, int flags) 1152 { 1153 struct T_unbind_req unbind_req; 1154 int error = 0; 1155 mblk_t *mp; 1156 1157 dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n", 1158 so, flags, pr_state(so->so_state, so->so_mode))); 1159 1160 ASSERT(MUTEX_HELD(&so->so_lock)); 1161 ASSERT(so->so_flag & SOLOCKED); 1162 1163 if (!(so->so_state & SS_ISBOUND)) { 1164 error = EINVAL; 1165 eprintsoline(so, error); 1166 goto done; 1167 } 1168 1169 mutex_exit(&so->so_lock); 1170 1171 /* 1172 * Flush the read and write side (except stream head read queue) 1173 * and send down T_UNBIND_REQ. 1174 */ 1175 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW); 1176 1177 unbind_req.PRIM_type = T_UNBIND_REQ; 1178 mp = soallocproto1(&unbind_req, sizeof (unbind_req), 1179 0, _ALLOC_SLEEP); 1180 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1181 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1182 mutex_enter(&so->so_lock); 1183 if (error) { 1184 eprintsoline(so, error); 1185 goto done; 1186 } 1187 1188 error = sowaitokack(so, T_UNBIND_REQ); 1189 if (error) { 1190 eprintsoline(so, error); 1191 goto done; 1192 } 1193 1194 /* 1195 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1196 * strsock_proto while the lock was dropped above, the unbind 1197 * is allowed to complete. 1198 */ 1199 if (!(flags & _SOUNBIND_REBIND)) { 1200 /* 1201 * Clear out bound address. 
1202 */ 1203 vnode_t *vp; 1204 1205 if ((vp = so->so_ux_bound_vp) != NULL) { 1206 1207 /* Undo any SSL proxy setup */ 1208 if ((so->so_family == AF_INET || 1209 so->so_family == AF_INET6) && 1210 (so->so_type == SOCK_STREAM) && 1211 (so->so_kssl_ent != NULL)) { 1212 kssl_release_ent(so->so_kssl_ent, so, 1213 so->so_kssl_type); 1214 so->so_kssl_ent = NULL; 1215 so->so_kssl_type = KSSL_NO_PROXY; 1216 } 1217 1218 so->so_ux_bound_vp = NULL; 1219 vn_rele_stream(vp); 1220 } 1221 /* Clear out address */ 1222 so->so_laddr_len = 0; 1223 } 1224 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN|SS_LADDR_VALID); 1225 1226 done: 1227 1228 /* If the caller held the lock don't release it here */ 1229 ASSERT(MUTEX_HELD(&so->so_lock)); 1230 ASSERT(so->so_flag & SOLOCKED); 1231 1232 return (error); 1233 } 1234 1235 /* 1236 * listen on the socket. 1237 * For TPI conforming transports this has to first unbind with the transport 1238 * and then bind again using the new backlog. 1239 */ 1240 int 1241 sotpi_listen(struct sonode *so, int backlog) 1242 { 1243 int error = 0; 1244 1245 dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n", 1246 so, backlog, pr_state(so->so_state, so->so_mode))); 1247 1248 if (so->so_serv_type == T_CLTS) 1249 return (EOPNOTSUPP); 1250 1251 /* 1252 * If the socket is ready to accept connections already, then 1253 * return without doing anything. This avoids a problem where 1254 * a second listen() call fails if a connection is pending and 1255 * leaves the socket unbound. Only when we are not unbinding 1256 * with the transport can we safely increase the backlog. 1257 */ 1258 if (so->so_state & SS_ACCEPTCONN && 1259 !((so->so_family == AF_INET || so->so_family == AF_INET6) && 1260 /*CONSTCOND*/ 1261 !solisten_tpi_tcp)) 1262 return (0); 1263 1264 if (so->so_state & SS_ISCONNECTED) 1265 return (EINVAL); 1266 1267 mutex_enter(&so->so_lock); 1268 so_lock_single(so); /* Set SOLOCKED */ 1269 1270 if (backlog < 0) 1271 backlog = 0; 1272 /* 1273 * Use the same qlimit as in BSD. 
BSD checks the qlimit 1274 * before queuing the next connection implying that a 1275 * listen(sock, 0) allows one connection to be queued. 1276 * BSD also uses 1.5 times the requested backlog. 1277 * 1278 * XNS Issue 4 required a strict interpretation of the backlog. 1279 * This has been waived subsequently for Issue 4 and the change 1280 * incorporated in XNS Issue 5. So we aren't required to do 1281 * anything special for XPG apps. 1282 */ 1283 if (backlog >= (INT_MAX - 1) / 3) 1284 backlog = INT_MAX; 1285 else 1286 backlog = backlog * 3 / 2 + 1; 1287 1288 /* 1289 * If the listen doesn't change the backlog we do nothing. 1290 * This avoids an EPROTO error from the transport. 1291 */ 1292 if ((so->so_state & SS_ACCEPTCONN) && 1293 so->so_backlog == backlog) 1294 goto done; 1295 1296 if (!(so->so_state & SS_ISBOUND)) { 1297 /* 1298 * Must have been explicitly bound in the UNIX domain. 1299 */ 1300 if (so->so_family == AF_UNIX) { 1301 error = EINVAL; 1302 goto done; 1303 } 1304 error = sotpi_bindlisten(so, NULL, 0, backlog, 1305 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN); 1306 } else if (backlog > 0) { 1307 /* 1308 * AF_INET{,6} hack to avoid losing the port. 1309 * Assumes that all AF_INET{,6} transports can handle a 1310 * O_T_BIND_REQ with a non-zero CONIND_number when the TPI 1311 * has already bound thus it is possible to avoid the unbind. 
1312 */ 1313 if (!((so->so_family == AF_INET || so->so_family == AF_INET6) && 1314 /*CONSTCOND*/ 1315 !solisten_tpi_tcp)) { 1316 error = sotpi_unbind(so, _SOUNBIND_REBIND); 1317 if (error) 1318 goto done; 1319 } 1320 error = sotpi_bindlisten(so, NULL, 0, backlog, 1321 _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN); 1322 } else { 1323 so->so_state |= SS_ACCEPTCONN; 1324 so->so_backlog = backlog; 1325 } 1326 if (error) 1327 goto done; 1328 ASSERT(so->so_state & SS_ACCEPTCONN); 1329 done: 1330 so_unlock_single(so, SOLOCKED); 1331 mutex_exit(&so->so_lock); 1332 return (error); 1333 } 1334 1335 /* 1336 * Disconnect either a specified seqno or all (-1). 1337 * The former is used on listening sockets only. 1338 * 1339 * When seqno == -1 sodisconnect could call sotpi_unbind. However, 1340 * the current use of sodisconnect(seqno == -1) is only for shutdown 1341 * so there is no point (and potentially incorrect) to unbind. 1342 */ 1343 int 1344 sodisconnect(struct sonode *so, t_scalar_t seqno, int flags) 1345 { 1346 struct T_discon_req discon_req; 1347 int error = 0; 1348 mblk_t *mp; 1349 1350 dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n", 1351 so, seqno, flags, pr_state(so->so_state, so->so_mode))); 1352 1353 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1354 mutex_enter(&so->so_lock); 1355 so_lock_single(so); /* Set SOLOCKED */ 1356 } else { 1357 ASSERT(MUTEX_HELD(&so->so_lock)); 1358 ASSERT(so->so_flag & SOLOCKED); 1359 } 1360 1361 if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ACCEPTCONN))) { 1362 error = EINVAL; 1363 eprintsoline(so, error); 1364 goto done; 1365 } 1366 1367 mutex_exit(&so->so_lock); 1368 /* 1369 * Flush the write side (unless this is a listener) 1370 * and then send down a T_DISCON_REQ. 1371 * (Don't flush on listener since it could flush {O_}T_CONN_RES 1372 * and other messages.) 
1373 */ 1374 if (!(so->so_state & SS_ACCEPTCONN)) 1375 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHW); 1376 1377 discon_req.PRIM_type = T_DISCON_REQ; 1378 discon_req.SEQ_number = seqno; 1379 mp = soallocproto1(&discon_req, sizeof (discon_req), 1380 0, _ALLOC_SLEEP); 1381 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1382 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1383 mutex_enter(&so->so_lock); 1384 if (error) { 1385 eprintsoline(so, error); 1386 goto done; 1387 } 1388 1389 error = sowaitokack(so, T_DISCON_REQ); 1390 if (error) { 1391 eprintsoline(so, error); 1392 goto done; 1393 } 1394 /* 1395 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1396 * strsock_proto while the lock was dropped above, the disconnect 1397 * is allowed to complete. However, it is not possible to 1398 * assert that SS_ISCONNECTED|SS_ISCONNECTING are set. 1399 */ 1400 so->so_state &= 1401 ~(SS_ISCONNECTED|SS_ISCONNECTING|SS_LADDR_VALID|SS_FADDR_VALID); 1402 done: 1403 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1404 so_unlock_single(so, SOLOCKED); 1405 mutex_exit(&so->so_lock); 1406 } else { 1407 /* If the caller held the lock don't release it here */ 1408 ASSERT(MUTEX_HELD(&so->so_lock)); 1409 ASSERT(so->so_flag & SOLOCKED); 1410 } 1411 return (error); 1412 } 1413 1414 int 1415 sotpi_accept(struct sonode *so, int fflag, struct sonode **nsop) 1416 { 1417 struct T_conn_ind *conn_ind; 1418 struct T_conn_res *conn_res; 1419 int error = 0; 1420 mblk_t *mp, *ctxmp; 1421 struct sonode *nso; 1422 vnode_t *nvp; 1423 void *src; 1424 t_uscalar_t srclen; 1425 void *opt; 1426 t_uscalar_t optlen; 1427 t_scalar_t PRIM_type; 1428 t_scalar_t SEQ_number; 1429 1430 dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n", 1431 so, fflag, nsop, pr_state(so->so_state, so->so_mode))); 1432 1433 /* 1434 * Defer single-threading the accepting socket until 1435 * the T_CONN_IND has been received and parsed and the 1436 * new sonode has been opened. 
1437 */ 1438 1439 /* Check that we are not already connected */ 1440 if ((so->so_state & SS_ACCEPTCONN) == 0) 1441 goto conn_bad; 1442 again: 1443 if ((error = sowaitconnind(so, fflag, &mp)) != 0) 1444 goto e_bad; 1445 1446 ASSERT(mp); 1447 conn_ind = (struct T_conn_ind *)mp->b_rptr; 1448 ctxmp = mp->b_cont; 1449 1450 /* 1451 * Save SEQ_number for error paths. 1452 */ 1453 SEQ_number = conn_ind->SEQ_number; 1454 1455 srclen = conn_ind->SRC_length; 1456 src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1); 1457 if (src == NULL) { 1458 error = EPROTO; 1459 freemsg(mp); 1460 eprintsoline(so, error); 1461 goto disconnect_unlocked; 1462 } 1463 optlen = conn_ind->OPT_length; 1464 switch (so->so_family) { 1465 case AF_INET: 1466 case AF_INET6: 1467 if ((optlen == sizeof (intptr_t)) && 1468 ((so->so_state & SS_DIRECT) != 0)) { 1469 bcopy(mp->b_rptr + conn_ind->OPT_offset, 1470 &opt, conn_ind->OPT_length); 1471 } else { 1472 /* 1473 * The transport (in this case TCP) hasn't sent up 1474 * a pointer to an instance for the accept fast-path. 1475 * Disable fast-path completely because the call to 1476 * sotpi_create() below would otherwise create an 1477 * incomplete TCP instance, which would lead to 1478 * problems when sockfs sends a normal T_CONN_RES 1479 * message down the new stream. 1480 */ 1481 if (so->so_state & SS_DIRECT) { 1482 int rval; 1483 /* 1484 * For consistency we inform tcp to disable 1485 * direct interface on the listener, though 1486 * we can certainly live without doing this 1487 * because no data will ever travel upstream 1488 * on the listening socket. 
1489 */ 1490 so->so_state &= ~SS_DIRECT; 1491 (void) strioctl(SOTOV(so), _SIOCSOCKFALLBACK, 1492 0, 0, K_TO_K, CRED(), &rval); 1493 } 1494 opt = NULL; 1495 optlen = 0; 1496 } 1497 break; 1498 case AF_UNIX: 1499 default: 1500 if (optlen != 0) { 1501 opt = sogetoff(mp, conn_ind->OPT_offset, optlen, 1502 __TPI_ALIGN_SIZE); 1503 if (opt == NULL) { 1504 error = EPROTO; 1505 freemsg(mp); 1506 eprintsoline(so, error); 1507 goto disconnect_unlocked; 1508 } 1509 } 1510 if (so->so_family == AF_UNIX) { 1511 if (!(so->so_state & SS_FADDR_NOXLATE)) { 1512 src = NULL; 1513 srclen = 0; 1514 } 1515 /* Extract src address from options */ 1516 if (optlen != 0) 1517 so_getopt_srcaddr(opt, optlen, &src, &srclen); 1518 } 1519 break; 1520 } 1521 1522 /* 1523 * Create the new socket. 1524 */ 1525 VN_HOLD(so->so_accessvp); 1526 nso = sotpi_create(so->so_accessvp, so->so_family, so->so_type, 1527 so->so_protocol, so->so_version, so, &error); 1528 if (nso == NULL) { 1529 ASSERT(error != 0); 1530 /* 1531 * Accept can not fail with ENOBUFS. sotpi_create 1532 * sleeps waiting for memory until a signal is caught 1533 * so return EINTR. 1534 */ 1535 freemsg(mp); 1536 if (error == ENOBUFS) 1537 error = EINTR; 1538 goto e_disc_unl; 1539 } 1540 nvp = SOTOV(nso); 1541 1542 /* 1543 * If the transport sent up an SSL connection context, then attach 1544 * it the new socket, and set the (sd_wputdatafunc)() and 1545 * (sd_rputdatafunc)() stream head hooks to intercept and process 1546 * SSL records. 1547 */ 1548 if (ctxmp != NULL) { 1549 /* 1550 * This kssl_ctx_t is already held for us by the transport. 1551 * So, we don't need to do a kssl_hold_ctx() here. 
1552 */ 1553 nso->so_kssl_ctx = *((kssl_ctx_t *)ctxmp->b_rptr); 1554 freemsg(ctxmp); 1555 mp->b_cont = NULL; 1556 strsetrwputdatahooks(nvp, strsock_kssl_input, 1557 strsock_kssl_output); 1558 } 1559 #ifdef DEBUG 1560 /* 1561 * SO_DEBUG is used to trigger the dprint* and eprint* macros thus 1562 * it's inherited early to allow debugging of the accept code itself. 1563 */ 1564 nso->so_options |= so->so_options & SO_DEBUG; 1565 #endif /* DEBUG */ 1566 1567 /* 1568 * Save the SRC address from the T_CONN_IND 1569 * for getpeername to work on AF_UNIX and on transports that do not 1570 * support TI_GETPEERNAME. 1571 * 1572 * NOTE: AF_UNIX NUL termination is ensured by the sender's 1573 * copyin_name(). 1574 */ 1575 if (srclen > (t_uscalar_t)nso->so_faddr_maxlen) { 1576 error = EINVAL; 1577 freemsg(mp); 1578 eprintsoline(so, error); 1579 goto disconnect_vp_unlocked; 1580 } 1581 nso->so_faddr_len = (socklen_t)srclen; 1582 ASSERT(so->so_faddr_len <= so->so_faddr_maxlen); 1583 bcopy(src, nso->so_faddr_sa, srclen); 1584 nso->so_state |= SS_FADDR_VALID; 1585 1586 if ((DB_REF(mp) > 1) || MBLKSIZE(mp) < 1587 (sizeof (struct T_conn_res) + sizeof (intptr_t))) { 1588 cred_t *cr; 1589 1590 if ((cr = DB_CRED(mp)) != NULL) { 1591 crhold(cr); 1592 nso->so_peercred = cr; 1593 nso->so_cpid = DB_CPID(mp); 1594 } 1595 freemsg(mp); 1596 1597 mp = soallocproto1(NULL, sizeof (struct T_conn_res) + 1598 sizeof (intptr_t), 0, _ALLOC_INTR); 1599 if (mp == NULL) { 1600 /* 1601 * Accept can not fail with ENOBUFS. 1602 * A signal was caught so return EINTR. 
1603 */ 1604 error = EINTR; 1605 eprintsoline(so, error); 1606 goto disconnect_vp_unlocked; 1607 } 1608 conn_res = (struct T_conn_res *)mp->b_rptr; 1609 } else { 1610 nso->so_peercred = DB_CRED(mp); 1611 nso->so_cpid = DB_CPID(mp); 1612 DB_CRED(mp) = NULL; 1613 1614 mp->b_rptr = DB_BASE(mp); 1615 conn_res = (struct T_conn_res *)mp->b_rptr; 1616 mp->b_wptr = mp->b_rptr + sizeof (struct T_conn_res); 1617 } 1618 1619 /* 1620 * New socket must be bound at least in sockfs and, except for AF_INET, 1621 * (or AF_INET6) it also has to be bound in the transport provider. 1622 * After accepting the connection on nso so_laddr_sa will be set to 1623 * contain the same address as the listener's local address 1624 * so the address we bind to isn't important. 1625 */ 1626 if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) && 1627 /*CONSTCOND*/ 1628 nso->so_type == SOCK_STREAM && !soaccept_tpi_tcp) { 1629 /* 1630 * Optimization for AF_INET{,6} transports 1631 * that can handle a T_CONN_RES without being bound. 1632 */ 1633 mutex_enter(&nso->so_lock); 1634 so_automatic_bind(nso); 1635 mutex_exit(&nso->so_lock); 1636 } else { 1637 /* Perform NULL bind with the transport provider. */ 1638 if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC)) != 0) { 1639 ASSERT(error != ENOBUFS); 1640 freemsg(mp); 1641 eprintsoline(nso, error); 1642 goto disconnect_vp_unlocked; 1643 } 1644 } 1645 1646 /* 1647 * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES 1648 * so that any data arriving on the new socket will cause the 1649 * appropriate signals to be delivered for the new socket. 1650 * 1651 * No other thread (except strsock_proto and strsock_misc) 1652 * can access the new socket thus we relax the locking. 
1653 */ 1654 nso->so_pgrp = so->so_pgrp; 1655 nso->so_state |= so->so_state & (SS_ASYNC|SS_FADDR_NOXLATE); 1656 1657 if (nso->so_pgrp != 0) { 1658 if ((error = so_set_events(nso, nvp, CRED())) != 0) { 1659 eprintsoline(nso, error); 1660 error = 0; 1661 nso->so_pgrp = 0; 1662 } 1663 } 1664 1665 /* 1666 * Make note of the socket level options. TCP and IP level options 1667 * are already inherited. We could do all this after accept is 1668 * successful but doing it here simplifies code and no harm done 1669 * for error case. 1670 */ 1671 nso->so_options = so->so_options & (SO_DEBUG|SO_REUSEADDR|SO_KEEPALIVE| 1672 SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK| 1673 SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER); 1674 nso->so_sndbuf = so->so_sndbuf; 1675 nso->so_rcvbuf = so->so_rcvbuf; 1676 if (nso->so_options & SO_LINGER) 1677 nso->so_linger = so->so_linger; 1678 1679 if ((so->so_state & SS_DIRECT) != 0) { 1680 mblk_t *ack_mp; 1681 1682 ASSERT(nso->so_state & SS_DIRECT); 1683 ASSERT(opt != NULL); 1684 1685 conn_res->OPT_length = optlen; 1686 conn_res->OPT_offset = MBLKL(mp); 1687 bcopy(&opt, mp->b_wptr, optlen); 1688 mp->b_wptr += optlen; 1689 conn_res->PRIM_type = T_CONN_RES; 1690 conn_res->ACCEPTOR_id = 0; 1691 PRIM_type = T_CONN_RES; 1692 1693 /* Send down the T_CONN_RES on acceptor STREAM */ 1694 error = kstrputmsg(SOTOV(nso), mp, NULL, 1695 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1696 if (error) { 1697 mutex_enter(&so->so_lock); 1698 so_lock_single(so); 1699 eprintsoline(so, error); 1700 goto disconnect_vp; 1701 } 1702 mutex_enter(&nso->so_lock); 1703 error = sowaitprim(nso, T_CONN_RES, T_OK_ACK, 1704 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 1705 if (error) { 1706 mutex_exit(&nso->so_lock); 1707 mutex_enter(&so->so_lock); 1708 so_lock_single(so); 1709 eprintsoline(so, error); 1710 goto disconnect_vp; 1711 } 1712 if (nso->so_family == AF_INET) { 1713 sin_t *sin; 1714 1715 sin = (sin_t *)(ack_mp->b_rptr + 1716 sizeof (struct T_ok_ack)); 1717 bcopy(sin, 
nso->so_laddr_sa, sizeof (sin_t)); 1718 nso->so_laddr_len = sizeof (sin_t); 1719 } else { 1720 sin6_t *sin6; 1721 1722 sin6 = (sin6_t *)(ack_mp->b_rptr + 1723 sizeof (struct T_ok_ack)); 1724 bcopy(sin6, nso->so_laddr_sa, sizeof (sin6_t)); 1725 nso->so_laddr_len = sizeof (sin6_t); 1726 } 1727 freemsg(ack_mp); 1728 1729 nso->so_state |= SS_ISCONNECTED | SS_LADDR_VALID; 1730 nso->so_priv = opt; 1731 1732 if (so->so_nl7c_flags & NL7C_ENABLED) { 1733 /* 1734 * A NL7C marked listen()er so the new socket 1735 * inherits the listen()er's NL7C state, except 1736 * for NL7C_POLLIN. 1737 * 1738 * Only call NL7C to process the new socket if 1739 * the listen socket allows blocking i/o. 1740 */ 1741 nso->so_nl7c_flags = so->so_nl7c_flags & (~NL7C_POLLIN); 1742 if (so->so_state & (SS_NONBLOCK|SS_NDELAY)) { 1743 /* 1744 * Nonblocking accept() just make it 1745 * persist to defer processing to the 1746 * read-side syscall (e.g. read). 1747 */ 1748 nso->so_nl7c_flags |= NL7C_SOPERSIST; 1749 } else if (nl7c_process(nso, B_FALSE)) { 1750 /* 1751 * NL7C has completed processing on the 1752 * socket, close the socket and back to 1753 * the top to await the next T_CONN_IND. 1754 */ 1755 mutex_exit(&nso->so_lock); 1756 (void) VOP_CLOSE(nvp, 0, 1, (offset_t)0, 1757 CRED()); 1758 VN_RELE(nvp); 1759 goto again; 1760 } 1761 /* Pass the new socket out */ 1762 } 1763 1764 mutex_exit(&nso->so_lock); 1765 1766 /* 1767 * Pass out new socket. 1768 */ 1769 if (nsop != NULL) 1770 *nsop = nso; 1771 1772 return (0); 1773 } 1774 1775 /* 1776 * Copy local address from listener. 1777 */ 1778 nso->so_laddr_len = so->so_laddr_len; 1779 ASSERT(nso->so_laddr_len <= nso->so_laddr_maxlen); 1780 bcopy(so->so_laddr_sa, nso->so_laddr_sa, nso->so_laddr_len); 1781 nso->so_state |= SS_LADDR_VALID; 1782 1783 /* 1784 * This is the non-performance case for sockets (e.g. AF_UNIX sockets) 1785 * which don't support the FireEngine accept fast-path. 
It is also 1786 * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd 1787 * again. Neither sockfs nor TCP attempt to find out if some other 1788 * random module has been inserted in between (in which case we 1789 * should follow TLI accept behaviour). We blindly assume the worst 1790 * case and revert back to old behaviour i.e. TCP will not send us 1791 * any option (eager) and the accept should happen on the listener 1792 * queue. Any queued T_conn_ind have already got their options removed 1793 * by so_sock2_stream() when "sockmod" was I_POP'd. 1794 */ 1795 /* 1796 * Fill in the {O_}T_CONN_RES before getting SOLOCKED. 1797 */ 1798 if ((nso->so_mode & SM_ACCEPTOR_ID) == 0) { 1799 #ifdef _ILP32 1800 queue_t *q; 1801 1802 /* 1803 * Find read queue in driver 1804 * Can safely do this since we "own" nso/nvp. 1805 */ 1806 q = strvp2wq(nvp)->q_next; 1807 while (SAMESTR(q)) 1808 q = q->q_next; 1809 q = RD(q); 1810 conn_res->ACCEPTOR_id = (t_uscalar_t)q; 1811 #else 1812 conn_res->ACCEPTOR_id = (t_uscalar_t)getminor(nvp->v_rdev); 1813 #endif /* _ILP32 */ 1814 conn_res->PRIM_type = O_T_CONN_RES; 1815 PRIM_type = O_T_CONN_RES; 1816 } else { 1817 conn_res->ACCEPTOR_id = nso->so_acceptor_id; 1818 conn_res->PRIM_type = T_CONN_RES; 1819 PRIM_type = T_CONN_RES; 1820 } 1821 conn_res->SEQ_number = SEQ_number; 1822 conn_res->OPT_length = 0; 1823 conn_res->OPT_offset = 0; 1824 1825 mutex_enter(&so->so_lock); 1826 so_lock_single(so); /* Set SOLOCKED */ 1827 mutex_exit(&so->so_lock); 1828 1829 error = kstrputmsg(SOTOV(so), mp, NULL, 1830 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1831 mutex_enter(&so->so_lock); 1832 if (error) { 1833 eprintsoline(so, error); 1834 goto disconnect_vp; 1835 } 1836 error = sowaitokack(so, PRIM_type); 1837 if (error) { 1838 eprintsoline(so, error); 1839 goto disconnect_vp; 1840 } 1841 so_unlock_single(so, SOLOCKED); 1842 mutex_exit(&so->so_lock); 1843 1844 nso->so_state |= SS_ISCONNECTED; 1845 1846 /* 1847 * Pass out new socket. 
1848 */ 1849 if (nsop != NULL) 1850 *nsop = nso; 1851 1852 return (0); 1853 1854 1855 eproto_disc_unl: 1856 error = EPROTO; 1857 e_disc_unl: 1858 eprintsoline(so, error); 1859 goto disconnect_unlocked; 1860 1861 pr_disc_vp_unl: 1862 eprintsoline(so, error); 1863 disconnect_vp_unlocked: 1864 (void) VOP_CLOSE(nvp, 0, 1, 0, CRED()); 1865 VN_RELE(nvp); 1866 disconnect_unlocked: 1867 (void) sodisconnect(so, SEQ_number, 0); 1868 return (error); 1869 1870 pr_disc_vp: 1871 eprintsoline(so, error); 1872 disconnect_vp: 1873 (void) sodisconnect(so, SEQ_number, _SODISCONNECT_LOCK_HELD); 1874 so_unlock_single(so, SOLOCKED); 1875 mutex_exit(&so->so_lock); 1876 (void) VOP_CLOSE(nvp, 0, 1, 0, CRED()); 1877 VN_RELE(nvp); 1878 return (error); 1879 1880 conn_bad: /* Note: SunOS 4/BSD unconditionally returns EINVAL here */ 1881 error = (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) 1882 ? EOPNOTSUPP : EINVAL; 1883 e_bad: 1884 eprintsoline(so, error); 1885 return (error); 1886 } 1887 1888 /* 1889 * connect a socket. 1890 * 1891 * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to 1892 * unconnect (by specifying a null address). 1893 */ 1894 int 1895 sotpi_connect(struct sonode *so, 1896 const struct sockaddr *name, 1897 socklen_t namelen, 1898 int fflag, 1899 int flags) 1900 { 1901 struct T_conn_req conn_req; 1902 int error = 0; 1903 mblk_t *mp; 1904 void *src; 1905 socklen_t srclen; 1906 void *addr; 1907 socklen_t addrlen; 1908 boolean_t need_unlock; 1909 1910 dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n", 1911 so, name, namelen, fflag, flags, 1912 pr_state(so->so_state, so->so_mode))); 1913 1914 /* 1915 * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to 1916 * avoid sleeping for memory with SOLOCKED held. 1917 * We know that the T_CONN_REQ can't be larger than 2 * so_faddr_maxlen 1918 * + sizeof (struct T_opthdr). 1919 * (the AF_UNIX so_ux_addr_xlate() does not make the address 1920 * exceed so_faddr_maxlen). 
1921 */ 1922 mp = soallocproto(sizeof (struct T_conn_req) + 1923 2 * so->so_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR); 1924 if (mp == NULL) { 1925 /* 1926 * Connect can not fail with ENOBUFS. A signal was 1927 * caught so return EINTR. 1928 */ 1929 error = EINTR; 1930 eprintsoline(so, error); 1931 return (error); 1932 } 1933 1934 mutex_enter(&so->so_lock); 1935 /* 1936 * Make sure that there is a preallocated unbind_req 1937 * message before any binding. This message allocated when 1938 * the socket is created but it might be have been 1939 * consumed. 1940 */ 1941 if (so->so_unbind_mp == NULL) { 1942 dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n")); 1943 /* NOTE: holding so_lock while sleeping */ 1944 so->so_unbind_mp = 1945 soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR); 1946 if (so->so_unbind_mp == NULL) { 1947 error = EINTR; 1948 need_unlock = B_FALSE; 1949 goto done; 1950 } 1951 } 1952 1953 so_lock_single(so); /* Set SOLOCKED */ 1954 need_unlock = B_TRUE; 1955 1956 /* 1957 * Can't have done a listen before connecting. 1958 */ 1959 if (so->so_state & SS_ACCEPTCONN) { 1960 error = EOPNOTSUPP; 1961 goto done; 1962 } 1963 1964 /* 1965 * Must be bound with the transport 1966 */ 1967 if (!(so->so_state & SS_ISBOUND)) { 1968 if ((so->so_family == AF_INET || so->so_family == AF_INET6) && 1969 /*CONSTCOND*/ 1970 so->so_type == SOCK_STREAM && !soconnect_tpi_tcp) { 1971 /* 1972 * Optimization for AF_INET{,6} transports 1973 * that can handle a T_CONN_REQ without being bound. 1974 */ 1975 so_automatic_bind(so); 1976 } else { 1977 error = sotpi_bind(so, NULL, 0, 1978 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD); 1979 if (error) 1980 goto done; 1981 } 1982 ASSERT(so->so_state & SS_ISBOUND); 1983 flags |= _SOCONNECT_DID_BIND; 1984 } 1985 1986 /* 1987 * Handle a connect to a name parameter of type AF_UNSPEC like a 1988 * connect to a null address. This is the portable method to 1989 * unconnect a socket. 
1990 */ 1991 if ((namelen >= sizeof (sa_family_t)) && 1992 (name->sa_family == AF_UNSPEC)) { 1993 name = NULL; 1994 namelen = 0; 1995 } 1996 1997 /* 1998 * Check that we are not already connected. 1999 * A connection-oriented socket cannot be reconnected. 2000 * A connected connection-less socket can be 2001 * - connected to a different address by a subsequent connect 2002 * - "unconnected" by a connect to the NULL address 2003 */ 2004 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) { 2005 ASSERT(!(flags & _SOCONNECT_DID_BIND)); 2006 if (so->so_mode & SM_CONNREQUIRED) { 2007 /* Connection-oriented socket */ 2008 error = so->so_state & SS_ISCONNECTED ? 2009 EISCONN : EALREADY; 2010 goto done; 2011 } 2012 /* Connection-less socket */ 2013 if (name == NULL) { 2014 /* 2015 * Remove the connected state and clear SO_DGRAM_ERRIND 2016 * since it was set when the socket was connected. 2017 * If this is UDP also send down a T_DISCON_REQ. 2018 */ 2019 int val; 2020 2021 if ((so->so_family == AF_INET || 2022 so->so_family == AF_INET6) && 2023 (so->so_type == SOCK_DGRAM || 2024 so->so_type == SOCK_RAW) && 2025 /*CONSTCOND*/ 2026 !soconnect_tpi_udp) { 2027 /* XXX What about implicitly unbinding here? */ 2028 error = sodisconnect(so, -1, 2029 _SODISCONNECT_LOCK_HELD); 2030 } else { 2031 so->so_state &= 2032 ~(SS_ISCONNECTED | SS_ISCONNECTING | 2033 SS_FADDR_VALID); 2034 so->so_faddr_len = 0; 2035 } 2036 2037 so_unlock_single(so, SOLOCKED); 2038 mutex_exit(&so->so_lock); 2039 2040 val = 0; 2041 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, 2042 &val, (t_uscalar_t)sizeof (val)); 2043 2044 mutex_enter(&so->so_lock); 2045 so_lock_single(so); /* Set SOLOCKED */ 2046 goto done; 2047 } 2048 } 2049 ASSERT(so->so_state & SS_ISBOUND); 2050 2051 if (name == NULL || namelen == 0) { 2052 error = EINVAL; 2053 goto done; 2054 } 2055 /* 2056 * Mark the socket if so_faddr_sa represents the transport level 2057 * address. 
2058 */ 2059 if (flags & _SOCONNECT_NOXLATE) { 2060 struct sockaddr_ux *soaddr_ux; 2061 2062 ASSERT(so->so_family == AF_UNIX); 2063 if (namelen != sizeof (struct sockaddr_ux)) { 2064 error = EINVAL; 2065 goto done; 2066 } 2067 soaddr_ux = (struct sockaddr_ux *)name; 2068 name = (struct sockaddr *)&soaddr_ux->sou_addr; 2069 namelen = sizeof (soaddr_ux->sou_addr); 2070 so->so_state |= SS_FADDR_NOXLATE; 2071 } 2072 2073 /* 2074 * Length and family checks. 2075 */ 2076 error = so_addr_verify(so, name, namelen); 2077 if (error) 2078 goto bad; 2079 2080 /* 2081 * Save foreign address. Needed for AF_UNIX as well as 2082 * transport providers that do not support TI_GETPEERNAME. 2083 * Also used for cached foreign address for TCP and UDP. 2084 */ 2085 if (namelen > (t_uscalar_t)so->so_faddr_maxlen) { 2086 error = EINVAL; 2087 goto done; 2088 } 2089 so->so_faddr_len = (socklen_t)namelen; 2090 ASSERT(so->so_faddr_len <= so->so_faddr_maxlen); 2091 bcopy(name, so->so_faddr_sa, namelen); 2092 so->so_state |= SS_FADDR_VALID; 2093 2094 if (so->so_family == AF_UNIX) { 2095 if (so->so_state & SS_FADDR_NOXLATE) { 2096 /* 2097 * Already have a transport internal address. Do not 2098 * pass any (transport internal) source address. 2099 */ 2100 addr = so->so_faddr_sa; 2101 addrlen = (t_uscalar_t)so->so_faddr_len; 2102 src = NULL; 2103 srclen = 0; 2104 } else { 2105 /* 2106 * Pass the sockaddr_un source address as an option 2107 * and translate the remote address. 2108 * Holding so_lock thus so_laddr_sa can not change. 
2109 */ 2110 src = so->so_laddr_sa; 2111 srclen = (t_uscalar_t)so->so_laddr_len; 2112 dprintso(so, 1, 2113 ("sotpi_connect UNIX: srclen %d, src %p\n", 2114 srclen, src)); 2115 error = so_ux_addr_xlate(so, 2116 so->so_faddr_sa, (socklen_t)so->so_faddr_len, 2117 (flags & _SOCONNECT_XPG4_2), 2118 &addr, &addrlen); 2119 if (error) 2120 goto bad; 2121 } 2122 } else { 2123 addr = so->so_faddr_sa; 2124 addrlen = (t_uscalar_t)so->so_faddr_len; 2125 src = NULL; 2126 srclen = 0; 2127 } 2128 /* 2129 * When connecting a datagram socket we issue the SO_DGRAM_ERRIND 2130 * option which asks the transport provider to send T_UDERR_IND 2131 * messages. These T_UDERR_IND messages are used to return connected 2132 * style errors (e.g. ECONNRESET) for connected datagram sockets. 2133 * 2134 * In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets) 2135 * we send down a T_CONN_REQ. This is needed to let the 2136 * transport assign a local address that is consistent with 2137 * the remote address. Applications depend on a getsockname() 2138 * after a connect() to retrieve the "source" IP address for 2139 * the connected socket. Invalidate the cached local address 2140 * to force getsockname() to enquire of the transport. 2141 */ 2142 if (!(so->so_mode & SM_CONNREQUIRED)) { 2143 /* 2144 * Datagram socket. 2145 */ 2146 int32_t val; 2147 2148 so_unlock_single(so, SOLOCKED); 2149 mutex_exit(&so->so_lock); 2150 2151 val = 1; 2152 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, 2153 &val, (t_uscalar_t)sizeof (val)); 2154 2155 mutex_enter(&so->so_lock); 2156 so_lock_single(so); /* Set SOLOCKED */ 2157 if ((so->so_family != AF_INET && so->so_family != AF_INET6) || 2158 (so->so_type != SOCK_DGRAM && so->so_type != SOCK_RAW) || 2159 soconnect_tpi_udp) { 2160 soisconnected(so); 2161 goto done; 2162 } 2163 /* 2164 * Send down T_CONN_REQ etc. 2165 * Clear fflag to avoid returning EWOULDBLOCK. 
2166 */ 2167 fflag = 0; 2168 ASSERT(so->so_family != AF_UNIX); 2169 so->so_state &= ~SS_LADDR_VALID; 2170 } else if (so->so_laddr_len != 0) { 2171 /* 2172 * If the local address or port was "any" then it may be 2173 * changed by the transport as a result of the 2174 * connect. Invalidate the cached version if we have one. 2175 */ 2176 switch (so->so_family) { 2177 case AF_INET: 2178 ASSERT(so->so_laddr_len == (socklen_t)sizeof (sin_t)); 2179 if (((sin_t *)so->so_laddr_sa)->sin_addr.s_addr == 2180 INADDR_ANY || 2181 ((sin_t *)so->so_laddr_sa)->sin_port == 0) 2182 so->so_state &= ~SS_LADDR_VALID; 2183 break; 2184 2185 case AF_INET6: 2186 ASSERT(so->so_laddr_len == (socklen_t)sizeof (sin6_t)); 2187 if (IN6_IS_ADDR_UNSPECIFIED( 2188 &((sin6_t *)so->so_laddr_sa) ->sin6_addr) || 2189 IN6_IS_ADDR_V4MAPPED_ANY( 2190 &((sin6_t *)so->so_laddr_sa)->sin6_addr) || 2191 ((sin6_t *)so->so_laddr_sa)->sin6_port == 0) 2192 so->so_state &= ~SS_LADDR_VALID; 2193 break; 2194 2195 default: 2196 break; 2197 } 2198 } 2199 2200 /* 2201 * Check for failure of an earlier call 2202 */ 2203 if (so->so_error != 0) 2204 goto so_bad; 2205 2206 /* 2207 * Send down T_CONN_REQ. Message was allocated above. 2208 */ 2209 conn_req.PRIM_type = T_CONN_REQ; 2210 conn_req.DEST_length = addrlen; 2211 conn_req.DEST_offset = (t_scalar_t)sizeof (conn_req); 2212 if (srclen == 0) { 2213 conn_req.OPT_length = 0; 2214 conn_req.OPT_offset = 0; 2215 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2216 soappendmsg(mp, addr, addrlen); 2217 } else { 2218 /* 2219 * There is a AF_UNIX sockaddr_un to include as a source 2220 * address option. 
2221 */ 2222 struct T_opthdr toh; 2223 2224 toh.level = SOL_SOCKET; 2225 toh.name = SO_SRCADDR; 2226 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 2227 toh.status = 0; 2228 conn_req.OPT_length = 2229 (t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen)); 2230 conn_req.OPT_offset = (t_scalar_t)(sizeof (conn_req) + 2231 _TPI_ALIGN_TOPT(addrlen)); 2232 2233 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2234 soappendmsg(mp, addr, addrlen); 2235 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2236 soappendmsg(mp, &toh, sizeof (toh)); 2237 soappendmsg(mp, src, srclen); 2238 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2239 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2240 } 2241 /* 2242 * Set SS_ISCONNECTING before sending down the T_CONN_REQ 2243 * in order to have the right state when the T_CONN_CON shows up. 2244 */ 2245 soisconnecting(so); 2246 mutex_exit(&so->so_lock); 2247 2248 #ifdef C2_AUDIT 2249 if (audit_active) 2250 audit_sock(T_CONN_REQ, strvp2wq(SOTOV(so)), mp, 0); 2251 #endif /* C2_AUDIT */ 2252 2253 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2254 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2255 mp = NULL; 2256 mutex_enter(&so->so_lock); 2257 if (error != 0) 2258 goto bad; 2259 2260 if ((error = sowaitokack(so, T_CONN_REQ)) != 0) 2261 goto bad; 2262 2263 /* Allow other threads to access the socket */ 2264 so_unlock_single(so, SOLOCKED); 2265 need_unlock = B_FALSE; 2266 2267 /* 2268 * Wait until we get a T_CONN_CON or an error 2269 */ 2270 if ((error = sowaitconnected(so, fflag, 0)) != 0) { 2271 so_lock_single(so); /* Set SOLOCKED */ 2272 need_unlock = B_TRUE; 2273 } 2274 2275 done: 2276 freemsg(mp); 2277 switch (error) { 2278 case EINPROGRESS: 2279 case EALREADY: 2280 case EISCONN: 2281 case EINTR: 2282 /* Non-fatal errors */ 2283 so->so_state &= ~SS_LADDR_VALID; 2284 /* FALLTHRU */ 2285 case 0: 2286 break; 2287 2288 case EHOSTUNREACH: 2289 if (flags & _SOCONNECT_XPG4_2) { 2290 /* 2291 * X/Open specification contains a requirement that 2292 * 
ENETUNREACH be returned but does not require 2293 * EHOSTUNREACH. In order to keep the test suite 2294 * happy we mess with the errno here. 2295 */ 2296 error = ENETUNREACH; 2297 } 2298 /* FALLTHRU */ 2299 2300 default: 2301 ASSERT(need_unlock); 2302 /* 2303 * Fatal errors: clear SS_ISCONNECTING in case it was set, 2304 * and invalidate local-address cache 2305 */ 2306 so->so_state &= ~(SS_ISCONNECTING | SS_LADDR_VALID); 2307 /* A discon_ind might have already unbound us */ 2308 if ((flags & _SOCONNECT_DID_BIND) && 2309 (so->so_state & SS_ISBOUND)) { 2310 int err; 2311 2312 err = sotpi_unbind(so, 0); 2313 /* LINTED - statement has no conseq */ 2314 if (err) { 2315 eprintsoline(so, err); 2316 } 2317 } 2318 break; 2319 } 2320 if (need_unlock) 2321 so_unlock_single(so, SOLOCKED); 2322 mutex_exit(&so->so_lock); 2323 return (error); 2324 2325 so_bad: error = sogeterr(so); 2326 bad: eprintsoline(so, error); 2327 goto done; 2328 } 2329 2330 int 2331 sotpi_shutdown(struct sonode *so, int how) 2332 { 2333 struct T_ordrel_req ordrel_req; 2334 mblk_t *mp; 2335 uint_t old_state, state_change; 2336 int error = 0; 2337 2338 dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n", 2339 so, how, pr_state(so->so_state, so->so_mode))); 2340 2341 mutex_enter(&so->so_lock); 2342 so_lock_single(so); /* Set SOLOCKED */ 2343 2344 /* 2345 * SunOS 4.X has no check for datagram sockets. 2346 * 5.X checks that it is connected (ENOTCONN) 2347 * X/Open requires that we check the connected state. 2348 */ 2349 if (!(so->so_state & SS_ISCONNECTED)) { 2350 if (!xnet_skip_checks) { 2351 error = ENOTCONN; 2352 if (xnet_check_print) { 2353 printf("sockfs: X/Open shutdown check " 2354 "caused ENOTCONN\n"); 2355 } 2356 } 2357 goto done; 2358 } 2359 /* 2360 * Record the current state and then perform any state changes. 2361 * Then use the difference between the old and new states to 2362 * determine which messages need to be sent. 2363 * This prevents e.g. 
duplicate T_ORDREL_REQ when there are 2364 * duplicate calls to shutdown(). 2365 */ 2366 old_state = so->so_state; 2367 2368 switch (how) { 2369 case 0: 2370 socantrcvmore(so); 2371 break; 2372 case 1: 2373 socantsendmore(so); 2374 break; 2375 case 2: 2376 socantsendmore(so); 2377 socantrcvmore(so); 2378 break; 2379 default: 2380 error = EINVAL; 2381 goto done; 2382 } 2383 2384 /* 2385 * Assumes that the SS_CANT* flags are never cleared in the above code. 2386 */ 2387 state_change = (so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) - 2388 (old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)); 2389 ASSERT((state_change & ~(SS_CANTRCVMORE|SS_CANTSENDMORE)) == 0); 2390 2391 switch (state_change) { 2392 case 0: 2393 dprintso(so, 1, 2394 ("sotpi_shutdown: nothing to send in state 0x%x\n", 2395 so->so_state)); 2396 goto done; 2397 2398 case SS_CANTRCVMORE: 2399 mutex_exit(&so->so_lock); 2400 strseteof(SOTOV(so), 1); 2401 /* 2402 * strseteof takes care of read side wakeups, 2403 * pollwakeups, and signals. 2404 */ 2405 /* 2406 * Get the read lock before flushing data to avoid problems 2407 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 2408 */ 2409 mutex_enter(&so->so_lock); 2410 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2411 mutex_exit(&so->so_lock); 2412 2413 /* Flush read side queue */ 2414 strflushrq(SOTOV(so), FLUSHALL); 2415 2416 mutex_enter(&so->so_lock); 2417 so_unlock_read(so); /* Clear SOREADLOCKED */ 2418 break; 2419 2420 case SS_CANTSENDMORE: 2421 mutex_exit(&so->so_lock); 2422 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2423 mutex_enter(&so->so_lock); 2424 break; 2425 2426 case SS_CANTSENDMORE|SS_CANTRCVMORE: 2427 mutex_exit(&so->so_lock); 2428 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2429 strseteof(SOTOV(so), 1); 2430 /* 2431 * strseteof takes care of read side wakeups, 2432 * pollwakeups, and signals. 2433 */ 2434 /* 2435 * Get the read lock before flushing data to avoid problems 2436 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 
2437 */ 2438 mutex_enter(&so->so_lock); 2439 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2440 mutex_exit(&so->so_lock); 2441 2442 /* Flush read side queue */ 2443 strflushrq(SOTOV(so), FLUSHALL); 2444 2445 mutex_enter(&so->so_lock); 2446 so_unlock_read(so); /* Clear SOREADLOCKED */ 2447 break; 2448 } 2449 2450 ASSERT(MUTEX_HELD(&so->so_lock)); 2451 2452 /* 2453 * If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them 2454 * was set due to this call and the new state has both of them set: 2455 * Send the AF_UNIX close indication 2456 * For T_COTS send a discon_ind 2457 * 2458 * If cantsend was set due to this call: 2459 * For T_COTSORD send an ordrel_ind 2460 * 2461 * Note that for T_CLTS there is no message sent here. 2462 */ 2463 if ((so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) == 2464 (SS_CANTRCVMORE|SS_CANTSENDMORE)) { 2465 /* 2466 * For SunOS 4.X compatibility we tell the other end 2467 * that we are unable to receive at this point. 2468 */ 2469 if (so->so_family == AF_UNIX && so->so_serv_type != T_CLTS) 2470 so_unix_close(so); 2471 2472 if (so->so_serv_type == T_COTS) 2473 error = sodisconnect(so, -1, _SODISCONNECT_LOCK_HELD); 2474 } 2475 if ((state_change & SS_CANTSENDMORE) && 2476 (so->so_serv_type == T_COTS_ORD)) { 2477 /* Send an orderly release */ 2478 ordrel_req.PRIM_type = T_ORDREL_REQ; 2479 2480 mutex_exit(&so->so_lock); 2481 mp = soallocproto1(&ordrel_req, sizeof (ordrel_req), 2482 0, _ALLOC_SLEEP); 2483 /* 2484 * Send down the T_ORDREL_REQ even if there is flow control. 2485 * This prevents shutdown from blocking. 2486 * Note that there is no T_OK_ACK for ordrel_req. 
2487 */ 2488 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2489 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2490 mutex_enter(&so->so_lock); 2491 if (error) { 2492 eprintsoline(so, error); 2493 goto done; 2494 } 2495 } 2496 2497 done: 2498 so_unlock_single(so, SOLOCKED); 2499 mutex_exit(&so->so_lock); 2500 return (error); 2501 } 2502 2503 /* 2504 * For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send 2505 * a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer 2506 * that we have closed. 2507 * Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length 2508 * T_UNITDATA_REQ containing the same option. 2509 * 2510 * For SOCK_DGRAM half-connections (somebody connected to this end 2511 * but this end is not connect) we don't know where to send any 2512 * SO_UNIX_CLOSE. 2513 * 2514 * We have to ignore stream head errors just in case there has been 2515 * a shutdown(output). 2516 * Ignore any flow control to try to get the message more quickly to the peer. 2517 * While locally ignoring flow control solves the problem when there 2518 * is only the loopback transport on the stream it would not provide 2519 * the correct AF_UNIX socket semantics when one or more modules have 2520 * been pushed. 
2521 */ 2522 void 2523 so_unix_close(struct sonode *so) 2524 { 2525 int error; 2526 struct T_opthdr toh; 2527 mblk_t *mp; 2528 2529 ASSERT(MUTEX_HELD(&so->so_lock)); 2530 2531 ASSERT(so->so_family == AF_UNIX); 2532 2533 if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) != 2534 (SS_ISCONNECTED|SS_ISBOUND)) 2535 return; 2536 2537 dprintso(so, 1, ("so_unix_close(%p) %s\n", 2538 so, pr_state(so->so_state, so->so_mode))); 2539 2540 toh.level = SOL_SOCKET; 2541 toh.name = SO_UNIX_CLOSE; 2542 2543 /* zero length + header */ 2544 toh.len = (t_uscalar_t)sizeof (struct T_opthdr); 2545 toh.status = 0; 2546 2547 if (so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) { 2548 struct T_optdata_req tdr; 2549 2550 tdr.PRIM_type = T_OPTDATA_REQ; 2551 tdr.DATA_flag = 0; 2552 2553 tdr.OPT_length = (t_scalar_t)sizeof (toh); 2554 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 2555 2556 /* NOTE: holding so_lock while sleeping */ 2557 mp = soallocproto2(&tdr, sizeof (tdr), 2558 &toh, sizeof (toh), 0, _ALLOC_SLEEP); 2559 } else { 2560 struct T_unitdata_req tudr; 2561 void *addr; 2562 socklen_t addrlen; 2563 void *src; 2564 socklen_t srclen; 2565 struct T_opthdr toh2; 2566 t_scalar_t size; 2567 2568 /* Connecteded DGRAM socket */ 2569 2570 /* 2571 * For AF_UNIX the destination address is translated to 2572 * an internal name and the source address is passed as 2573 * an option. 2574 */ 2575 /* 2576 * Length and family checks. 2577 */ 2578 error = so_addr_verify(so, so->so_faddr_sa, 2579 (t_uscalar_t)so->so_faddr_len); 2580 if (error) { 2581 eprintsoline(so, error); 2582 return; 2583 } 2584 if (so->so_state & SS_FADDR_NOXLATE) { 2585 /* 2586 * Already have a transport internal address. Do not 2587 * pass any (transport internal) source address. 2588 */ 2589 addr = so->so_faddr_sa; 2590 addrlen = (t_uscalar_t)so->so_faddr_len; 2591 src = NULL; 2592 srclen = 0; 2593 } else { 2594 /* 2595 * Pass the sockaddr_un source address as an option 2596 * and translate the remote address. 
2597 * Holding so_lock thus so_laddr_sa can not change. 2598 */ 2599 src = so->so_laddr_sa; 2600 srclen = (socklen_t)so->so_laddr_len; 2601 dprintso(so, 1, 2602 ("so_ux_close: srclen %d, src %p\n", 2603 srclen, src)); 2604 error = so_ux_addr_xlate(so, 2605 so->so_faddr_sa, 2606 (socklen_t)so->so_faddr_len, 0, 2607 &addr, &addrlen); 2608 if (error) { 2609 eprintsoline(so, error); 2610 return; 2611 } 2612 } 2613 tudr.PRIM_type = T_UNITDATA_REQ; 2614 tudr.DEST_length = addrlen; 2615 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 2616 if (srclen == 0) { 2617 tudr.OPT_length = (t_scalar_t)sizeof (toh); 2618 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2619 _TPI_ALIGN_TOPT(addrlen)); 2620 2621 size = tudr.OPT_offset + tudr.OPT_length; 2622 /* NOTE: holding so_lock while sleeping */ 2623 mp = soallocproto2(&tudr, sizeof (tudr), 2624 addr, addrlen, size, _ALLOC_SLEEP); 2625 mp->b_wptr += (_TPI_ALIGN_TOPT(addrlen) - addrlen); 2626 soappendmsg(mp, &toh, sizeof (toh)); 2627 } else { 2628 /* 2629 * There is a AF_UNIX sockaddr_un to include as a 2630 * source address option. 
2631 */ 2632 tudr.OPT_length = (t_scalar_t)(2 * sizeof (toh) + 2633 _TPI_ALIGN_TOPT(srclen)); 2634 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2635 _TPI_ALIGN_TOPT(addrlen)); 2636 2637 toh2.level = SOL_SOCKET; 2638 toh2.name = SO_SRCADDR; 2639 toh2.len = (t_uscalar_t)(srclen + 2640 sizeof (struct T_opthdr)); 2641 toh2.status = 0; 2642 2643 size = tudr.OPT_offset + tudr.OPT_length; 2644 2645 /* NOTE: holding so_lock while sleeping */ 2646 mp = soallocproto2(&tudr, sizeof (tudr), 2647 addr, addrlen, size, _ALLOC_SLEEP); 2648 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2649 soappendmsg(mp, &toh, sizeof (toh)); 2650 soappendmsg(mp, &toh2, sizeof (toh2)); 2651 soappendmsg(mp, src, srclen); 2652 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2653 } 2654 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2655 } 2656 mutex_exit(&so->so_lock); 2657 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2658 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2659 mutex_enter(&so->so_lock); 2660 } 2661 2662 /* 2663 * Handle recv* calls that set MSG_OOB or MSG_OOB together with MSG_PEEK. 2664 */ 2665 int 2666 sorecvoob(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, int flags) 2667 { 2668 mblk_t *mp, *nmp; 2669 int error; 2670 2671 dprintso(so, 1, ("sorecvoob(%p, %p, 0x%x)\n", so, msg, flags)); 2672 2673 /* 2674 * There is never any oob data with addresses or control since 2675 * the T_EXDATA_IND does not carry any options. 
2676 */ 2677 msg->msg_controllen = 0; 2678 msg->msg_namelen = 0; 2679 2680 mutex_enter(&so->so_lock); 2681 ASSERT(so_verify_oobstate(so)); 2682 if ((so->so_options & SO_OOBINLINE) || 2683 (so->so_state & (SS_OOBPEND|SS_HADOOBDATA)) != SS_OOBPEND) { 2684 dprintso(so, 1, ("sorecvoob: inline or data consumed\n")); 2685 mutex_exit(&so->so_lock); 2686 return (EINVAL); 2687 } 2688 if (!(so->so_state & SS_HAVEOOBDATA)) { 2689 dprintso(so, 1, ("sorecvoob: no data yet\n")); 2690 mutex_exit(&so->so_lock); 2691 return (EWOULDBLOCK); 2692 } 2693 ASSERT(so->so_oobmsg != NULL); 2694 mp = so->so_oobmsg; 2695 if (flags & MSG_PEEK) { 2696 /* 2697 * Since recv* can not return ENOBUFS we can not use dupmsg. 2698 * Instead we revert to the consolidation private 2699 * allocb_wait plus bcopy. 2700 */ 2701 mblk_t *mp1; 2702 2703 mp1 = allocb_wait(msgdsize(mp), BPRI_MED, STR_NOSIG, NULL); 2704 ASSERT(mp1); 2705 2706 while (mp != NULL) { 2707 ssize_t size; 2708 2709 size = MBLKL(mp); 2710 bcopy(mp->b_rptr, mp1->b_wptr, size); 2711 mp1->b_wptr += size; 2712 ASSERT(mp1->b_wptr <= mp1->b_datap->db_lim); 2713 mp = mp->b_cont; 2714 } 2715 mp = mp1; 2716 } else { 2717 /* 2718 * Update the state indicating that the data has been consumed. 2719 * Keep SS_OOBPEND set until data is consumed past the mark. 
2720 */ 2721 so->so_oobmsg = NULL; 2722 so->so_state ^= SS_HAVEOOBDATA|SS_HADOOBDATA; 2723 } 2724 dprintso(so, 1, 2725 ("after recvoob(%p): counts %d/%d state %s\n", 2726 so, so->so_oobsigcnt, 2727 so->so_oobcnt, pr_state(so->so_state, so->so_mode))); 2728 ASSERT(so_verify_oobstate(so)); 2729 mutex_exit(&so->so_lock); 2730 2731 error = 0; 2732 nmp = mp; 2733 while (nmp != NULL && uiop->uio_resid > 0) { 2734 ssize_t n = MBLKL(nmp); 2735 2736 n = MIN(n, uiop->uio_resid); 2737 if (n > 0) 2738 error = uiomove(nmp->b_rptr, n, 2739 UIO_READ, uiop); 2740 if (error) 2741 break; 2742 nmp = nmp->b_cont; 2743 } 2744 freemsg(mp); 2745 return (error); 2746 } 2747 2748 /* 2749 * Called by sotpi_recvmsg when reading a non-zero amount of data. 2750 * In addition, the caller typically verifies that there is some 2751 * potential state to clear by checking 2752 * if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) 2753 * before calling this routine. 2754 * Note that such a check can be made without holding so_lock since 2755 * sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg 2756 * decrements so_oobsigcnt. 2757 * 2758 * When data is read *after* the point that all pending 2759 * oob data has been consumed the oob indication is cleared. 2760 * 2761 * This logic keeps select/poll returning POLLRDBAND and 2762 * SIOCATMARK returning true until we have read past 2763 * the mark. 
2764 */ 2765 static void 2766 sorecv_update_oobstate(struct sonode *so) 2767 { 2768 mutex_enter(&so->so_lock); 2769 ASSERT(so_verify_oobstate(so)); 2770 dprintso(so, 1, 2771 ("sorecv_update_oobstate: counts %d/%d state %s\n", 2772 so->so_oobsigcnt, 2773 so->so_oobcnt, pr_state(so->so_state, so->so_mode))); 2774 if (so->so_oobsigcnt == 0) { 2775 /* No more pending oob indications */ 2776 so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK); 2777 freemsg(so->so_oobmsg); 2778 so->so_oobmsg = NULL; 2779 } 2780 ASSERT(so_verify_oobstate(so)); 2781 mutex_exit(&so->so_lock); 2782 } 2783 2784 /* 2785 * Handle recv* calls for an so which has NL7C saved recv mblk_t(s). 2786 */ 2787 static int 2788 nl7c_sorecv(struct sonode *so, mblk_t **rmp, uio_t *uiop, rval_t *rp) 2789 { 2790 int error = 0; 2791 mblk_t *tmp = NULL; 2792 mblk_t *pmp = NULL; 2793 mblk_t *nmp = so->so_nl7c_rcv_mp; 2794 2795 ASSERT(nmp != NULL); 2796 2797 while (nmp != NULL && uiop->uio_resid > 0) { 2798 ssize_t n; 2799 2800 if (DB_TYPE(nmp) == M_DATA) { 2801 /* 2802 * We have some data, uiomove up to resid bytes. 2803 */ 2804 n = MIN(MBLKL(nmp), uiop->uio_resid); 2805 if (n > 0) 2806 error = uiomove(nmp->b_rptr, n, UIO_READ, uiop); 2807 nmp->b_rptr += n; 2808 if (nmp->b_rptr == nmp->b_wptr) { 2809 pmp = nmp; 2810 nmp = nmp->b_cont; 2811 } 2812 if (error) 2813 break; 2814 } else { 2815 /* 2816 * We only handle data, save for caller to handle. 
2817 */ 2818 if (pmp != NULL) { 2819 pmp->b_cont = nmp->b_cont; 2820 } 2821 nmp->b_cont = NULL; 2822 if (*rmp == NULL) { 2823 *rmp = nmp; 2824 } else { 2825 tmp->b_cont = nmp; 2826 } 2827 nmp = nmp->b_cont; 2828 tmp = nmp; 2829 } 2830 } 2831 if (pmp != NULL) { 2832 /* Free any mblk_t(s) which we have consumed */ 2833 pmp->b_cont = NULL; 2834 freemsg(so->so_nl7c_rcv_mp); 2835 } 2836 if ((so->so_nl7c_rcv_mp = nmp) == NULL) { 2837 /* Last mblk_t so return the saved kstrgetmsg() rval/error */ 2838 if (error == 0) { 2839 rval_t *p = (rval_t *)&so->so_nl7c_rcv_rval; 2840 2841 error = p->r_v.r_v2; 2842 p->r_v.r_v2 = 0; 2843 } 2844 rp->r_vals = so->so_nl7c_rcv_rval; 2845 so->so_nl7c_rcv_rval = 0; 2846 } else { 2847 /* More mblk_t(s) to process so no rval to return */ 2848 rp->r_vals = 0; 2849 } 2850 return (error); 2851 } 2852 2853 /* 2854 * Receive the next message on the queue. 2855 * If msg_controllen is non-zero when called the caller is interested in 2856 * any received control info (options). 2857 * If msg_namelen is non-zero when called the caller is interested in 2858 * any received source address. 2859 * The routine returns with msg_control and msg_name pointing to 2860 * kmem_alloc'ed memory which the caller has to free. 2861 */ 2862 int 2863 sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop) 2864 { 2865 union T_primitives *tpr; 2866 mblk_t *mp; 2867 uchar_t pri; 2868 int pflag, opflag; 2869 void *control; 2870 t_uscalar_t controllen; 2871 t_uscalar_t namelen; 2872 int so_state = so->so_state; /* Snapshot */ 2873 ssize_t saved_resid; 2874 int error; 2875 rval_t rval; 2876 int flags; 2877 clock_t timout; 2878 int first; 2879 2880 flags = msg->msg_flags; 2881 msg->msg_flags = 0; 2882 2883 dprintso(so, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n", 2884 so, msg, flags, 2885 pr_state(so->so_state, so->so_mode), so->so_error)); 2886 2887 /* 2888 * If we are not connected because we have never been connected 2889 * we return ENOTCONN. 
If we have been connected (but are no longer 2890 * connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return 2891 * the EOF. 2892 * 2893 * An alternative would be to post an ENOTCONN error in stream head 2894 * (read+write) and clear it when we're connected. However, that error 2895 * would cause incorrect poll/select behavior! 2896 */ 2897 if ((so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 && 2898 (so->so_mode & SM_CONNREQUIRED)) { 2899 return (ENOTCONN); 2900 } 2901 2902 /* 2903 * Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but 2904 * after checking that the read queue is empty) and returns zero. 2905 * This implementation will sleep (in kstrgetmsg) even if uio_resid 2906 * is zero. 2907 */ 2908 2909 if (flags & MSG_OOB) { 2910 /* Check that the transport supports OOB */ 2911 if (!(so->so_mode & SM_EXDATA)) 2912 return (EOPNOTSUPP); 2913 return (sorecvoob(so, msg, uiop, flags)); 2914 } 2915 2916 /* 2917 * Set msg_controllen and msg_namelen to zero here to make it 2918 * simpler in the cases that no control or name is returned. 2919 */ 2920 controllen = msg->msg_controllen; 2921 namelen = msg->msg_namelen; 2922 msg->msg_controllen = 0; 2923 msg->msg_namelen = 0; 2924 2925 dprintso(so, 1, ("sotpi_recvmsg: namelen %d controllen %d\n", 2926 namelen, controllen)); 2927 2928 mutex_enter(&so->so_lock); 2929 /* 2930 * If an NL7C enabled socket and not waiting for write data. 
2931 */ 2932 if ((so->so_nl7c_flags & (NL7C_ENABLED | NL7C_WAITWRITE)) == 2933 NL7C_ENABLED) { 2934 if (so->so_nl7c_uri) { 2935 /* Close uri processing for a previous request */ 2936 nl7c_close(so); 2937 } 2938 if ((so_state & SS_CANTRCVMORE) && so->so_nl7c_rcv_mp == NULL) { 2939 /* Nothing to process, EOF */ 2940 mutex_exit(&so->so_lock); 2941 return (0); 2942 } else if (so->so_nl7c_flags & NL7C_SOPERSIST) { 2943 /* Persistent NL7C socket, try to process request */ 2944 boolean_t ret; 2945 2946 ret = nl7c_process(so, 2947 (so->so_state & (SS_NONBLOCK|SS_NDELAY))); 2948 rval.r_vals = so->so_nl7c_rcv_rval; 2949 error = rval.r_v.r_v2; 2950 if (error) { 2951 /* Error of some sort, return it */ 2952 mutex_exit(&so->so_lock); 2953 return (error); 2954 } 2955 if (so->so_nl7c_flags && 2956 ! (so->so_nl7c_flags & NL7C_WAITWRITE)) { 2957 /* 2958 * Still an NL7C socket and no data 2959 * to pass up to the caller. 2960 */ 2961 mutex_exit(&so->so_lock); 2962 if (ret) { 2963 /* EOF */ 2964 return (0); 2965 } else { 2966 /* Need more data */ 2967 return (EAGAIN); 2968 } 2969 } 2970 } else { 2971 /* 2972 * Not persistent so no further NL7C processing. 2973 */ 2974 so->so_nl7c_flags = 0; 2975 } 2976 } 2977 /* 2978 * Only one reader is allowed at any given time. This is needed 2979 * for T_EXDATA handling and, in the future, MSG_WAITALL. 2980 * 2981 * This is slightly different that BSD behavior in that it fails with 2982 * EWOULDBLOCK when using nonblocking io. In BSD the read queue access 2983 * is single-threaded using sblock(), which is dropped while waiting 2984 * for data to appear. The difference shows up e.g. if one 2985 * file descriptor does not have O_NONBLOCK but a dup'ed file descriptor 2986 * does use nonblocking io and different threads are reading each 2987 * file descriptor. In BSD there would never be an EWOULDBLOCK error 2988 * in this case as long as the read queue doesn't get empty. 
2989 * In this implementation the thread using nonblocking io can 2990 * get an EWOULDBLOCK error due to the blocking thread executing 2991 * e.g. in the uiomove in kstrgetmsg. 2992 * This difference is not believed to be significant. 2993 */ 2994 error = so_lock_read_intr(so, uiop->uio_fmode); /* Set SOREADLOCKED */ 2995 mutex_exit(&so->so_lock); 2996 if (error) 2997 return (error); 2998 2999 /* 3000 * Tell kstrgetmsg to not inspect the stream head errors until all 3001 * queued data has been consumed. 3002 * Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set. 3003 * Also, If uio_fmode indicates nonblocking kstrgetmsg will not block. 3004 * 3005 * MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and 3006 * to T_OPTDATA_IND that do not contain any user-visible control msg. 3007 * Note that MSG_WAITALL set with MSG_PEEK is a noop. 3008 */ 3009 pflag = MSG_ANY | MSG_DELAYERROR; 3010 if (flags & MSG_PEEK) { 3011 pflag |= MSG_IPEEK; 3012 flags &= ~MSG_WAITALL; 3013 } 3014 if (so->so_mode & SM_ATOMIC) 3015 pflag |= MSG_DISCARDTAIL; 3016 3017 if (flags & MSG_DONTWAIT) 3018 timout = 0; 3019 else 3020 timout = -1; 3021 opflag = pflag; 3022 first = 1; 3023 3024 retry: 3025 saved_resid = uiop->uio_resid; 3026 pri = 0; 3027 mp = NULL; 3028 if (so->so_nl7c_rcv_mp != NULL) { 3029 /* Already kstrgetmsg()ed saved mblk(s) from NL7C */ 3030 error = nl7c_sorecv(so, &mp, uiop, &rval); 3031 } else { 3032 error = kstrgetmsg(SOTOV(so), &mp, uiop, &pri, &pflag, 3033 timout, &rval); 3034 } 3035 if (error) { 3036 switch (error) { 3037 case EINTR: 3038 case EWOULDBLOCK: 3039 if (!first) 3040 error = 0; 3041 break; 3042 case ETIME: 3043 /* Returned from kstrgetmsg when timeout expires */ 3044 if (!first) 3045 error = 0; 3046 else 3047 error = EWOULDBLOCK; 3048 break; 3049 default: 3050 eprintsoline(so, error); 3051 break; 3052 } 3053 mutex_enter(&so->so_lock); 3054 so_unlock_read(so); /* Clear SOREADLOCKED */ 3055 mutex_exit(&so->so_lock); 3056 return (error); 3057 } 3058 
/* 3059 * For datagrams the MOREDATA flag is used to set MSG_TRUNC. 3060 * For non-datagrams MOREDATA is used to set MSG_EOR. 3061 */ 3062 ASSERT(!(rval.r_val1 & MORECTL)); 3063 if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC)) 3064 msg->msg_flags |= MSG_TRUNC; 3065 3066 if (mp == NULL) { 3067 dprintso(so, 1, ("sotpi_recvmsg: got M_DATA\n")); 3068 /* 3069 * 4.3BSD and 4.4BSD clears the mark when peeking across it. 3070 * The draft Posix socket spec states that the mark should 3071 * not be cleared when peeking. We follow the latter. 3072 */ 3073 if ((so->so_state & 3074 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3075 (uiop->uio_resid != saved_resid) && 3076 !(flags & MSG_PEEK)) { 3077 sorecv_update_oobstate(so); 3078 } 3079 3080 mutex_enter(&so->so_lock); 3081 /* Set MSG_EOR based on MOREDATA */ 3082 if (!(rval.r_val1 & MOREDATA)) { 3083 if (so->so_state & SS_SAVEDEOR) { 3084 msg->msg_flags |= MSG_EOR; 3085 so->so_state &= ~SS_SAVEDEOR; 3086 } 3087 } 3088 /* 3089 * If some data was received (i.e. not EOF) and the 3090 * read/recv* has not been satisfied wait for some more. 3091 */ 3092 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3093 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3094 mutex_exit(&so->so_lock); 3095 first = 0; 3096 pflag = opflag | MSG_NOMARK; 3097 goto retry; 3098 } 3099 so_unlock_read(so); /* Clear SOREADLOCKED */ 3100 mutex_exit(&so->so_lock); 3101 return (0); 3102 } 3103 3104 /* strsock_proto has already verified length and alignment */ 3105 tpr = (union T_primitives *)mp->b_rptr; 3106 dprintso(so, 1, ("sotpi_recvmsg: type %d\n", tpr->type)); 3107 3108 switch (tpr->type) { 3109 case T_DATA_IND: { 3110 if ((so->so_state & 3111 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3112 (uiop->uio_resid != saved_resid) && 3113 !(flags & MSG_PEEK)) { 3114 sorecv_update_oobstate(so); 3115 } 3116 3117 /* 3118 * Set msg_flags to MSG_EOR based on 3119 * MORE_flag and MOREDATA. 
3120 */ 3121 mutex_enter(&so->so_lock); 3122 so->so_state &= ~SS_SAVEDEOR; 3123 if (!(tpr->data_ind.MORE_flag & 1)) { 3124 if (!(rval.r_val1 & MOREDATA)) 3125 msg->msg_flags |= MSG_EOR; 3126 else 3127 so->so_state |= SS_SAVEDEOR; 3128 } 3129 freemsg(mp); 3130 /* 3131 * If some data was received (i.e. not EOF) and the 3132 * read/recv* has not been satisfied wait for some more. 3133 */ 3134 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3135 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3136 mutex_exit(&so->so_lock); 3137 first = 0; 3138 pflag = opflag | MSG_NOMARK; 3139 goto retry; 3140 } 3141 so_unlock_read(so); /* Clear SOREADLOCKED */ 3142 mutex_exit(&so->so_lock); 3143 return (0); 3144 } 3145 case T_UNITDATA_IND: { 3146 void *addr; 3147 t_uscalar_t addrlen; 3148 void *abuf; 3149 t_uscalar_t optlen; 3150 void *opt; 3151 3152 if ((so->so_state & 3153 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3154 (uiop->uio_resid != saved_resid) && 3155 !(flags & MSG_PEEK)) { 3156 sorecv_update_oobstate(so); 3157 } 3158 3159 if (namelen != 0) { 3160 /* Caller wants source address */ 3161 addrlen = tpr->unitdata_ind.SRC_length; 3162 addr = sogetoff(mp, 3163 tpr->unitdata_ind.SRC_offset, 3164 addrlen, 1); 3165 if (addr == NULL) { 3166 freemsg(mp); 3167 error = EPROTO; 3168 eprintsoline(so, error); 3169 goto err; 3170 } 3171 if (so->so_family == AF_UNIX) { 3172 /* 3173 * Can not use the transport level address. 3174 * If there is a SO_SRCADDR option carrying 3175 * the socket level address it will be 3176 * extracted below. 3177 */ 3178 addr = NULL; 3179 addrlen = 0; 3180 } 3181 } 3182 optlen = tpr->unitdata_ind.OPT_length; 3183 if (optlen != 0) { 3184 t_uscalar_t ncontrollen; 3185 3186 /* 3187 * Extract any source address option. 3188 * Determine how large cmsg buffer is needed. 
3189 */ 3190 opt = sogetoff(mp, 3191 tpr->unitdata_ind.OPT_offset, 3192 optlen, __TPI_ALIGN_SIZE); 3193 3194 if (opt == NULL) { 3195 freemsg(mp); 3196 error = EPROTO; 3197 eprintsoline(so, error); 3198 goto err; 3199 } 3200 if (so->so_family == AF_UNIX) 3201 so_getopt_srcaddr(opt, optlen, &addr, &addrlen); 3202 ncontrollen = so_cmsglen(mp, opt, optlen, 3203 !(flags & MSG_XPG4_2)); 3204 if (controllen != 0) 3205 controllen = ncontrollen; 3206 else if (ncontrollen != 0) 3207 msg->msg_flags |= MSG_CTRUNC; 3208 } else { 3209 controllen = 0; 3210 } 3211 3212 if (namelen != 0) { 3213 /* 3214 * Return address to caller. 3215 * Caller handles truncation if length 3216 * exceeds msg_namelen. 3217 * NOTE: AF_UNIX NUL termination is ensured by 3218 * the sender's copyin_name(). 3219 */ 3220 abuf = kmem_alloc(addrlen, KM_SLEEP); 3221 3222 bcopy(addr, abuf, addrlen); 3223 msg->msg_name = abuf; 3224 msg->msg_namelen = addrlen; 3225 } 3226 3227 if (controllen != 0) { 3228 /* 3229 * Return control msg to caller. 3230 * Caller handles truncation if length 3231 * exceeds msg_controllen. 
3232 */ 3233 control = kmem_alloc(controllen, KM_SLEEP); 3234 3235 error = so_opt2cmsg(mp, opt, optlen, 3236 !(flags & MSG_XPG4_2), 3237 control, controllen); 3238 if (error) { 3239 freemsg(mp); 3240 if (msg->msg_namelen != 0) 3241 kmem_free(msg->msg_name, 3242 msg->msg_namelen); 3243 kmem_free(control, controllen); 3244 eprintsoline(so, error); 3245 goto err; 3246 } 3247 msg->msg_control = control; 3248 msg->msg_controllen = controllen; 3249 } 3250 3251 freemsg(mp); 3252 mutex_enter(&so->so_lock); 3253 so_unlock_read(so); /* Clear SOREADLOCKED */ 3254 mutex_exit(&so->so_lock); 3255 return (0); 3256 } 3257 case T_OPTDATA_IND: { 3258 struct T_optdata_req *tdr; 3259 void *opt; 3260 t_uscalar_t optlen; 3261 3262 if ((so->so_state & 3263 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3264 (uiop->uio_resid != saved_resid) && 3265 !(flags & MSG_PEEK)) { 3266 sorecv_update_oobstate(so); 3267 } 3268 3269 tdr = (struct T_optdata_req *)mp->b_rptr; 3270 optlen = tdr->OPT_length; 3271 if (optlen != 0) { 3272 t_uscalar_t ncontrollen; 3273 /* 3274 * Determine how large cmsg buffer is needed. 3275 */ 3276 opt = sogetoff(mp, 3277 tpr->optdata_ind.OPT_offset, 3278 optlen, __TPI_ALIGN_SIZE); 3279 3280 if (opt == NULL) { 3281 freemsg(mp); 3282 error = EPROTO; 3283 eprintsoline(so, error); 3284 goto err; 3285 } 3286 3287 ncontrollen = so_cmsglen(mp, opt, optlen, 3288 !(flags & MSG_XPG4_2)); 3289 if (controllen != 0) 3290 controllen = ncontrollen; 3291 else if (ncontrollen != 0) 3292 msg->msg_flags |= MSG_CTRUNC; 3293 } else { 3294 controllen = 0; 3295 } 3296 3297 if (controllen != 0) { 3298 /* 3299 * Return control msg to caller. 3300 * Caller handles truncation if length 3301 * exceeds msg_controllen. 
3302 */ 3303 control = kmem_alloc(controllen, KM_SLEEP); 3304 3305 error = so_opt2cmsg(mp, opt, optlen, 3306 !(flags & MSG_XPG4_2), 3307 control, controllen); 3308 if (error) { 3309 freemsg(mp); 3310 kmem_free(control, controllen); 3311 eprintsoline(so, error); 3312 goto err; 3313 } 3314 msg->msg_control = control; 3315 msg->msg_controllen = controllen; 3316 } 3317 3318 /* 3319 * Set msg_flags to MSG_EOR based on 3320 * DATA_flag and MOREDATA. 3321 */ 3322 mutex_enter(&so->so_lock); 3323 so->so_state &= ~SS_SAVEDEOR; 3324 if (!(tpr->data_ind.MORE_flag & 1)) { 3325 if (!(rval.r_val1 & MOREDATA)) 3326 msg->msg_flags |= MSG_EOR; 3327 else 3328 so->so_state |= SS_SAVEDEOR; 3329 } 3330 freemsg(mp); 3331 /* 3332 * If some data was received (i.e. not EOF) and the 3333 * read/recv* has not been satisfied wait for some more. 3334 * Not possible to wait if control info was received. 3335 */ 3336 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3337 controllen == 0 && 3338 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3339 mutex_exit(&so->so_lock); 3340 first = 0; 3341 pflag = opflag | MSG_NOMARK; 3342 goto retry; 3343 } 3344 so_unlock_read(so); /* Clear SOREADLOCKED */ 3345 mutex_exit(&so->so_lock); 3346 return (0); 3347 } 3348 case T_EXDATA_IND: { 3349 dprintso(so, 1, 3350 ("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld " 3351 "state %s\n", 3352 so->so_oobsigcnt, so->so_oobcnt, 3353 saved_resid - uiop->uio_resid, 3354 pr_state(so->so_state, so->so_mode))); 3355 /* 3356 * kstrgetmsg handles MSGMARK so there is nothing to 3357 * inspect in the T_EXDATA_IND. 3358 * strsock_proto makes the stream head queue the T_EXDATA_IND 3359 * as a separate message with no M_DATA component. Furthermore, 3360 * the stream head does not consolidate M_DATA messages onto 3361 * an MSGMARK'ed message ensuring that the T_EXDATA_IND 3362 * remains a message by itself. 
This is needed since MSGMARK 3363 * marks both the whole message as well as the last byte 3364 * of the message. 3365 */ 3366 freemsg(mp); 3367 ASSERT(uiop->uio_resid == saved_resid); /* No data */ 3368 if (flags & MSG_PEEK) { 3369 /* 3370 * Even though we are peeking we consume the 3371 * T_EXDATA_IND thereby moving the mark information 3372 * to SS_RCVATMARK. Then the oob code below will 3373 * retry the peeking kstrgetmsg. 3374 * Note that the stream head read queue is 3375 * never flushed without holding SOREADLOCKED 3376 * thus the T_EXDATA_IND can not disappear 3377 * underneath us. 3378 */ 3379 dprintso(so, 1, 3380 ("sotpi_recvmsg: consume EXDATA_IND " 3381 "counts %d/%d state %s\n", 3382 so->so_oobsigcnt, 3383 so->so_oobcnt, 3384 pr_state(so->so_state, so->so_mode))); 3385 3386 pflag = MSG_ANY | MSG_DELAYERROR; 3387 if (so->so_mode & SM_ATOMIC) 3388 pflag |= MSG_DISCARDTAIL; 3389 3390 pri = 0; 3391 mp = NULL; 3392 3393 error = kstrgetmsg(SOTOV(so), &mp, uiop, 3394 &pri, &pflag, (clock_t)-1, &rval); 3395 ASSERT(uiop->uio_resid == saved_resid); 3396 3397 if (error) { 3398 #ifdef SOCK_DEBUG 3399 if (error != EWOULDBLOCK && error != EINTR) { 3400 eprintsoline(so, error); 3401 } 3402 #endif /* SOCK_DEBUG */ 3403 mutex_enter(&so->so_lock); 3404 so_unlock_read(so); /* Clear SOREADLOCKED */ 3405 mutex_exit(&so->so_lock); 3406 return (error); 3407 } 3408 ASSERT(mp); 3409 tpr = (union T_primitives *)mp->b_rptr; 3410 ASSERT(tpr->type == T_EXDATA_IND); 3411 freemsg(mp); 3412 } /* end "if (flags & MSG_PEEK)" */ 3413 3414 /* 3415 * Decrement the number of queued and pending oob. 3416 * 3417 * SS_RCVATMARK is cleared when we read past a mark. 3418 * SS_HAVEOOBDATA is cleared when we've read past the 3419 * last mark. 3420 * SS_OOBPEND is cleared if we've read past the last 3421 * mark and no (new) SIGURG has been posted. 
3422 */ 3423 mutex_enter(&so->so_lock); 3424 ASSERT(so_verify_oobstate(so)); 3425 ASSERT(so->so_oobsigcnt >= so->so_oobcnt); 3426 ASSERT(so->so_oobsigcnt > 0); 3427 so->so_oobsigcnt--; 3428 ASSERT(so->so_oobcnt > 0); 3429 so->so_oobcnt--; 3430 /* 3431 * Since the T_EXDATA_IND has been removed from the stream 3432 * head, but we have not read data past the mark, 3433 * sockfs needs to track that the socket is still at the mark. 3434 * 3435 * Since no data was received call kstrgetmsg again to wait 3436 * for data. 3437 */ 3438 so->so_state |= SS_RCVATMARK; 3439 mutex_exit(&so->so_lock); 3440 dprintso(so, 1, 3441 ("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n", 3442 so->so_oobsigcnt, so->so_oobcnt, 3443 pr_state(so->so_state, so->so_mode))); 3444 pflag = opflag; 3445 goto retry; 3446 } 3447 default: 3448 ASSERT(0); 3449 freemsg(mp); 3450 error = EPROTO; 3451 eprintsoline(so, error); 3452 goto err; 3453 } 3454 /* NOTREACHED */ 3455 err: 3456 mutex_enter(&so->so_lock); 3457 so_unlock_read(so); /* Clear SOREADLOCKED */ 3458 mutex_exit(&so->so_lock); 3459 return (error); 3460 } 3461 3462 /* 3463 * Sending data with options on a datagram socket. 3464 * Assumes caller has verified that SS_ISBOUND etc. are set. 
3465 */ 3466 static int 3467 sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3468 struct uio *uiop, void *control, t_uscalar_t controllen, int flags) 3469 { 3470 struct T_unitdata_req tudr; 3471 mblk_t *mp; 3472 int error; 3473 void *addr; 3474 socklen_t addrlen; 3475 void *src; 3476 socklen_t srclen; 3477 ssize_t len; 3478 int size; 3479 struct T_opthdr toh; 3480 struct fdbuf *fdbuf; 3481 t_uscalar_t optlen; 3482 void *fds; 3483 int fdlen; 3484 3485 ASSERT(name && namelen); 3486 ASSERT(control && controllen); 3487 3488 len = uiop->uio_resid; 3489 if (len > (ssize_t)so->so_tidu_size) { 3490 return (EMSGSIZE); 3491 } 3492 3493 /* 3494 * For AF_UNIX the destination address is translated to an internal 3495 * name and the source address is passed as an option. 3496 * Also, file descriptors are passed as file pointers in an 3497 * option. 3498 */ 3499 3500 /* 3501 * Length and family checks. 3502 */ 3503 error = so_addr_verify(so, name, namelen); 3504 if (error) { 3505 eprintsoline(so, error); 3506 return (error); 3507 } 3508 if (so->so_family == AF_UNIX) { 3509 if (so->so_state & SS_FADDR_NOXLATE) { 3510 /* 3511 * Already have a transport internal address. Do not 3512 * pass any (transport internal) source address. 3513 */ 3514 addr = name; 3515 addrlen = namelen; 3516 src = NULL; 3517 srclen = 0; 3518 } else { 3519 /* 3520 * Pass the sockaddr_un source address as an option 3521 * and translate the remote address. 3522 * 3523 * Note that this code does not prevent so_laddr_sa 3524 * from changing while it is being used. Thus 3525 * if an unbind+bind occurs concurrently with this 3526 * send the peer might see a partially new and a 3527 * partially old "from" address. 
3528 */ 3529 src = so->so_laddr_sa; 3530 srclen = (t_uscalar_t)so->so_laddr_len; 3531 dprintso(so, 1, 3532 ("sosend_dgramcmsg UNIX: srclen %d, src %p\n", 3533 srclen, src)); 3534 error = so_ux_addr_xlate(so, name, namelen, 3535 (flags & MSG_XPG4_2), 3536 &addr, &addrlen); 3537 if (error) { 3538 eprintsoline(so, error); 3539 return (error); 3540 } 3541 } 3542 } else { 3543 addr = name; 3544 addrlen = namelen; 3545 src = NULL; 3546 srclen = 0; 3547 } 3548 optlen = so_optlen(control, controllen, 3549 !(flags & MSG_XPG4_2)); 3550 tudr.PRIM_type = T_UNITDATA_REQ; 3551 tudr.DEST_length = addrlen; 3552 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3553 if (srclen != 0) 3554 tudr.OPT_length = (t_scalar_t)(optlen + sizeof (toh) + 3555 _TPI_ALIGN_TOPT(srclen)); 3556 else 3557 tudr.OPT_length = optlen; 3558 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3559 _TPI_ALIGN_TOPT(addrlen)); 3560 3561 size = tudr.OPT_offset + tudr.OPT_length; 3562 3563 /* 3564 * File descriptors only when SM_FDPASSING set. 3565 */ 3566 error = so_getfdopt(control, controllen, 3567 !(flags & MSG_XPG4_2), &fds, &fdlen); 3568 if (error) 3569 return (error); 3570 if (fdlen != -1) { 3571 if (!(so->so_mode & SM_FDPASSING)) 3572 return (EOPNOTSUPP); 3573 3574 error = fdbuf_create(fds, fdlen, &fdbuf); 3575 if (error) 3576 return (error); 3577 mp = fdbuf_allocmsg(size, fdbuf); 3578 } else { 3579 mp = soallocproto(size, _ALLOC_INTR); 3580 if (mp == NULL) { 3581 /* 3582 * Caught a signal waiting for memory. 3583 * Let send* return EINTR. 
3584 */ 3585 return (EINTR); 3586 } 3587 } 3588 soappendmsg(mp, &tudr, sizeof (tudr)); 3589 soappendmsg(mp, addr, addrlen); 3590 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3591 3592 if (fdlen != -1) { 3593 ASSERT(fdbuf != NULL); 3594 toh.level = SOL_SOCKET; 3595 toh.name = SO_FILEP; 3596 toh.len = fdbuf->fd_size + 3597 (t_uscalar_t)sizeof (struct T_opthdr); 3598 toh.status = 0; 3599 soappendmsg(mp, &toh, sizeof (toh)); 3600 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3601 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3602 } 3603 if (srclen != 0) { 3604 /* 3605 * There is a AF_UNIX sockaddr_un to include as a source 3606 * address option. 3607 */ 3608 toh.level = SOL_SOCKET; 3609 toh.name = SO_SRCADDR; 3610 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3611 toh.status = 0; 3612 soappendmsg(mp, &toh, sizeof (toh)); 3613 soappendmsg(mp, src, srclen); 3614 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3615 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3616 } 3617 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3618 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3619 /* At most 3 bytes left in the message */ 3620 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3621 ASSERT(MBLKL(mp) <= (ssize_t)size); 3622 3623 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3624 #ifdef C2_AUDIT 3625 if (audit_active) 3626 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3627 #endif /* C2_AUDIT */ 3628 3629 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3630 #ifdef SOCK_DEBUG 3631 if (error) { 3632 eprintsoline(so, error); 3633 } 3634 #endif /* SOCK_DEBUG */ 3635 return (error); 3636 } 3637 3638 /* 3639 * Sending data with options on a connected stream socket. 3640 * Assumes caller has verified that SS_ISCONNECTED is set. 
3641 */ 3642 static int 3643 sosend_svccmsg(struct sonode *so, 3644 struct uio *uiop, 3645 int more, 3646 void *control, 3647 t_uscalar_t controllen, 3648 int flags) 3649 { 3650 struct T_optdata_req tdr; 3651 mblk_t *mp; 3652 int error; 3653 ssize_t iosize; 3654 int first = 1; 3655 int size; 3656 struct fdbuf *fdbuf; 3657 t_uscalar_t optlen; 3658 void *fds; 3659 int fdlen; 3660 struct T_opthdr toh; 3661 3662 dprintso(so, 1, 3663 ("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid)); 3664 3665 /* 3666 * Has to be bound and connected. However, since no locks are 3667 * held the state could have changed after sotpi_sendmsg checked it 3668 * thus it is not possible to ASSERT on the state. 3669 */ 3670 3671 /* Options on connection-oriented only when SM_OPTDATA set. */ 3672 if (!(so->so_mode & SM_OPTDATA)) 3673 return (EOPNOTSUPP); 3674 3675 do { 3676 /* 3677 * Set the MORE flag if uio_resid does not fit in this 3678 * message or if the caller passed in "more". 3679 * Error for transports with zero tidu_size. 3680 */ 3681 tdr.PRIM_type = T_OPTDATA_REQ; 3682 iosize = so->so_tidu_size; 3683 if (iosize <= 0) 3684 return (EMSGSIZE); 3685 if (uiop->uio_resid > iosize) { 3686 tdr.DATA_flag = 1; 3687 } else { 3688 if (more) 3689 tdr.DATA_flag = 1; 3690 else 3691 tdr.DATA_flag = 0; 3692 iosize = uiop->uio_resid; 3693 } 3694 dprintso(so, 1, ("sosend_svccmsg: sending %d, %ld bytes\n", 3695 tdr.DATA_flag, iosize)); 3696 3697 optlen = so_optlen(control, controllen, !(flags & MSG_XPG4_2)); 3698 tdr.OPT_length = optlen; 3699 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 3700 3701 size = (int)sizeof (tdr) + optlen; 3702 /* 3703 * File descriptors only when SM_FDPASSING set. 
3704 */ 3705 error = so_getfdopt(control, controllen, 3706 !(flags & MSG_XPG4_2), &fds, &fdlen); 3707 if (error) 3708 return (error); 3709 if (fdlen != -1) { 3710 if (!(so->so_mode & SM_FDPASSING)) 3711 return (EOPNOTSUPP); 3712 3713 error = fdbuf_create(fds, fdlen, &fdbuf); 3714 if (error) 3715 return (error); 3716 mp = fdbuf_allocmsg(size, fdbuf); 3717 } else { 3718 mp = soallocproto(size, _ALLOC_INTR); 3719 if (mp == NULL) { 3720 /* 3721 * Caught a signal waiting for memory. 3722 * Let send* return EINTR. 3723 */ 3724 return (first ? EINTR : 0); 3725 } 3726 } 3727 soappendmsg(mp, &tdr, sizeof (tdr)); 3728 3729 if (fdlen != -1) { 3730 ASSERT(fdbuf != NULL); 3731 toh.level = SOL_SOCKET; 3732 toh.name = SO_FILEP; 3733 toh.len = fdbuf->fd_size + 3734 (t_uscalar_t)sizeof (struct T_opthdr); 3735 toh.status = 0; 3736 soappendmsg(mp, &toh, sizeof (toh)); 3737 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3738 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3739 } 3740 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3741 /* At most 3 bytes left in the message */ 3742 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3743 ASSERT(MBLKL(mp) <= (ssize_t)size); 3744 3745 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3746 3747 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 3748 0, MSG_BAND, 0); 3749 if (error) { 3750 if (!first && error == EWOULDBLOCK) 3751 return (0); 3752 eprintsoline(so, error); 3753 return (error); 3754 } 3755 control = NULL; 3756 first = 0; 3757 if (uiop->uio_resid > 0) { 3758 /* 3759 * Recheck for fatal errors. Fail write even though 3760 * some data have been written. This is consistent 3761 * with strwrite semantics and BSD sockets semantics. 
3762 */ 3763 if (so->so_state & SS_CANTSENDMORE) { 3764 tsignal(curthread, SIGPIPE); 3765 eprintsoline(so, error); 3766 return (EPIPE); 3767 } 3768 if (so->so_error != 0) { 3769 mutex_enter(&so->so_lock); 3770 error = sogeterr(so); 3771 mutex_exit(&so->so_lock); 3772 if (error != 0) { 3773 eprintsoline(so, error); 3774 return (error); 3775 } 3776 } 3777 } 3778 } while (uiop->uio_resid > 0); 3779 return (0); 3780 } 3781 3782 /* 3783 * Sending data on a datagram socket. 3784 * Assumes caller has verified that SS_ISBOUND etc. are set. 3785 * 3786 * For AF_UNIX the destination address is translated to an internal 3787 * name and the source address is passed as an option. 3788 */ 3789 int 3790 sosend_dgram(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3791 struct uio *uiop, int flags) 3792 { 3793 struct T_unitdata_req tudr; 3794 mblk_t *mp; 3795 int error; 3796 void *addr; 3797 socklen_t addrlen; 3798 void *src; 3799 socklen_t srclen; 3800 ssize_t len; 3801 3802 ASSERT(name != NULL && namelen != 0); 3803 3804 len = uiop->uio_resid; 3805 if (len > so->so_tidu_size) { 3806 error = EMSGSIZE; 3807 goto done; 3808 } 3809 3810 /* Length and family checks */ 3811 error = so_addr_verify(so, name, namelen); 3812 if (error != 0) 3813 goto done; 3814 3815 if (so->so_state & SS_DIRECT) 3816 return (sodgram_direct(so, name, namelen, uiop, flags)); 3817 3818 if (so->so_family == AF_UNIX) { 3819 if (so->so_state & SS_FADDR_NOXLATE) { 3820 /* 3821 * Already have a transport internal address. Do not 3822 * pass any (transport internal) source address. 3823 */ 3824 addr = name; 3825 addrlen = namelen; 3826 src = NULL; 3827 srclen = 0; 3828 } else { 3829 /* 3830 * Pass the sockaddr_un source address as an option 3831 * and translate the remote address. 3832 * 3833 * Note that this code does not prevent so_laddr_sa 3834 * from changing while it is being used. 
Thus 3835 * if an unbind+bind occurs concurrently with this 3836 * send the peer might see a partially new and a 3837 * partially old "from" address. 3838 */ 3839 src = so->so_laddr_sa; 3840 srclen = (socklen_t)so->so_laddr_len; 3841 dprintso(so, 1, 3842 ("sosend_dgram UNIX: srclen %d, src %p\n", 3843 srclen, src)); 3844 error = so_ux_addr_xlate(so, name, namelen, 3845 (flags & MSG_XPG4_2), 3846 &addr, &addrlen); 3847 if (error) { 3848 eprintsoline(so, error); 3849 goto done; 3850 } 3851 } 3852 } else { 3853 addr = name; 3854 addrlen = namelen; 3855 src = NULL; 3856 srclen = 0; 3857 } 3858 tudr.PRIM_type = T_UNITDATA_REQ; 3859 tudr.DEST_length = addrlen; 3860 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3861 if (srclen == 0) { 3862 tudr.OPT_length = 0; 3863 tudr.OPT_offset = 0; 3864 3865 mp = soallocproto2(&tudr, sizeof (tudr), 3866 addr, addrlen, 0, _ALLOC_INTR); 3867 if (mp == NULL) { 3868 /* 3869 * Caught a signal waiting for memory. 3870 * Let send* return EINTR. 3871 */ 3872 error = EINTR; 3873 goto done; 3874 } 3875 } else { 3876 /* 3877 * There is a AF_UNIX sockaddr_un to include as a source 3878 * address option. 3879 */ 3880 struct T_opthdr toh; 3881 ssize_t size; 3882 3883 tudr.OPT_length = (t_scalar_t)(sizeof (toh) + 3884 _TPI_ALIGN_TOPT(srclen)); 3885 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3886 _TPI_ALIGN_TOPT(addrlen)); 3887 3888 toh.level = SOL_SOCKET; 3889 toh.name = SO_SRCADDR; 3890 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3891 toh.status = 0; 3892 3893 size = tudr.OPT_offset + tudr.OPT_length; 3894 mp = soallocproto2(&tudr, sizeof (tudr), 3895 addr, addrlen, size, _ALLOC_INTR); 3896 if (mp == NULL) { 3897 /* 3898 * Caught a signal waiting for memory. 3899 * Let send* return EINTR. 
3900 */ 3901 error = EINTR; 3902 goto done; 3903 } 3904 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3905 soappendmsg(mp, &toh, sizeof (toh)); 3906 soappendmsg(mp, src, srclen); 3907 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3908 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3909 } 3910 3911 #ifdef C2_AUDIT 3912 if (audit_active) 3913 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3914 #endif /* C2_AUDIT */ 3915 3916 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3917 done: 3918 #ifdef SOCK_DEBUG 3919 if (error) { 3920 eprintsoline(so, error); 3921 } 3922 #endif /* SOCK_DEBUG */ 3923 return (error); 3924 } 3925 3926 /* 3927 * Sending data on a connected stream socket. 3928 * Assumes caller has verified that SS_ISCONNECTED is set. 3929 */ 3930 int 3931 sosend_svc(struct sonode *so, 3932 struct uio *uiop, 3933 t_scalar_t prim, 3934 int more, 3935 int sflag) 3936 { 3937 struct T_data_req tdr; 3938 mblk_t *mp; 3939 int error; 3940 ssize_t iosize; 3941 int first = 1; 3942 3943 dprintso(so, 1, 3944 ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n", 3945 so, uiop->uio_resid, prim, sflag)); 3946 3947 /* 3948 * Has to be bound and connected. However, since no locks are 3949 * held the state could have changed after sotpi_sendmsg checked it 3950 * thus it is not possible to ASSERT on the state. 3951 */ 3952 3953 do { 3954 /* 3955 * Set the MORE flag if uio_resid does not fit in this 3956 * message or if the caller passed in "more". 3957 * Error for transports with zero tidu_size. 
3958 */ 3959 tdr.PRIM_type = prim; 3960 iosize = so->so_tidu_size; 3961 if (iosize <= 0) 3962 return (EMSGSIZE); 3963 if (uiop->uio_resid > iosize) { 3964 tdr.MORE_flag = 1; 3965 } else { 3966 if (more) 3967 tdr.MORE_flag = 1; 3968 else 3969 tdr.MORE_flag = 0; 3970 iosize = uiop->uio_resid; 3971 } 3972 dprintso(so, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n", 3973 prim, tdr.MORE_flag, iosize)); 3974 mp = soallocproto1(&tdr, sizeof (tdr), 0, _ALLOC_INTR); 3975 if (mp == NULL) { 3976 /* 3977 * Caught a signal waiting for memory. 3978 * Let send* return EINTR. 3979 */ 3980 if (first) 3981 return (EINTR); 3982 else 3983 return (0); 3984 } 3985 3986 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 3987 0, sflag | MSG_BAND, 0); 3988 if (error) { 3989 if (!first && error == EWOULDBLOCK) 3990 return (0); 3991 eprintsoline(so, error); 3992 return (error); 3993 } 3994 first = 0; 3995 if (uiop->uio_resid > 0) { 3996 /* 3997 * Recheck for fatal errors. Fail write even though 3998 * some data have been written. This is consistent 3999 * with strwrite semantics and BSD sockets semantics. 4000 */ 4001 if (so->so_state & SS_CANTSENDMORE) { 4002 tsignal(curthread, SIGPIPE); 4003 eprintsoline(so, error); 4004 return (EPIPE); 4005 } 4006 if (so->so_error != 0) { 4007 mutex_enter(&so->so_lock); 4008 error = sogeterr(so); 4009 mutex_exit(&so->so_lock); 4010 if (error != 0) { 4011 eprintsoline(so, error); 4012 return (error); 4013 } 4014 } 4015 } 4016 } while (uiop->uio_resid > 0); 4017 return (0); 4018 } 4019 4020 /* 4021 * Check the state for errors and call the appropriate send function. 4022 * 4023 * If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set) 4024 * this function issues a setsockopt to toggle SO_DONTROUTE before and 4025 * after sending the message. 
4026 */ 4027 static int 4028 sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop) 4029 { 4030 int so_state; 4031 int so_mode; 4032 int error; 4033 struct sockaddr *name; 4034 t_uscalar_t namelen; 4035 int dontroute; 4036 int flags; 4037 4038 dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n", 4039 so, msg, msg->msg_flags, 4040 pr_state(so->so_state, so->so_mode), so->so_error)); 4041 4042 mutex_enter(&so->so_lock); 4043 so_state = so->so_state; 4044 4045 if (so_state & SS_CANTSENDMORE) { 4046 mutex_exit(&so->so_lock); 4047 tsignal(curthread, SIGPIPE); 4048 return (EPIPE); 4049 } 4050 4051 if (so->so_error != 0) { 4052 error = sogeterr(so); 4053 if (error != 0) { 4054 mutex_exit(&so->so_lock); 4055 return (error); 4056 } 4057 } 4058 4059 name = (struct sockaddr *)msg->msg_name; 4060 namelen = msg->msg_namelen; 4061 4062 so_mode = so->so_mode; 4063 4064 if (name == NULL) { 4065 if (!(so_state & SS_ISCONNECTED)) { 4066 mutex_exit(&so->so_lock); 4067 if (so_mode & SM_CONNREQUIRED) 4068 return (ENOTCONN); 4069 else 4070 return (EDESTADDRREQ); 4071 } 4072 if (so_mode & SM_CONNREQUIRED) { 4073 name = NULL; 4074 namelen = 0; 4075 } else { 4076 /* 4077 * Note that this code does not prevent so_faddr_sa 4078 * from changing while it is being used. Thus 4079 * if an "unconnect"+connect occurs concurrently with 4080 * this send the datagram might be delivered to a 4081 * garbaled address. 4082 */ 4083 ASSERT(so->so_faddr_sa); 4084 name = so->so_faddr_sa; 4085 namelen = (t_uscalar_t)so->so_faddr_len; 4086 } 4087 } else { 4088 if (!(so_state & SS_ISCONNECTED) && 4089 (so_mode & SM_CONNREQUIRED)) { 4090 /* Required but not connected */ 4091 mutex_exit(&so->so_lock); 4092 return (ENOTCONN); 4093 } 4094 /* 4095 * Ignore the address on connection-oriented sockets. 4096 * Just like BSD this code does not generate an error for 4097 * TCP (a CONNREQUIRED socket) when sending to an address 4098 * passed in with sendto/sendmsg. 
Instead the data is 4099 * delivered on the connection as if no address had been 4100 * supplied. 4101 */ 4102 if ((so_state & SS_ISCONNECTED) && 4103 !(so_mode & SM_CONNREQUIRED)) { 4104 mutex_exit(&so->so_lock); 4105 return (EISCONN); 4106 } 4107 if (!(so_state & SS_ISBOUND)) { 4108 so_lock_single(so); /* Set SOLOCKED */ 4109 error = sotpi_bind(so, NULL, 0, 4110 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD); 4111 so_unlock_single(so, SOLOCKED); 4112 if (error) { 4113 mutex_exit(&so->so_lock); 4114 eprintsoline(so, error); 4115 return (error); 4116 } 4117 } 4118 /* 4119 * Handle delayed datagram errors. These are only queued 4120 * when the application sets SO_DGRAM_ERRIND. 4121 * Return the error if we are sending to the address 4122 * that was returned in the last T_UDERROR_IND. 4123 * If sending to some other address discard the delayed 4124 * error indication. 4125 */ 4126 if (so->so_delayed_error) { 4127 struct T_uderror_ind *tudi; 4128 void *addr; 4129 t_uscalar_t addrlen; 4130 boolean_t match = B_FALSE; 4131 4132 ASSERT(so->so_eaddr_mp); 4133 error = so->so_delayed_error; 4134 so->so_delayed_error = 0; 4135 tudi = (struct T_uderror_ind *)so->so_eaddr_mp->b_rptr; 4136 addrlen = tudi->DEST_length; 4137 addr = sogetoff(so->so_eaddr_mp, 4138 tudi->DEST_offset, 4139 addrlen, 1); 4140 ASSERT(addr); /* Checked by strsock_proto */ 4141 switch (so->so_family) { 4142 case AF_INET: { 4143 /* Compare just IP address and port */ 4144 sin_t *sin1 = (sin_t *)name; 4145 sin_t *sin2 = (sin_t *)addr; 4146 4147 if (addrlen == sizeof (sin_t) && 4148 namelen == addrlen && 4149 sin1->sin_port == sin2->sin_port && 4150 sin1->sin_addr.s_addr == 4151 sin2->sin_addr.s_addr) 4152 match = B_TRUE; 4153 break; 4154 } 4155 case AF_INET6: { 4156 /* Compare just IP address and port. 
Not flow */ 4157 sin6_t *sin1 = (sin6_t *)name; 4158 sin6_t *sin2 = (sin6_t *)addr; 4159 4160 if (addrlen == sizeof (sin6_t) && 4161 namelen == addrlen && 4162 sin1->sin6_port == sin2->sin6_port && 4163 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 4164 &sin2->sin6_addr)) 4165 match = B_TRUE; 4166 break; 4167 } 4168 case AF_UNIX: 4169 default: 4170 if (namelen == addrlen && 4171 bcmp(name, addr, namelen) == 0) 4172 match = B_TRUE; 4173 } 4174 if (match) { 4175 freemsg(so->so_eaddr_mp); 4176 so->so_eaddr_mp = NULL; 4177 mutex_exit(&so->so_lock); 4178 #ifdef DEBUG 4179 dprintso(so, 0, 4180 ("sockfs delayed error %d for %s\n", 4181 error, 4182 pr_addr(so->so_family, name, namelen))); 4183 #endif /* DEBUG */ 4184 return (error); 4185 } 4186 freemsg(so->so_eaddr_mp); 4187 so->so_eaddr_mp = NULL; 4188 } 4189 } 4190 mutex_exit(&so->so_lock); 4191 4192 flags = msg->msg_flags; 4193 dontroute = 0; 4194 if ((flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE)) { 4195 uint32_t val; 4196 4197 val = 1; 4198 error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4199 &val, (t_uscalar_t)sizeof (val)); 4200 if (error) 4201 return (error); 4202 dontroute = 1; 4203 } 4204 4205 if ((flags & MSG_OOB) && !(so_mode & SM_EXDATA)) { 4206 error = EOPNOTSUPP; 4207 goto done; 4208 } 4209 if (msg->msg_controllen != 0) { 4210 if (!(so_mode & SM_CONNREQUIRED)) { 4211 error = sosend_dgramcmsg(so, name, namelen, uiop, 4212 msg->msg_control, msg->msg_controllen, flags); 4213 } else { 4214 if (flags & MSG_OOB) { 4215 /* Can't generate T_EXDATA_REQ with options */ 4216 error = EOPNOTSUPP; 4217 goto done; 4218 } 4219 error = sosend_svccmsg(so, uiop, 4220 !(flags & MSG_EOR), 4221 msg->msg_control, msg->msg_controllen, 4222 flags); 4223 } 4224 goto done; 4225 } 4226 4227 if (!(so_mode & SM_CONNREQUIRED)) { 4228 /* 4229 * If there is no SO_DONTROUTE to turn off return immediately 4230 * from send_dgram. This can allow tail-call optimizations. 
4231 */ 4232 if (!dontroute) { 4233 return (sosend_dgram(so, name, namelen, uiop, flags)); 4234 } 4235 error = sosend_dgram(so, name, namelen, uiop, flags); 4236 } else { 4237 t_scalar_t prim; 4238 int sflag; 4239 4240 /* Ignore msg_name in the connected state */ 4241 if (flags & MSG_OOB) { 4242 prim = T_EXDATA_REQ; 4243 /* 4244 * Send down T_EXDATA_REQ even if there is flow 4245 * control for data. 4246 */ 4247 sflag = MSG_IGNFLOW; 4248 } else { 4249 if (so_mode & SM_BYTESTREAM) { 4250 /* Byte stream transport - use write */ 4251 4252 dprintso(so, 1, ("sotpi_sendmsg: write\n")); 4253 /* 4254 * If there is no SO_DONTROUTE to turn off, 4255 * SS_DIRECT is on, and there is no flow 4256 * control, we can take the fast path. 4257 */ 4258 if (!dontroute && 4259 (so_state & SS_DIRECT) && 4260 canputnext(SOTOV(so)->v_stream->sd_wrq)) { 4261 return (sostream_direct(so, uiop, 4262 NULL, CRED())); 4263 } 4264 error = strwrite(SOTOV(so), uiop, CRED()); 4265 goto done; 4266 } 4267 prim = T_DATA_REQ; 4268 sflag = 0; 4269 } 4270 /* 4271 * If there is no SO_DONTROUTE to turn off return immediately 4272 * from sosend_svc. This can allow tail-call optimizations. 4273 */ 4274 if (!dontroute) 4275 return (sosend_svc(so, uiop, prim, 4276 !(flags & MSG_EOR), sflag)); 4277 error = sosend_svc(so, uiop, prim, 4278 !(flags & MSG_EOR), sflag); 4279 } 4280 ASSERT(dontroute); 4281 done: 4282 if (dontroute) { 4283 uint32_t val; 4284 4285 val = 0; 4286 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4287 &val, (t_uscalar_t)sizeof (val)); 4288 } 4289 return (error); 4290 } 4291 4292 /* 4293 * Sending data on a datagram socket. 4294 * Assumes caller has verified that SS_ISBOUND etc. are set. 
4295 */ 4296 /* ARGSUSED */ 4297 static int 4298 sodgram_direct(struct sonode *so, struct sockaddr *name, 4299 socklen_t namelen, struct uio *uiop, int flags) 4300 { 4301 struct T_unitdata_req tudr; 4302 mblk_t *mp; 4303 int error = 0; 4304 void *addr; 4305 socklen_t addrlen; 4306 ssize_t len; 4307 struct stdata *stp = SOTOV(so)->v_stream; 4308 int so_state; 4309 queue_t *udp_wq; 4310 4311 ASSERT(name != NULL && namelen != 0); 4312 ASSERT(!(so->so_mode & SM_CONNREQUIRED)); 4313 ASSERT(!(so->so_mode & SM_EXDATA)); 4314 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 4315 ASSERT(SOTOV(so)->v_type == VSOCK); 4316 4317 /* Caller checked for proper length */ 4318 len = uiop->uio_resid; 4319 ASSERT(len <= so->so_tidu_size); 4320 4321 /* Length and family checks have been done by caller */ 4322 ASSERT(name->sa_family == so->so_family); 4323 ASSERT(so->so_family == AF_INET || 4324 (namelen == (socklen_t)sizeof (struct sockaddr_in6))); 4325 ASSERT(so->so_family == AF_INET6 || 4326 (namelen == (socklen_t)sizeof (struct sockaddr_in))); 4327 4328 addr = name; 4329 addrlen = namelen; 4330 4331 if (stp->sd_sidp != NULL && 4332 (error = straccess(stp, JCWRITE)) != 0) 4333 goto done; 4334 4335 so_state = so->so_state; 4336 4337 /* 4338 * For UDP we don't break up the copyin into smaller pieces 4339 * as in the TCP case. That means if ENOMEM is returned by 4340 * mcopyinuio() then the uio vector has not been modified at 4341 * all and we fallback to either strwrite() or kstrputmsg() 4342 * below. Note also that we never generate priority messages 4343 * from here. 
4344 */ 4345 udp_wq = stp->sd_wrq->q_next; 4346 if (canput(udp_wq) && 4347 (mp = mcopyinuio(stp, uiop, -1, -1, &error)) != NULL) { 4348 ASSERT(DB_TYPE(mp) == M_DATA); 4349 ASSERT(uiop->uio_resid == 0); 4350 #ifdef C2_AUDIT 4351 if (audit_active) 4352 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4353 #endif /* C2_AUDIT */ 4354 udp_wput_data(udp_wq, mp, addr, addrlen); 4355 return (0); 4356 } 4357 if (error != 0 && error != ENOMEM) 4358 return (error); 4359 4360 /* 4361 * For connected, let strwrite() handle the blocking case. 4362 * Otherwise we fall thru and use kstrputmsg(). 4363 */ 4364 if (so_state & SS_ISCONNECTED) 4365 return (strwrite(SOTOV(so), uiop, CRED())); 4366 4367 tudr.PRIM_type = T_UNITDATA_REQ; 4368 tudr.DEST_length = addrlen; 4369 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4370 tudr.OPT_length = 0; 4371 tudr.OPT_offset = 0; 4372 4373 mp = soallocproto2(&tudr, sizeof (tudr), addr, addrlen, 0, _ALLOC_INTR); 4374 if (mp == NULL) { 4375 /* 4376 * Caught a signal waiting for memory. 4377 * Let send* return EINTR. 
4378 */ 4379 error = EINTR; 4380 goto done; 4381 } 4382 4383 #ifdef C2_AUDIT 4384 if (audit_active) 4385 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4386 #endif /* C2_AUDIT */ 4387 4388 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4389 done: 4390 #ifdef SOCK_DEBUG 4391 if (error != 0) { 4392 eprintsoline(so, error); 4393 } 4394 #endif /* SOCK_DEBUG */ 4395 return (error); 4396 } 4397 4398 int 4399 sostream_direct(struct sonode *so, struct uio *uiop, mblk_t *mp, cred_t *cr) 4400 { 4401 struct stdata *stp = SOTOV(so)->v_stream; 4402 ssize_t iosize, rmax, maxblk; 4403 queue_t *tcp_wq = stp->sd_wrq->q_next; 4404 mblk_t *newmp; 4405 int error = 0, wflag = 0; 4406 4407 ASSERT(so->so_mode & SM_BYTESTREAM); 4408 ASSERT(SOTOV(so)->v_type == VSOCK); 4409 4410 if (stp->sd_sidp != NULL && 4411 (error = straccess(stp, JCWRITE)) != 0) 4412 return (error); 4413 4414 if (uiop == NULL) { 4415 /* 4416 * kstrwritemp() should have checked sd_flag and 4417 * flow-control before coming here. If we end up 4418 * here it means that we can simply pass down the 4419 * data to tcp. 
4420 */ 4421 ASSERT(mp != NULL); 4422 if (stp->sd_wputdatafunc != NULL) { 4423 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4424 NULL, NULL, NULL); 4425 if (newmp == NULL) { 4426 /* The caller will free mp */ 4427 return (ECOMM); 4428 } 4429 mp = newmp; 4430 } 4431 tcp_wput(tcp_wq, mp); 4432 return (0); 4433 } 4434 4435 /* Fallback to strwrite() to do proper error handling */ 4436 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY)) 4437 return (strwrite(SOTOV(so), uiop, cr)); 4438 4439 rmax = stp->sd_qn_maxpsz; 4440 ASSERT(rmax >= 0 || rmax == INFPSZ); 4441 if (rmax == 0 || uiop->uio_resid <= 0) 4442 return (0); 4443 4444 if (rmax == INFPSZ) 4445 rmax = uiop->uio_resid; 4446 4447 maxblk = stp->sd_maxblk; 4448 4449 for (;;) { 4450 iosize = MIN(uiop->uio_resid, rmax); 4451 4452 mp = mcopyinuio(stp, uiop, iosize, maxblk, &error); 4453 if (mp == NULL) { 4454 /* 4455 * Fallback to strwrite() for ENOMEM; if this 4456 * is our first time in this routine and the uio 4457 * vector has not been modified, we will end up 4458 * calling strwrite() without any flag set. 4459 */ 4460 if (error == ENOMEM) 4461 goto slow_send; 4462 else 4463 return (error); 4464 } 4465 ASSERT(uiop->uio_resid >= 0); 4466 /* 4467 * If mp is non-NULL and ENOMEM is set, it means that 4468 * mcopyinuio() was able to break down some of the user 4469 * data into one or more mblks. Send the partial data 4470 * to tcp and let the rest be handled in strwrite(). 
4471 */ 4472 ASSERT(error == 0 || error == ENOMEM); 4473 if (stp->sd_wputdatafunc != NULL) { 4474 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4475 NULL, NULL, NULL); 4476 if (newmp == NULL) { 4477 /* The caller will free mp */ 4478 return (ECOMM); 4479 } 4480 mp = newmp; 4481 } 4482 tcp_wput(tcp_wq, mp); 4483 4484 wflag |= NOINTR; 4485 4486 if (uiop->uio_resid == 0) { /* No more data; we're done */ 4487 ASSERT(error == 0); 4488 break; 4489 } else if (error == ENOMEM || !canput(tcp_wq) || (stp->sd_flag & 4490 (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))) { 4491 slow_send: 4492 /* 4493 * We were able to send down partial data using 4494 * the direct call interface, but are now relying 4495 * on strwrite() to handle the non-fastpath cases. 4496 * If the socket is blocking we will sleep in 4497 * strwaitq() until write is permitted, otherwise, 4498 * we will need to return the amount of bytes 4499 * written so far back to the app. This is the 4500 * reason why we pass NOINTR flag to strwrite() 4501 * for non-blocking socket, because we don't want 4502 * to return EAGAIN when portion of the user data 4503 * has actually been sent down. 4504 */ 4505 return (strwrite_common(SOTOV(so), uiop, cr, wflag)); 4506 } 4507 } 4508 return (0); 4509 } 4510 4511 /* 4512 * Update so_faddr by asking the transport (unless AF_UNIX). 
4513 */ 4514 int 4515 sotpi_getpeername(struct sonode *so) 4516 { 4517 struct strbuf strbuf; 4518 int error = 0, res; 4519 void *addr; 4520 t_uscalar_t addrlen; 4521 k_sigset_t smask; 4522 4523 dprintso(so, 1, ("sotpi_getpeername(%p) %s\n", 4524 so, pr_state(so->so_state, so->so_mode))); 4525 4526 mutex_enter(&so->so_lock); 4527 so_lock_single(so); /* Set SOLOCKED */ 4528 if (!(so->so_state & SS_ISCONNECTED)) { 4529 error = ENOTCONN; 4530 goto done; 4531 } 4532 /* Added this check for X/Open */ 4533 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 4534 error = EINVAL; 4535 if (xnet_check_print) { 4536 printf("sockfs: X/Open getpeername check => EINVAL\n"); 4537 } 4538 goto done; 4539 } 4540 #ifdef DEBUG 4541 dprintso(so, 1, ("sotpi_getpeername (local): %s\n", 4542 pr_addr(so->so_family, so->so_faddr_sa, 4543 (t_uscalar_t)so->so_faddr_len))); 4544 #endif /* DEBUG */ 4545 4546 if (so->so_family == AF_UNIX) { 4547 /* Transport has different name space - return local info */ 4548 error = 0; 4549 goto done; 4550 } 4551 4552 ASSERT(so->so_faddr_sa); 4553 /* Allocate local buffer to use with ioctl */ 4554 addrlen = (t_uscalar_t)so->so_faddr_maxlen; 4555 mutex_exit(&so->so_lock); 4556 addr = kmem_alloc(addrlen, KM_SLEEP); 4557 4558 /* 4559 * Issue TI_GETPEERNAME with signals masked. 4560 * Put the result in so_faddr_sa so that getpeername works after 4561 * a shutdown(output). 4562 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 4563 * back to the socket. 4564 */ 4565 strbuf.buf = addr; 4566 strbuf.maxlen = addrlen; 4567 strbuf.len = 0; 4568 4569 sigintr(&smask, 0); 4570 res = 0; 4571 ASSERT(CRED()); 4572 error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf, 4573 0, K_TO_K, CRED(), &res); 4574 sigunintr(&smask); 4575 4576 mutex_enter(&so->so_lock); 4577 /* 4578 * If there is an error record the error in so_error put don't fail 4579 * the getpeername. Instead fallback on the recorded 4580 * so->so_faddr_sa. 
4581 */ 4582 if (error) { 4583 /* 4584 * Various stream head errors can be returned to the ioctl. 4585 * However, it is impossible to determine which ones of 4586 * these are really socket level errors that were incorrectly 4587 * consumed by the ioctl. Thus this code silently ignores the 4588 * error - to code explicitly does not reinstate the error 4589 * using soseterror(). 4590 * Experiments have shows that at least this set of 4591 * errors are reported and should not be reinstated on the 4592 * socket: 4593 * EINVAL E.g. if an I_LINK was in effect when 4594 * getpeername was called. 4595 * EPIPE The ioctl error semantics prefer the write 4596 * side error over the read side error. 4597 * ENOTCONN The transport just got disconnected but 4598 * sockfs had not yet seen the T_DISCON_IND 4599 * when issuing the ioctl. 4600 */ 4601 error = 0; 4602 } else if (res == 0 && strbuf.len > 0 && 4603 (so->so_state & SS_ISCONNECTED)) { 4604 ASSERT(strbuf.len <= (int)so->so_faddr_maxlen); 4605 so->so_faddr_len = (socklen_t)strbuf.len; 4606 bcopy(addr, so->so_faddr_sa, so->so_faddr_len); 4607 so->so_state |= SS_FADDR_VALID; 4608 } 4609 kmem_free(addr, addrlen); 4610 #ifdef DEBUG 4611 dprintso(so, 1, ("sotpi_getpeername (tp): %s\n", 4612 pr_addr(so->so_family, so->so_faddr_sa, 4613 (t_uscalar_t)so->so_faddr_len))); 4614 #endif /* DEBUG */ 4615 done: 4616 so_unlock_single(so, SOLOCKED); 4617 mutex_exit(&so->so_lock); 4618 return (error); 4619 } 4620 4621 /* 4622 * Update so_laddr by asking the transport (unless AF_UNIX). 
4623 */ 4624 int 4625 sotpi_getsockname(struct sonode *so) 4626 { 4627 struct strbuf strbuf; 4628 int error = 0, res; 4629 void *addr; 4630 t_uscalar_t addrlen; 4631 k_sigset_t smask; 4632 4633 dprintso(so, 1, ("sotpi_getsockname(%p) %s\n", 4634 so, pr_state(so->so_state, so->so_mode))); 4635 4636 mutex_enter(&so->so_lock); 4637 so_lock_single(so); /* Set SOLOCKED */ 4638 if (!(so->so_state & SS_ISBOUND) && so->so_family != AF_UNIX) { 4639 /* Return an all zero address except for the family */ 4640 if (so->so_family == AF_INET) 4641 so->so_laddr_len = (socklen_t)sizeof (sin_t); 4642 else if (so->so_family == AF_INET6) 4643 so->so_laddr_len = (socklen_t)sizeof (sin6_t); 4644 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 4645 bzero(so->so_laddr_sa, so->so_laddr_len); 4646 /* 4647 * Can not assume there is a sa_family for all 4648 * protocol families. 4649 */ 4650 if (so->so_family == AF_INET || so->so_family == AF_INET6) 4651 so->so_laddr_sa->sa_family = so->so_family; 4652 } 4653 #ifdef DEBUG 4654 dprintso(so, 1, ("sotpi_getsockname (local): %s\n", 4655 pr_addr(so->so_family, so->so_laddr_sa, 4656 (t_uscalar_t)so->so_laddr_len))); 4657 #endif /* DEBUG */ 4658 if (so->so_family == AF_UNIX) { 4659 /* Transport has different name space - return local info */ 4660 error = 0; 4661 goto done; 4662 } 4663 if (!(so->so_state & SS_ISBOUND)) { 4664 /* If not bound, then nothing to return. */ 4665 error = 0; 4666 goto done; 4667 } 4668 /* Allocate local buffer to use with ioctl */ 4669 addrlen = (t_uscalar_t)so->so_laddr_maxlen; 4670 mutex_exit(&so->so_lock); 4671 addr = kmem_alloc(addrlen, KM_SLEEP); 4672 4673 /* 4674 * Issue TI_GETMYNAME with signals masked. 4675 * Put the result in so_laddr_sa so that getsockname works after 4676 * a shutdown(output). 4677 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 4678 * back to the socket. 
4679 */ 4680 strbuf.buf = addr; 4681 strbuf.maxlen = addrlen; 4682 strbuf.len = 0; 4683 4684 sigintr(&smask, 0); 4685 res = 0; 4686 ASSERT(CRED()); 4687 error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf, 4688 0, K_TO_K, CRED(), &res); 4689 sigunintr(&smask); 4690 4691 mutex_enter(&so->so_lock); 4692 /* 4693 * If there is an error record the error in so_error put don't fail 4694 * the getsockname. Instead fallback on the recorded 4695 * so->so_laddr_sa. 4696 */ 4697 if (error) { 4698 /* 4699 * Various stream head errors can be returned to the ioctl. 4700 * However, it is impossible to determine which ones of 4701 * these are really socket level errors that were incorrectly 4702 * consumed by the ioctl. Thus this code silently ignores the 4703 * error - to code explicitly does not reinstate the error 4704 * using soseterror(). 4705 * Experiments have shows that at least this set of 4706 * errors are reported and should not be reinstated on the 4707 * socket: 4708 * EINVAL E.g. if an I_LINK was in effect when 4709 * getsockname was called. 4710 * EPIPE The ioctl error semantics prefer the write 4711 * side error over the read side error. 4712 */ 4713 error = 0; 4714 } else if (res == 0 && strbuf.len > 0 && 4715 (so->so_state & SS_ISBOUND)) { 4716 ASSERT(strbuf.len <= (int)so->so_laddr_maxlen); 4717 so->so_laddr_len = (socklen_t)strbuf.len; 4718 bcopy(addr, so->so_laddr_sa, so->so_laddr_len); 4719 so->so_state |= SS_LADDR_VALID; 4720 } 4721 kmem_free(addr, addrlen); 4722 #ifdef DEBUG 4723 dprintso(so, 1, ("sotpi_getsockname (tp): %s\n", 4724 pr_addr(so->so_family, so->so_laddr_sa, 4725 (t_uscalar_t)so->so_laddr_len))); 4726 #endif /* DEBUG */ 4727 done: 4728 so_unlock_single(so, SOLOCKED); 4729 mutex_exit(&so->so_lock); 4730 return (error); 4731 } 4732 4733 /* 4734 * Get socket options. For SOL_SOCKET options some options are handled 4735 * by the sockfs while others use the value recorded in the sonode as a 4736 * fallback should the T_SVR4_OPTMGMT_REQ fail. 
4737 * 4738 * On the return most *optlenp bytes are copied to optval. 4739 */ 4740 int 4741 sotpi_getsockopt(struct sonode *so, int level, int option_name, 4742 void *optval, socklen_t *optlenp, int flags) 4743 { 4744 struct T_optmgmt_req optmgmt_req; 4745 struct T_optmgmt_ack *optmgmt_ack; 4746 struct opthdr oh; 4747 struct opthdr *opt_res; 4748 mblk_t *mp = NULL; 4749 int error = 0; 4750 void *option = NULL; /* Set if fallback value */ 4751 t_uscalar_t maxlen = *optlenp; 4752 t_uscalar_t len; 4753 uint32_t value; 4754 4755 dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n", 4756 so, level, option_name, optval, optlenp, 4757 pr_state(so->so_state, so->so_mode))); 4758 4759 mutex_enter(&so->so_lock); 4760 so_lock_single(so); /* Set SOLOCKED */ 4761 4762 /* 4763 * Check for SOL_SOCKET options. 4764 * Certain SOL_SOCKET options are returned directly whereas 4765 * others only provide a default (fallback) value should 4766 * the T_SVR4_OPTMGMT_REQ fail. 4767 */ 4768 if (level == SOL_SOCKET) { 4769 /* Check parameters */ 4770 switch (option_name) { 4771 case SO_TYPE: 4772 case SO_ERROR: 4773 case SO_DEBUG: 4774 case SO_ACCEPTCONN: 4775 case SO_REUSEADDR: 4776 case SO_KEEPALIVE: 4777 case SO_DONTROUTE: 4778 case SO_BROADCAST: 4779 case SO_USELOOPBACK: 4780 case SO_OOBINLINE: 4781 case SO_SNDBUF: 4782 case SO_RCVBUF: 4783 #ifdef notyet 4784 case SO_SNDLOWAT: 4785 case SO_RCVLOWAT: 4786 case SO_SNDTIMEO: 4787 case SO_RCVTIMEO: 4788 #endif /* notyet */ 4789 case SO_DGRAM_ERRIND: 4790 if (maxlen < (t_uscalar_t)sizeof (int32_t)) { 4791 error = EINVAL; 4792 eprintsoline(so, error); 4793 goto done2; 4794 } 4795 break; 4796 case SO_LINGER: 4797 if (maxlen < (t_uscalar_t)sizeof (struct linger)) { 4798 error = EINVAL; 4799 eprintsoline(so, error); 4800 goto done2; 4801 } 4802 break; 4803 } 4804 4805 len = (t_uscalar_t)sizeof (uint32_t); /* Default */ 4806 4807 switch (option_name) { 4808 case SO_TYPE: 4809 value = so->so_type; 4810 option = &value; 4811 goto 
copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4812 4813 case SO_ERROR: 4814 value = sogeterr(so); 4815 option = &value; 4816 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4817 4818 case SO_ACCEPTCONN: 4819 if (so->so_state & SS_ACCEPTCONN) 4820 value = SO_ACCEPTCONN; 4821 else 4822 value = 0; 4823 #ifdef DEBUG 4824 if (value) { 4825 dprintso(so, 1, 4826 ("sotpi_getsockopt: 0x%x is set\n", 4827 option_name)); 4828 } else { 4829 dprintso(so, 1, 4830 ("sotpi_getsockopt: 0x%x not set\n", 4831 option_name)); 4832 } 4833 #endif /* DEBUG */ 4834 option = &value; 4835 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4836 4837 case SO_DEBUG: 4838 case SO_REUSEADDR: 4839 case SO_KEEPALIVE: 4840 case SO_DONTROUTE: 4841 case SO_BROADCAST: 4842 case SO_USELOOPBACK: 4843 case SO_OOBINLINE: 4844 case SO_DGRAM_ERRIND: 4845 value = (so->so_options & option_name); 4846 #ifdef DEBUG 4847 if (value) { 4848 dprintso(so, 1, 4849 ("sotpi_getsockopt: 0x%x is set\n", 4850 option_name)); 4851 } else { 4852 dprintso(so, 1, 4853 ("sotpi_getsockopt: 0x%x not set\n", 4854 option_name)); 4855 } 4856 #endif /* DEBUG */ 4857 option = &value; 4858 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4859 4860 /* 4861 * The following options are only returned by sockfs when the 4862 * T_SVR4_OPTMGMT_REQ fails. 4863 */ 4864 case SO_LINGER: 4865 option = &so->so_linger; 4866 len = (t_uscalar_t)sizeof (struct linger); 4867 break; 4868 case SO_SNDBUF: { 4869 ssize_t lvalue; 4870 4871 /* 4872 * If the option has not been set then get a default 4873 * value from the read queue. This value is 4874 * returned if the transport fails 4875 * the T_SVR4_OPTMGMT_REQ. 
4876 */ 4877 lvalue = so->so_sndbuf; 4878 if (lvalue == 0) { 4879 mutex_exit(&so->so_lock); 4880 (void) strqget(strvp2wq(SOTOV(so))->q_next, 4881 QHIWAT, 0, &lvalue); 4882 mutex_enter(&so->so_lock); 4883 dprintso(so, 1, 4884 ("got SO_SNDBUF %ld from q\n", lvalue)); 4885 } 4886 value = (int)lvalue; 4887 option = &value; 4888 len = (t_uscalar_t)sizeof (so->so_sndbuf); 4889 break; 4890 } 4891 case SO_RCVBUF: { 4892 ssize_t lvalue; 4893 4894 /* 4895 * If the option has not been set then get a default 4896 * value from the read queue. This value is 4897 * returned if the transport fails 4898 * the T_SVR4_OPTMGMT_REQ. 4899 * 4900 * XXX If SO_RCVBUF has been set and this is an 4901 * XPG 4.2 application then do not ask the transport 4902 * since the transport might adjust the value and not 4903 * return exactly what was set by the application. 4904 * For non-XPG 4.2 application we return the value 4905 * that the transport is actually using. 4906 */ 4907 lvalue = so->so_rcvbuf; 4908 if (lvalue == 0) { 4909 mutex_exit(&so->so_lock); 4910 (void) strqget(RD(strvp2wq(SOTOV(so))), 4911 QHIWAT, 0, &lvalue); 4912 mutex_enter(&so->so_lock); 4913 dprintso(so, 1, 4914 ("got SO_RCVBUF %ld from q\n", lvalue)); 4915 } else if (flags & _SOGETSOCKOPT_XPG4_2) { 4916 value = (int)lvalue; 4917 option = &value; 4918 goto copyout; /* skip asking transport */ 4919 } 4920 value = (int)lvalue; 4921 option = &value; 4922 len = (t_uscalar_t)sizeof (so->so_rcvbuf); 4923 break; 4924 } 4925 #ifdef notyet 4926 /* 4927 * We do not implement the semantics of these options 4928 * thus we shouldn't implement the options either. 
4929 */ 4930 case SO_SNDLOWAT: 4931 value = so->so_sndlowat; 4932 option = &value; 4933 break; 4934 case SO_RCVLOWAT: 4935 value = so->so_rcvlowat; 4936 option = &value; 4937 break; 4938 case SO_SNDTIMEO: 4939 value = so->so_sndtimeo; 4940 option = &value; 4941 break; 4942 case SO_RCVTIMEO: 4943 value = so->so_rcvtimeo; 4944 option = &value; 4945 break; 4946 #endif /* notyet */ 4947 } 4948 } 4949 4950 mutex_exit(&so->so_lock); 4951 4952 /* Send request */ 4953 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 4954 optmgmt_req.MGMT_flags = T_CHECK; 4955 optmgmt_req.OPT_length = (t_scalar_t)(sizeof (oh) + maxlen); 4956 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 4957 4958 oh.level = level; 4959 oh.name = option_name; 4960 oh.len = maxlen; 4961 4962 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 4963 &oh, sizeof (oh), NULL, maxlen, 0, _ALLOC_SLEEP); 4964 /* Let option management work in the presence of data flow control */ 4965 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 4966 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 4967 mp = NULL; 4968 mutex_enter(&so->so_lock); 4969 if (error) { 4970 eprintsoline(so, error); 4971 goto done2; 4972 } 4973 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 4974 (t_uscalar_t)(sizeof (*optmgmt_ack) + sizeof (*opt_res)), &mp, 0); 4975 if (error) { 4976 if (option != NULL) { 4977 /* We have a fallback value */ 4978 error = 0; 4979 goto copyout; 4980 } 4981 eprintsoline(so, error); 4982 goto done2; 4983 } 4984 ASSERT(mp); 4985 optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr; 4986 opt_res = (struct opthdr *)sogetoff(mp, optmgmt_ack->OPT_offset, 4987 optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE); 4988 if (opt_res == NULL) { 4989 if (option != NULL) { 4990 /* We have a fallback value */ 4991 error = 0; 4992 goto copyout; 4993 } 4994 error = EPROTO; 4995 eprintsoline(so, error); 4996 goto done; 4997 } 4998 option = &opt_res[1]; 4999 5000 /* check to ensure that the option is within bounds */ 5001 if 
(((uintptr_t)option + opt_res->len < (uintptr_t)option) || 5002 (uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) { 5003 if (option != NULL) { 5004 /* We have a fallback value */ 5005 error = 0; 5006 goto copyout; 5007 } 5008 error = EPROTO; 5009 eprintsoline(so, error); 5010 goto done; 5011 } 5012 5013 len = opt_res->len; 5014 5015 copyout: { 5016 t_uscalar_t size = MIN(len, maxlen); 5017 bcopy(option, optval, size); 5018 bcopy(&size, optlenp, sizeof (size)); 5019 } 5020 done: 5021 freemsg(mp); 5022 done2: 5023 so_unlock_single(so, SOLOCKED); 5024 mutex_exit(&so->so_lock); 5025 return (error); 5026 } 5027 5028 /* 5029 * Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ. 5030 * SOL_SOCKET options are also recorded in the sonode. A setsockopt for 5031 * SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails - 5032 * setsockopt has to work even if the transport does not support the option. 5033 */ 5034 int 5035 sotpi_setsockopt(struct sonode *so, int level, int option_name, 5036 const void *optval, t_uscalar_t optlen) 5037 { 5038 struct T_optmgmt_req optmgmt_req; 5039 struct opthdr oh; 5040 mblk_t *mp; 5041 int error = 0; 5042 boolean_t handled = B_FALSE; 5043 5044 dprintso(so, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n", 5045 so, level, option_name, optval, optlen, 5046 pr_state(so->so_state, so->so_mode))); 5047 5048 5049 /* X/Open requires this check */ 5050 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 5051 if (xnet_check_print) 5052 printf("sockfs: X/Open setsockopt check => EINVAL\n"); 5053 return (EINVAL); 5054 } 5055 5056 /* Caller allocates aligned optval, or passes null */ 5057 ASSERT(((uintptr_t)optval & (sizeof (t_scalar_t) - 1)) == 0); 5058 /* If optval is null optlen is 0, and vice-versa */ 5059 ASSERT(optval != NULL || optlen == 0); 5060 ASSERT(optlen != 0 || optval == NULL); 5061 5062 mutex_enter(&so->so_lock); 5063 so_lock_single(so); /* Set SOLOCKED */ 5064 
mutex_exit(&so->so_lock); 5065 5066 /* 5067 * For SOCKET or TCP level options, try to set it here itself 5068 * provided socket has not been popped and we know the tcp 5069 * structure (stored in so_priv). 5070 */ 5071 if ((level == SOL_SOCKET || level == IPPROTO_TCP) && 5072 (so->so_family == AF_INET || so->so_family == AF_INET6) && 5073 (so->so_version == SOV_SOCKSTREAM) && (so->so_priv != NULL)) { 5074 tcp_t *tcp = so->so_priv; 5075 boolean_t onoff; 5076 5077 #define intvalue (*(int32_t *)optval) 5078 5079 switch (level) { 5080 case SOL_SOCKET: 5081 switch (option_name) { /* Check length param */ 5082 case SO_DEBUG: 5083 case SO_REUSEADDR: 5084 case SO_DONTROUTE: 5085 case SO_BROADCAST: 5086 case SO_USELOOPBACK: 5087 case SO_OOBINLINE: 5088 case SO_DGRAM_ERRIND: 5089 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5090 error = EINVAL; 5091 eprintsoline(so, error); 5092 mutex_enter(&so->so_lock); 5093 goto done2; 5094 } 5095 ASSERT(optval); 5096 onoff = intvalue != 0; 5097 handled = B_TRUE; 5098 break; 5099 case SO_LINGER: 5100 if (optlen != 5101 (t_uscalar_t)sizeof (struct linger)) { 5102 error = EINVAL; 5103 eprintsoline(so, error); 5104 mutex_enter(&so->so_lock); 5105 goto done2; 5106 } 5107 ASSERT(optval); 5108 handled = B_TRUE; 5109 break; 5110 } 5111 5112 switch (option_name) { /* Do actions */ 5113 case SO_LINGER: { 5114 struct linger *lgr = (struct linger *)optval; 5115 5116 if (lgr->l_onoff) { 5117 tcp->tcp_linger = 1; 5118 tcp->tcp_lingertime = lgr->l_linger; 5119 so->so_linger.l_onoff = SO_LINGER; 5120 so->so_options |= SO_LINGER; 5121 } else { 5122 tcp->tcp_linger = 0; 5123 tcp->tcp_lingertime = 0; 5124 so->so_linger.l_onoff = 0; 5125 so->so_options &= ~SO_LINGER; 5126 } 5127 so->so_linger.l_linger = lgr->l_linger; 5128 handled = B_TRUE; 5129 break; 5130 } 5131 case SO_DEBUG: 5132 tcp->tcp_debug = onoff; 5133 #ifdef SOCK_TEST 5134 if (intvalue & 2) 5135 sock_test_timelimit = 10 * hz; 5136 else 5137 sock_test_timelimit = 0; 5138 5139 if (intvalue & 4) 
5140 do_useracc = 0; 5141 else 5142 do_useracc = 1; 5143 #endif /* SOCK_TEST */ 5144 break; 5145 case SO_DONTROUTE: 5146 /* 5147 * SO_DONTROUTE, SO_USELOOPBACK and 5148 * SO_BROADCAST are only of interest to IP. 5149 * We track them here only so 5150 * that we can report their current value. 5151 */ 5152 tcp->tcp_dontroute = onoff; 5153 if (onoff) 5154 so->so_options |= option_name; 5155 else 5156 so->so_options &= ~option_name; 5157 break; 5158 case SO_USELOOPBACK: 5159 tcp->tcp_useloopback = onoff; 5160 if (onoff) 5161 so->so_options |= option_name; 5162 else 5163 so->so_options &= ~option_name; 5164 break; 5165 case SO_BROADCAST: 5166 tcp->tcp_broadcast = onoff; 5167 if (onoff) 5168 so->so_options |= option_name; 5169 else 5170 so->so_options &= ~option_name; 5171 break; 5172 case SO_REUSEADDR: 5173 tcp->tcp_reuseaddr = onoff; 5174 if (onoff) 5175 so->so_options |= option_name; 5176 else 5177 so->so_options &= ~option_name; 5178 break; 5179 case SO_OOBINLINE: 5180 tcp->tcp_oobinline = onoff; 5181 if (onoff) 5182 so->so_options |= option_name; 5183 else 5184 so->so_options &= ~option_name; 5185 break; 5186 case SO_DGRAM_ERRIND: 5187 tcp->tcp_dgram_errind = onoff; 5188 if (onoff) 5189 so->so_options |= option_name; 5190 else 5191 so->so_options &= ~option_name; 5192 break; 5193 } 5194 break; 5195 case IPPROTO_TCP: 5196 switch (option_name) { 5197 case TCP_NODELAY: 5198 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5199 error = EINVAL; 5200 eprintsoline(so, error); 5201 mutex_enter(&so->so_lock); 5202 goto done2; 5203 } 5204 ASSERT(optval); 5205 tcp->tcp_naglim = intvalue ? 
1 : tcp->tcp_mss; 5206 handled = B_TRUE; 5207 break; 5208 } 5209 break; 5210 default: 5211 handled = B_FALSE; 5212 break; 5213 } 5214 } 5215 5216 if (handled) { 5217 mutex_enter(&so->so_lock); 5218 goto done2; 5219 } 5220 5221 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5222 optmgmt_req.MGMT_flags = T_NEGOTIATE; 5223 optmgmt_req.OPT_length = (t_scalar_t)sizeof (oh) + optlen; 5224 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5225 5226 oh.level = level; 5227 oh.name = option_name; 5228 oh.len = optlen; 5229 5230 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5231 &oh, sizeof (oh), optval, optlen, 0, _ALLOC_SLEEP); 5232 /* Let option management work in the presence of data flow control */ 5233 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5234 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5235 mp = NULL; 5236 mutex_enter(&so->so_lock); 5237 if (error) { 5238 eprintsoline(so, error); 5239 goto done; 5240 } 5241 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5242 (t_uscalar_t)sizeof (struct T_optmgmt_ack), &mp, 0); 5243 if (error) { 5244 eprintsoline(so, error); 5245 goto done; 5246 } 5247 ASSERT(mp); 5248 /* No need to verify T_optmgmt_ack */ 5249 freemsg(mp); 5250 done: 5251 /* 5252 * Check for SOL_SOCKET options and record their values. 5253 * If we know about a SOL_SOCKET parameter and the transport 5254 * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or 5255 * EPROTO) we let the setsockopt succeed. 
5256 */ 5257 if (level == SOL_SOCKET) { 5258 /* Check parameters */ 5259 switch (option_name) { 5260 case SO_DEBUG: 5261 case SO_REUSEADDR: 5262 case SO_KEEPALIVE: 5263 case SO_DONTROUTE: 5264 case SO_BROADCAST: 5265 case SO_USELOOPBACK: 5266 case SO_OOBINLINE: 5267 case SO_SNDBUF: 5268 case SO_RCVBUF: 5269 #ifdef notyet 5270 case SO_SNDLOWAT: 5271 case SO_RCVLOWAT: 5272 case SO_SNDTIMEO: 5273 case SO_RCVTIMEO: 5274 #endif /* notyet */ 5275 case SO_DGRAM_ERRIND: 5276 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5277 error = EINVAL; 5278 eprintsoline(so, error); 5279 goto done2; 5280 } 5281 ASSERT(optval); 5282 handled = B_TRUE; 5283 break; 5284 case SO_LINGER: 5285 if (optlen != (t_uscalar_t)sizeof (struct linger)) { 5286 error = EINVAL; 5287 eprintsoline(so, error); 5288 goto done2; 5289 } 5290 ASSERT(optval); 5291 handled = B_TRUE; 5292 break; 5293 } 5294 5295 #define intvalue (*(int32_t *)optval) 5296 5297 switch (option_name) { 5298 case SO_TYPE: 5299 case SO_ERROR: 5300 case SO_ACCEPTCONN: 5301 /* Can't be set */ 5302 error = ENOPROTOOPT; 5303 goto done2; 5304 case SO_LINGER: { 5305 struct linger *l = (struct linger *)optval; 5306 5307 so->so_linger.l_linger = l->l_linger; 5308 if (l->l_onoff) { 5309 so->so_linger.l_onoff = SO_LINGER; 5310 so->so_options |= SO_LINGER; 5311 } else { 5312 so->so_linger.l_onoff = 0; 5313 so->so_options &= ~SO_LINGER; 5314 } 5315 break; 5316 } 5317 5318 case SO_DEBUG: 5319 #ifdef SOCK_TEST 5320 if (intvalue & 2) 5321 sock_test_timelimit = 10 * hz; 5322 else 5323 sock_test_timelimit = 0; 5324 5325 if (intvalue & 4) 5326 do_useracc = 0; 5327 else 5328 do_useracc = 1; 5329 #endif /* SOCK_TEST */ 5330 /* FALLTHRU */ 5331 case SO_REUSEADDR: 5332 case SO_KEEPALIVE: 5333 case SO_DONTROUTE: 5334 case SO_BROADCAST: 5335 case SO_USELOOPBACK: 5336 case SO_OOBINLINE: 5337 case SO_DGRAM_ERRIND: 5338 if (intvalue != 0) { 5339 dprintso(so, 1, 5340 ("sotpi_setsockopt: setting 0x%x\n", 5341 option_name)); 5342 so->so_options |= option_name; 5343 
} else { 5344 dprintso(so, 1, 5345 ("sotpi_setsockopt: clearing 0x%x\n", 5346 option_name)); 5347 so->so_options &= ~option_name; 5348 } 5349 break; 5350 /* 5351 * The following options are only returned by us when the 5352 * T_SVR4_OPTMGMT_REQ fails. 5353 * XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs 5354 * since the transport might adjust the value and not 5355 * return exactly what was set by the application. 5356 */ 5357 case SO_SNDBUF: 5358 so->so_sndbuf = intvalue; 5359 break; 5360 case SO_RCVBUF: 5361 so->so_rcvbuf = intvalue; 5362 break; 5363 #ifdef notyet 5364 /* 5365 * We do not implement the semantics of these options 5366 * thus we shouldn't implement the options either. 5367 */ 5368 case SO_SNDLOWAT: 5369 so->so_sndlowat = intvalue; 5370 break; 5371 case SO_RCVLOWAT: 5372 so->so_rcvlowat = intvalue; 5373 break; 5374 case SO_SNDTIMEO: 5375 so->so_sndtimeo = intvalue; 5376 break; 5377 case SO_RCVTIMEO: 5378 so->so_rcvtimeo = intvalue; 5379 break; 5380 #endif /* notyet */ 5381 } 5382 #undef intvalue 5383 5384 if (error) { 5385 if ((error == ENOPROTOOPT || error == EPROTO || 5386 error == EINVAL) && handled) { 5387 dprintso(so, 1, 5388 ("setsockopt: ignoring error %d for 0x%x\n", 5389 error, option_name)); 5390 error = 0; 5391 } 5392 } 5393 } 5394 done2: 5395 ret: 5396 so_unlock_single(so, SOLOCKED); 5397 mutex_exit(&so->so_lock); 5398 return (error); 5399 } 5400