1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/t_lock.h> 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/buf.h> 34 #include <sys/conf.h> 35 #include <sys/cred.h> 36 #include <sys/kmem.h> 37 #include <sys/sysmacros.h> 38 #include <sys/vfs.h> 39 #include <sys/vnode.h> 40 #include <sys/debug.h> 41 #include <sys/errno.h> 42 #include <sys/time.h> 43 #include <sys/file.h> 44 #include <sys/open.h> 45 #include <sys/user.h> 46 #include <sys/termios.h> 47 #include <sys/stream.h> 48 #include <sys/strsubr.h> 49 #include <sys/strsun.h> 50 #include <sys/ddi.h> 51 #include <sys/esunddi.h> 52 #include <sys/flock.h> 53 #include <sys/modctl.h> 54 #include <sys/vtrace.h> 55 #include <sys/cmn_err.h> 56 #include <sys/pathname.h> 57 58 #include <sys/socket.h> 59 #include <sys/socketvar.h> 60 #include <sys/sockio.h> 61 #include <netinet/in.h> 62 #include <sys/un.h> 63 #include <sys/strsun.h> 64 65 #include <sys/tiuser.h> 66 #define _SUN_TPI_VERSION 2 67 
#include <sys/tihdr.h>
#include <sys/timod.h>		/* TI_GETMYNAME, TI_GETPEERNAME */

#include <c2/audit.h>

#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/tcp.h>
#include <inet/udp_impl.h>

#include <fs/sockfs/nl7c.h>
#include <sys/zone.h>

/*
 * Possible failures when memory can't be allocated. The documented behavior:
 *
 *		5.5:			4.X:		XNET:
 * accept:	ENOMEM/ENOSR/EINTR	- (EINTR)	ENOMEM/ENOBUFS/ENOSR/
 *							EINTR
 *	(4.X does not document EINTR but returns it)
 * bind:	ENOSR			-		ENOBUFS/ENOSR
 * connect:	EINTR			EINTR		ENOBUFS/ENOSR/EINTR
 * getpeername:	ENOMEM/ENOSR		ENOBUFS (-)	ENOBUFS/ENOSR
 * getsockname:	ENOMEM/ENOSR		ENOBUFS (-)	ENOBUFS/ENOSR
 *	(4.X getpeername and getsockname do not fail in practice)
 * getsockopt:	ENOMEM/ENOSR		-		ENOBUFS/ENOSR
 * listen:	-			-		ENOBUFS
 * recv:	ENOMEM/ENOSR/EINTR	EINTR		ENOBUFS/ENOMEM/ENOSR/
 *							EINTR
 * send:	ENOMEM/ENOSR/EINTR	ENOBUFS/EINTR	ENOBUFS/ENOMEM/ENOSR/
 *							EINTR
 * setsockopt:	ENOMEM/ENOSR		-		ENOBUFS/ENOMEM/ENOSR
 * shutdown:	ENOMEM/ENOSR		-		ENOBUFS/ENOSR
 * socket:	ENOMEM/ENOSR		ENOBUFS		ENOBUFS/ENOMEM/ENOSR
 * socketpair:	ENOMEM/ENOSR		-		ENOBUFS/ENOMEM/ENOSR
 *
 * Resolution. When allocation fails:
 *	recv: return EINTR
 *	send: return EINTR
 *	connect, accept: EINTR
 *	bind, listen, shutdown (unbind, unix_close, disconnect): sleep
 *	socket, socketpair: ENOBUFS
 *	getpeername, getsockname: sleep
 *	getsockopt, setsockopt: sleep
 */

#ifdef SOCK_TEST
/*
 * Variables that make sockfs do something other than the standard TPI
 * for the AF_INET transports.
 *
 * solisten_tpi_tcp:
 *	TCP can handle a O_T_BIND_REQ with an increased backlog even though
 *	the transport is already bound. This is needed to avoid losing the
 *	port number should listen() do a T_UNBIND_REQ followed by a
 *	O_T_BIND_REQ.
 *
 * soconnect_tpi_udp:
 *	UDP and ICMP can handle a T_CONN_REQ.
 *	This is needed to make the sequence of connect(), getsockname()
 *	return the local IP address used to send packets to the connected to
 *	destination.
 *
 * soconnect_tpi_tcp:
 *	TCP can handle a T_CONN_REQ without seeing a O_T_BIND_REQ.
 *	Set this to non-zero to send TPI conformant messages to TCP in this
 *	respect. This is a performance optimization.
 *
 * soaccept_tpi_tcp:
 *	TCP can handle a T_CONN_REQ without the acceptor being bound.
 *	This is a performance optimization that has been picked up in XTI.
 *
 * soaccept_tpi_multioptions:
 *	When inheriting SOL_SOCKET options from the listener to the accepting
 *	socket send them as a single message for AF_INET{,6}.
 */
int solisten_tpi_tcp = 0;
int soconnect_tpi_udp = 0;
int soconnect_tpi_tcp = 0;
int soaccept_tpi_tcp = 0;
int soaccept_tpi_multioptions = 1;
#else /* SOCK_TEST */
/*
 * Without SOCK_TEST the tunables are compile-time constants carrying the
 * same values as the defaults of the variables above.
 */
#define	soconnect_tpi_tcp	0
#define	soconnect_tpi_udp	0
#define	solisten_tpi_tcp	0
#define	soaccept_tpi_tcp	0
#define	soaccept_tpi_multioptions	1
#endif /* SOCK_TEST */

#ifdef SOCK_TEST
extern int do_useracc;
extern clock_t sock_test_timelimit;
#endif /* SOCK_TEST */

/*
 * Some X/Open added checks might have to be backed out to keep SunOS 4.X
 * applications working. Turn on this flag to disable these checks.
165 */ 166 int xnet_skip_checks = 0; 167 int xnet_check_print = 0; 168 int xnet_truncate_print = 0; 169 170 extern void sigintr(k_sigset_t *, int); 171 extern void sigunintr(k_sigset_t *); 172 173 extern void *nl7c_lookup_addr(void *, t_uscalar_t); 174 extern void *nl7c_add_addr(void *, t_uscalar_t); 175 extern void nl7c_listener_addr(void *, queue_t *); 176 177 static int sotpi_unbind(struct sonode *, int); 178 179 /* TPI sockfs sonode operations */ 180 static int sotpi_accept(struct sonode *, int, struct sonode **); 181 static int sotpi_bind(struct sonode *, struct sockaddr *, socklen_t, 182 int); 183 static int sotpi_connect(struct sonode *, const struct sockaddr *, 184 socklen_t, int, int); 185 static int sotpi_listen(struct sonode *, int); 186 static int sotpi_sendmsg(struct sonode *, struct nmsghdr *, 187 struct uio *); 188 static int sotpi_shutdown(struct sonode *, int); 189 static int sotpi_getsockname(struct sonode *); 190 static int sosend_dgramcmsg(struct sonode *, struct sockaddr *, socklen_t, 191 struct uio *, void *, t_uscalar_t, int); 192 static int sodgram_direct(struct sonode *, struct sockaddr *, 193 socklen_t, struct uio *, int); 194 195 sonodeops_t sotpi_sonodeops = { 196 sotpi_accept, /* sop_accept */ 197 sotpi_bind, /* sop_bind */ 198 sotpi_listen, /* sop_listen */ 199 sotpi_connect, /* sop_connect */ 200 sotpi_recvmsg, /* sop_recvmsg */ 201 sotpi_sendmsg, /* sop_sendmsg */ 202 sotpi_getpeername, /* sop_getpeername */ 203 sotpi_getsockname, /* sop_getsockname */ 204 sotpi_shutdown, /* sop_shutdown */ 205 sotpi_getsockopt, /* sop_getsockopt */ 206 sotpi_setsockopt /* sop_setsockopt */ 207 }; 208 209 /* 210 * Common create code for socket and accept. If tso is set the values 211 * from that node is used instead of issuing a T_INFO_REQ. 212 * 213 * Assumes that the caller has a VN_HOLD on accessvp. 214 * The VN_RELE will occur either when sotpi_create() fails or when 215 * the returned sonode is freed. 
216 */ 217 struct sonode * 218 sotpi_create(vnode_t *accessvp, int domain, int type, int protocol, int version, 219 struct sonode *tso, int *errorp) 220 { 221 struct sonode *so; 222 vnode_t *vp; 223 int flags, error; 224 225 ASSERT(accessvp != NULL); 226 vp = makesockvp(accessvp, domain, type, protocol); 227 ASSERT(vp != NULL); 228 so = VTOSO(vp); 229 230 flags = FREAD|FWRITE; 231 232 if ((type == SOCK_STREAM || type == SOCK_DGRAM) && 233 (domain == AF_INET || domain == AF_INET6) && 234 (protocol == IPPROTO_TCP || protocol == IPPROTO_UDP || 235 protocol == IPPROTO_IP)) { 236 /* Tell tcp or udp that it's talking to sockets */ 237 flags |= SO_SOCKSTR; 238 239 /* 240 * Here we indicate to socktpi_open() our attempt to 241 * make direct calls between sockfs and transport. 242 * The final decision is left to socktpi_open(). 243 */ 244 so->so_state |= SS_DIRECT; 245 246 ASSERT(so->so_type != SOCK_DGRAM || tso == NULL); 247 if (so->so_type == SOCK_STREAM && tso != NULL) { 248 if (tso->so_state & SS_DIRECT) { 249 /* 250 * Inherit SS_DIRECT from listener and pass 251 * SO_ACCEPTOR open flag to tcp, indicating 252 * that this is an accept fast-path instance. 253 */ 254 flags |= SO_ACCEPTOR; 255 } else { 256 /* 257 * SS_DIRECT is not set on listener, meaning 258 * that the listener has been converted from 259 * a socket to a stream. Ensure that the 260 * acceptor inherits these settings. 261 */ 262 so->so_state &= ~SS_DIRECT; 263 flags &= ~SO_SOCKSTR; 264 } 265 } 266 } 267 268 /* 269 * Tell local transport that it is talking to sockets. 
270 */ 271 if (so->so_family == AF_UNIX) { 272 flags |= SO_SOCKSTR; 273 } 274 275 if (error = socktpi_open(&vp, flags, CRED())) { 276 VN_RELE(vp); 277 *errorp = error; 278 return (NULL); 279 } 280 281 if (error = so_strinit(so, tso)) { 282 (void) VOP_CLOSE(vp, 0, 1, 0, CRED()); 283 VN_RELE(vp); 284 *errorp = error; 285 return (NULL); 286 } 287 288 if (version == SOV_DEFAULT) 289 version = so_default_version; 290 291 so->so_version = (short)version; 292 return (so); 293 } 294 295 /* 296 * Bind the socket to an unspecified address in sockfs only. 297 * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't 298 * required in all cases. 299 */ 300 static void 301 so_automatic_bind(struct sonode *so) 302 { 303 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 304 305 ASSERT(MUTEX_HELD(&so->so_lock)); 306 ASSERT(!(so->so_state & SS_ISBOUND)); 307 ASSERT(so->so_unbind_mp); 308 309 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 310 bzero(so->so_laddr_sa, so->so_laddr_len); 311 so->so_laddr_sa->sa_family = so->so_family; 312 so->so_state |= SS_ISBOUND; 313 } 314 315 316 /* 317 * bind the socket. 318 * 319 * If the socket is already bound and none of _SOBIND_SOCKBSD or _SOBIND_XPG4_2 320 * are passed in we allow rebinding. Note that for backwards compatibility 321 * even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to sobind/bind. 322 * Thus the rebinding code is currently not executed. 323 * 324 * The constraints for rebinding are: 325 * - it is a SOCK_DGRAM, or 326 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 327 * and no listen() has been done. 328 * This rebinding code was added based on some language in the XNET book 329 * about not returning EINVAL it the protocol allows rebinding. However, 330 * this language is not present in the Posix socket draft. Thus maybe the 331 * rebinding logic should be deleted from the source. 
332 * 333 * A null "name" can be used to unbind the socket if: 334 * - it is a SOCK_DGRAM, or 335 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 336 * and no listen() has been done. 337 */ 338 static int 339 sotpi_bindlisten(struct sonode *so, struct sockaddr *name, 340 socklen_t namelen, int backlog, int flags) 341 { 342 struct T_bind_req bind_req; 343 struct T_bind_ack *bind_ack; 344 int error = 0; 345 mblk_t *mp; 346 void *addr; 347 t_uscalar_t addrlen; 348 int unbind_on_err = 1; 349 boolean_t clear_acceptconn_on_err = B_FALSE; 350 boolean_t restore_backlog_on_err = B_FALSE; 351 int save_so_backlog; 352 t_scalar_t PRIM_type = O_T_BIND_REQ; 353 boolean_t tcp_udp_xport; 354 void *nl7c = NULL; 355 356 dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n", 357 so, name, namelen, backlog, flags, 358 pr_state(so->so_state, so->so_mode))); 359 360 tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM; 361 362 if (!(flags & _SOBIND_LOCK_HELD)) { 363 mutex_enter(&so->so_lock); 364 so_lock_single(so); /* Set SOLOCKED */ 365 } else { 366 ASSERT(MUTEX_HELD(&so->so_lock)); 367 ASSERT(so->so_flag & SOLOCKED); 368 } 369 370 /* 371 * Make sure that there is a preallocated unbind_req message 372 * before binding. This message allocated when the socket is 373 * created but it might be have been consumed. 374 */ 375 if (so->so_unbind_mp == NULL) { 376 dprintso(so, 1, ("sobind: allocating unbind_req\n")); 377 /* NOTE: holding so_lock while sleeping */ 378 so->so_unbind_mp = 379 soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP); 380 } 381 382 if (flags & _SOBIND_REBIND) { 383 /* 384 * Called from solisten after doing an sotpi_unbind() or 385 * potentially without the unbind (latter for AF_INET{,6}). 
386 */ 387 ASSERT(name == NULL && namelen == 0); 388 389 if (so->so_family == AF_UNIX) { 390 ASSERT(so->so_ux_bound_vp); 391 addr = &so->so_ux_laddr; 392 addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr); 393 dprintso(so, 1, 394 ("sobind rebind UNIX: addrlen %d, addr 0x%p, vp %p\n", 395 addrlen, 396 ((struct so_ux_addr *)addr)->soua_vp, 397 so->so_ux_bound_vp)); 398 } else { 399 addr = so->so_laddr_sa; 400 addrlen = (t_uscalar_t)so->so_laddr_len; 401 } 402 } else if (flags & _SOBIND_UNSPEC) { 403 ASSERT(name == NULL && namelen == 0); 404 405 /* 406 * The caller checked SS_ISBOUND but not necessarily 407 * under so_lock 408 */ 409 if (so->so_state & SS_ISBOUND) { 410 /* No error */ 411 goto done; 412 } 413 414 /* Set an initial local address */ 415 switch (so->so_family) { 416 case AF_UNIX: 417 /* 418 * Use an address with same size as struct sockaddr 419 * just like BSD. 420 */ 421 so->so_laddr_len = 422 (socklen_t)sizeof (struct sockaddr); 423 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 424 bzero(so->so_laddr_sa, so->so_laddr_len); 425 so->so_laddr_sa->sa_family = so->so_family; 426 427 /* 428 * Pass down an address with the implicit bind 429 * magic number and the rest all zeros. 430 * The transport will return a unique address. 431 */ 432 so->so_ux_laddr.soua_vp = NULL; 433 so->so_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT; 434 addr = &so->so_ux_laddr; 435 addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr); 436 break; 437 438 case AF_INET: 439 case AF_INET6: 440 /* 441 * An unspecified bind in TPI has a NULL address. 442 * Set the address in sockfs to have the sa_family. 443 */ 444 so->so_laddr_len = (so->so_family == AF_INET) ? 445 (socklen_t)sizeof (sin_t) : 446 (socklen_t)sizeof (sin6_t); 447 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 448 bzero(so->so_laddr_sa, so->so_laddr_len); 449 so->so_laddr_sa->sa_family = so->so_family; 450 addr = NULL; 451 addrlen = 0; 452 break; 453 454 default: 455 /* 456 * An unspecified bind in TPI has a NULL address. 
457 * Set the address in sockfs to be zero length. 458 * 459 * Can not assume there is a sa_family for all 460 * protocol families. For example, AF_X25 does not 461 * have a family field. 462 */ 463 so->so_laddr_len = 0; /* XXX correct? */ 464 bzero(so->so_laddr_sa, so->so_laddr_len); 465 addr = NULL; 466 addrlen = 0; 467 break; 468 } 469 470 } else { 471 if (so->so_state & SS_ISBOUND) { 472 /* 473 * If it is ok to rebind the socket, first unbind 474 * with the transport. A rebind to the NULL address 475 * is interpreted as an unbind. 476 * Note that a bind to NULL in BSD does unbind the 477 * socket but it fails with EINVAL. 478 * Note that regular sockets set SOV_SOCKBSD i.e. 479 * _SOBIND_SOCKBSD gets set here hence no type of 480 * socket does currently allow rebinding. 481 * 482 * If the name is NULL just do an unbind. 483 */ 484 if (flags & (_SOBIND_SOCKBSD|_SOBIND_XPG4_2) && 485 name != NULL) { 486 error = EINVAL; 487 unbind_on_err = 0; 488 eprintsoline(so, error); 489 goto done; 490 } 491 if ((so->so_mode & SM_CONNREQUIRED) && 492 (so->so_state & SS_CANTREBIND)) { 493 error = EINVAL; 494 unbind_on_err = 0; 495 eprintsoline(so, error); 496 goto done; 497 } 498 error = sotpi_unbind(so, 0); 499 if (error) { 500 eprintsoline(so, error); 501 goto done; 502 } 503 ASSERT(!(so->so_state & SS_ISBOUND)); 504 if (name == NULL) { 505 so->so_state &= 506 ~(SS_ISCONNECTED|SS_ISCONNECTING); 507 goto done; 508 } 509 } 510 /* X/Open requires this check */ 511 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 512 if (xnet_check_print) { 513 printf("sockfs: X/Open bind state check " 514 "caused EINVAL\n"); 515 } 516 error = EINVAL; 517 goto done; 518 } 519 520 switch (so->so_family) { 521 case AF_UNIX: 522 /* 523 * All AF_UNIX addresses are nul terminated 524 * when copied (copyin_name) in so the minimum 525 * length is 3 bytes. 
526 */ 527 if (name == NULL || 528 (ssize_t)namelen <= sizeof (short) + 1) { 529 error = EISDIR; 530 eprintsoline(so, error); 531 goto done; 532 } 533 /* 534 * Verify so_family matches the bound family. 535 * BSD does not check this for AF_UNIX resulting 536 * in funny mknods. 537 */ 538 if (name->sa_family != so->so_family) { 539 error = EAFNOSUPPORT; 540 goto done; 541 } 542 break; 543 case AF_INET: 544 if (name == NULL) { 545 error = EINVAL; 546 eprintsoline(so, error); 547 goto done; 548 } 549 if ((size_t)namelen != sizeof (sin_t)) { 550 error = name->sa_family != so->so_family ? 551 EAFNOSUPPORT : EINVAL; 552 eprintsoline(so, error); 553 goto done; 554 } 555 if ((flags & _SOBIND_XPG4_2) && 556 (name->sa_family != so->so_family)) { 557 /* 558 * This check has to be made for X/Open 559 * sockets however application failures have 560 * been observed when it is applied to 561 * all sockets. 562 */ 563 error = EAFNOSUPPORT; 564 eprintsoline(so, error); 565 goto done; 566 } 567 /* 568 * Force a zero sa_family to match so_family. 569 * 570 * Some programs like inetd(1M) don't set the 571 * family field. Other programs leave 572 * sin_family set to garbage - SunOS 4.X does 573 * not check the family field on a bind. 574 * We use the family field that 575 * was passed in to the socket() call. 576 */ 577 name->sa_family = so->so_family; 578 break; 579 580 case AF_INET6: { 581 #ifdef DEBUG 582 sin6_t *sin6 = (sin6_t *)name; 583 #endif /* DEBUG */ 584 585 if (name == NULL) { 586 error = EINVAL; 587 eprintsoline(so, error); 588 goto done; 589 } 590 if ((size_t)namelen != sizeof (sin6_t)) { 591 error = name->sa_family != so->so_family ? 592 EAFNOSUPPORT : EINVAL; 593 eprintsoline(so, error); 594 goto done; 595 } 596 if (name->sa_family != so->so_family) { 597 /* 598 * With IPv6 we require the family to match 599 * unlike in IPv4. 
600 */ 601 error = EAFNOSUPPORT; 602 eprintsoline(so, error); 603 goto done; 604 } 605 #ifdef DEBUG 606 /* 607 * Verify that apps don't forget to clear 608 * sin6_scope_id etc 609 */ 610 if (sin6->sin6_scope_id != 0 && 611 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 612 cmn_err(CE_WARN, 613 "bind with uninitialized sin6_scope_id " 614 "(%d) on socket. Pid = %d\n", 615 (int)sin6->sin6_scope_id, 616 (int)curproc->p_pid); 617 } 618 if (sin6->__sin6_src_id != 0) { 619 cmn_err(CE_WARN, 620 "bind with uninitialized __sin6_src_id " 621 "(%d) on socket. Pid = %d\n", 622 (int)sin6->__sin6_src_id, 623 (int)curproc->p_pid); 624 } 625 #endif /* DEBUG */ 626 break; 627 } 628 default: 629 /* 630 * Don't do any length or sa_family check to allow 631 * non-sockaddr style addresses. 632 */ 633 if (name == NULL) { 634 error = EINVAL; 635 eprintsoline(so, error); 636 goto done; 637 } 638 break; 639 } 640 641 if (namelen > (t_uscalar_t)so->so_laddr_maxlen) { 642 error = ENAMETOOLONG; 643 eprintsoline(so, error); 644 goto done; 645 } 646 /* 647 * Save local address. 648 */ 649 so->so_laddr_len = (socklen_t)namelen; 650 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 651 bcopy(name, so->so_laddr_sa, namelen); 652 653 addr = so->so_laddr_sa; 654 addrlen = (t_uscalar_t)so->so_laddr_len; 655 switch (so->so_family) { 656 case AF_INET6: 657 case AF_INET: 658 break; 659 case AF_UNIX: { 660 struct sockaddr_un *soun = 661 (struct sockaddr_un *)so->so_laddr_sa; 662 struct vnode *vp; 663 struct vattr vattr; 664 665 ASSERT(so->so_ux_bound_vp == NULL); 666 /* 667 * Create vnode for the specified path name. 668 * Keep vnode held with a reference in so_ux_bound_vp. 669 * Use the vnode pointer as the address used in the 670 * bind with the transport. 671 * 672 * Use the same mode as in BSD. In particular this does 673 * not observe the umask. 
674 */ 675 /* MAXPATHLEN + soun_family + nul termination */ 676 if (so->so_laddr_len > 677 (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) { 678 error = ENAMETOOLONG; 679 eprintsoline(so, error); 680 goto done; 681 } 682 vattr.va_type = VSOCK; 683 vattr.va_mode = 0777 & ~u.u_cmask; 684 vattr.va_mask = AT_TYPE|AT_MODE; 685 /* NOTE: holding so_lock */ 686 error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr, 687 EXCL, 0, &vp, CRMKNOD, 0, 0); 688 if (error) { 689 if (error == EEXIST) 690 error = EADDRINUSE; 691 eprintsoline(so, error); 692 goto done; 693 } 694 /* 695 * Establish pointer from the underlying filesystem 696 * vnode to the socket node. 697 * so_ux_bound_vp and v_stream->sd_vnode form the 698 * cross-linkage between the underlying filesystem 699 * node and the socket node. 700 */ 701 ASSERT(SOTOV(so)->v_stream); 702 mutex_enter(&vp->v_lock); 703 vp->v_stream = SOTOV(so)->v_stream; 704 so->so_ux_bound_vp = vp; 705 mutex_exit(&vp->v_lock); 706 707 /* 708 * Use the vnode pointer value as a unique address 709 * (together with the magic number to avoid conflicts 710 * with implicit binds) in the transport provider. 711 */ 712 so->so_ux_laddr.soua_vp = (void *)so->so_ux_bound_vp; 713 so->so_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT; 714 addr = &so->so_ux_laddr; 715 addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr); 716 dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n", 717 addrlen, 718 ((struct so_ux_addr *)addr)->soua_vp)); 719 break; 720 } 721 } /* end switch (so->so_family) */ 722 } 723 724 /* 725 * set SS_ACCEPTCONN before sending down O_T_BIND_REQ since 726 * the transport can start passing up T_CONN_IND messages 727 * as soon as it receives the bind req and strsock_proto() 728 * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs. 
729 */ 730 if (flags & _SOBIND_LISTEN) { 731 if ((so->so_state & SS_ACCEPTCONN) == 0) 732 clear_acceptconn_on_err = B_TRUE; 733 save_so_backlog = so->so_backlog; 734 restore_backlog_on_err = B_TRUE; 735 so->so_state |= SS_ACCEPTCONN; 736 so->so_backlog = backlog; 737 } 738 739 /* 740 * If NL7C addr(s) have been configured check for addr/port match, 741 * or if an implicit NL7C socket via AF_NCA mark socket as NL7C. 742 * 743 * NL7C supports the TCP transport only so check AF_INET and AF_INET6 744 * family sockets only. If match mark as such. 745 */ 746 if ((nl7c_enabled && addr != NULL && 747 (so->so_family == AF_INET || so->so_family == AF_INET6) && 748 (nl7c = nl7c_lookup_addr(addr, addrlen))) || 749 so->so_nl7c_flags == NL7C_AF_NCA) { 750 /* 751 * NL7C is not supported in non-global zones, 752 * we enforce this restriction here. 753 */ 754 if (so->so_zoneid == GLOBAL_ZONEID) { 755 /* An NL7C socket, mark it */ 756 so->so_nl7c_flags |= NL7C_ENABLED; 757 } else 758 nl7c = NULL; 759 } 760 /* 761 * We send a T_BIND_REQ for TCP/UDP since we know it supports it, 762 * for other transports we will send in a O_T_BIND_REQ. 
763 */ 764 if (tcp_udp_xport && 765 (so->so_family == AF_INET || so->so_family == AF_INET6)) 766 PRIM_type = T_BIND_REQ; 767 768 bind_req.PRIM_type = PRIM_type; 769 bind_req.ADDR_length = addrlen; 770 bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req); 771 bind_req.CONIND_number = backlog; 772 /* NOTE: holding so_lock while sleeping */ 773 mp = soallocproto2(&bind_req, sizeof (bind_req), 774 addr, addrlen, 0, _ALLOC_SLEEP); 775 so->so_state &= ~SS_LADDR_VALID; 776 /* Done using so_laddr_sa - can drop the lock */ 777 mutex_exit(&so->so_lock); 778 779 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 780 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 781 if (error) { 782 eprintsoline(so, error); 783 mutex_enter(&so->so_lock); 784 goto done; 785 } 786 787 mutex_enter(&so->so_lock); 788 error = sowaitprim(so, PRIM_type, T_BIND_ACK, 789 (t_uscalar_t)sizeof (*bind_ack), &mp, 0); 790 if (error) { 791 eprintsoline(so, error); 792 goto done; 793 } 794 ASSERT(mp); 795 /* 796 * Even if some TPI message (e.g. T_DISCON_IND) was received in 797 * strsock_proto while the lock was dropped above, the bind 798 * is allowed to complete. 799 */ 800 801 /* Mark as bound. This will be undone if we detect errors below. */ 802 if (flags & _SOBIND_NOXLATE) { 803 ASSERT(so->so_family == AF_UNIX); 804 so->so_state |= SS_FADDR_NOXLATE; 805 } 806 ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND)); 807 so->so_state |= SS_ISBOUND; 808 ASSERT(so->so_unbind_mp); 809 810 /* note that we've already set SS_ACCEPTCONN above */ 811 812 /* 813 * Recompute addrlen - an unspecied bind sent down an 814 * address of length zero but we expect the appropriate length 815 * in return. 816 */ 817 addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ? 818 sizeof (so->so_ux_laddr) : so->so_laddr_len); 819 820 bind_ack = (struct T_bind_ack *)mp->b_rptr; 821 /* 822 * The alignment restriction is really too strict but 823 * we want enough alignment to inspect the fields of 824 * a sockaddr_in. 
825 */ 826 addr = sogetoff(mp, bind_ack->ADDR_offset, 827 bind_ack->ADDR_length, 828 __TPI_ALIGN_SIZE); 829 if (addr == NULL) { 830 freemsg(mp); 831 error = EPROTO; 832 eprintsoline(so, error); 833 goto done; 834 } 835 if (!(flags & _SOBIND_UNSPEC)) { 836 /* 837 * Verify that the transport didn't return something we 838 * did not want e.g. an address other than what we asked for. 839 * 840 * NOTE: These checks would go away if/when we switch to 841 * using the new TPI (in which the transport would fail 842 * the request instead of assigning a different address). 843 * 844 * NOTE2: For protocols that we don't know (i.e. any 845 * other than AF_INET6, AF_INET and AF_UNIX), we 846 * cannot know if the transport should be expected to 847 * return the same address as that requested. 848 * 849 * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send 850 * down a T_BIND_REQ. We use O_T_BIND_REQ for others. 851 * 852 * For example, in the case of netatalk it may be 853 * inappropriate for the transport to return the 854 * requested address (as it may have allocated a local 855 * port number in behaviour similar to that of an 856 * AF_INET bind request with a port number of zero). 857 * 858 * Given the definition of O_T_BIND_REQ, where the 859 * transport may bind to an address other than the 860 * requested address, it's not possible to determine 861 * whether a returned address that differs from the 862 * requested address is a reason to fail (because the 863 * requested address was not available) or succeed 864 * (because the transport allocated an appropriate 865 * address and/or port). 866 * 867 * sockfs currently requires that the transport return 868 * the requested address in the T_BIND_ACK, unless 869 * there is code here to allow for any discrepancy. 870 * Such code exists for AF_INET and AF_INET6. 871 * 872 * Netatalk chooses to return the requested address 873 * rather than the (correct) allocated address. 
This 874 * means that netatalk violates the TPI specification 875 * (and would not function correctly if used from a 876 * TLI application), but it does mean that it works 877 * with sockfs. 878 * 879 * As noted above, using the newer XTI bind primitive 880 * (T_BIND_REQ) in preference to O_T_BIND_REQ would 881 * allow sockfs to be more sure about whether or not 882 * the bind request had succeeded (as transports are 883 * not permitted to bind to a different address than 884 * that requested - they must return failure). 885 * Unfortunately, support for T_BIND_REQ may not be 886 * present in all transport implementations (netatalk, 887 * for example, doesn't have it), making the 888 * transition difficult. 889 */ 890 if (bind_ack->ADDR_length != addrlen) { 891 /* Assumes that the requested address was in use */ 892 freemsg(mp); 893 error = EADDRINUSE; 894 eprintsoline(so, error); 895 goto done; 896 } 897 898 switch (so->so_family) { 899 case AF_INET6: 900 case AF_INET: { 901 sin_t *rname, *aname; 902 903 rname = (sin_t *)addr; 904 aname = (sin_t *)so->so_laddr_sa; 905 906 /* 907 * Take advantage of the alignment 908 * of sin_port and sin6_port which fall 909 * in the same place in their data structures. 910 * Just use sin_port for either address family. 911 * 912 * This may become a problem if (heaven forbid) 913 * there's a separate ipv6port_reserved... :-P 914 * 915 * Binding to port 0 has the semantics of letting 916 * the transport bind to any port. 917 * 918 * If the transport is TCP or UDP since we had sent 919 * a T_BIND_REQ we would not get a port other than 920 * what we asked for. 921 */ 922 if (tcp_udp_xport) { 923 /* 924 * Pick up the new port number if we bound to 925 * port 0. 
926 */ 927 if (aname->sin_port == 0) 928 aname->sin_port = rname->sin_port; 929 so->so_state |= SS_LADDR_VALID; 930 break; 931 } 932 if (aname->sin_port != 0 && 933 aname->sin_port != rname->sin_port) { 934 freemsg(mp); 935 error = EADDRINUSE; 936 eprintsoline(so, error); 937 goto done; 938 } 939 /* 940 * Pick up the new port number if we bound to port 0. 941 */ 942 aname->sin_port = rname->sin_port; 943 944 /* 945 * Unfortunately, addresses aren't _quite_ the same. 946 */ 947 if (so->so_family == AF_INET) { 948 if (aname->sin_addr.s_addr != 949 rname->sin_addr.s_addr) { 950 freemsg(mp); 951 error = EADDRNOTAVAIL; 952 eprintsoline(so, error); 953 goto done; 954 } 955 } else { 956 sin6_t *rname6 = (sin6_t *)rname; 957 sin6_t *aname6 = (sin6_t *)aname; 958 959 if (!IN6_ARE_ADDR_EQUAL(&aname6->sin6_addr, 960 &rname6->sin6_addr)) { 961 freemsg(mp); 962 error = EADDRNOTAVAIL; 963 eprintsoline(so, error); 964 goto done; 965 } 966 } 967 break; 968 } 969 case AF_UNIX: 970 if (bcmp(addr, &so->so_ux_laddr, addrlen) != 0) { 971 freemsg(mp); 972 error = EADDRINUSE; 973 eprintsoline(so, error); 974 eprintso(so, 975 ("addrlen %d, addr 0x%x, vp %p\n", 976 addrlen, *((int *)addr), 977 so->so_ux_bound_vp)); 978 goto done; 979 } 980 so->so_state |= SS_LADDR_VALID; 981 break; 982 default: 983 /* 984 * NOTE: This assumes that addresses can be 985 * byte-compared for equivalence. 986 */ 987 if (bcmp(addr, so->so_laddr_sa, addrlen) != 0) { 988 freemsg(mp); 989 error = EADDRINUSE; 990 eprintsoline(so, error); 991 goto done; 992 } 993 /* 994 * Don't mark SS_LADDR_VALID, as we cannot be 995 * sure that the returned address is the real 996 * bound address when talking to an unknown 997 * transport. 998 */ 999 break; 1000 } 1001 } else { 1002 /* 1003 * Save for returned address for getsockname. 1004 * Needed for unspecific bind unless transport supports 1005 * the TI_GETMYNAME ioctl. 
1006 * Do this for AF_INET{,6} even though they do, as 1007 * caching info here is much better performance than 1008 * a TPI/STREAMS trip to the transport for getsockname. 1009 * Any which can't for some reason _must_ _not_ set 1010 * LADDR_VALID here for the caching version of getsockname 1011 * to not break; 1012 */ 1013 switch (so->so_family) { 1014 case AF_UNIX: 1015 /* 1016 * Record the address bound with the transport 1017 * for use by socketpair. 1018 */ 1019 bcopy(addr, &so->so_ux_laddr, addrlen); 1020 so->so_state |= SS_LADDR_VALID; 1021 break; 1022 case AF_INET: 1023 case AF_INET6: 1024 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 1025 bcopy(addr, so->so_laddr_sa, so->so_laddr_len); 1026 so->so_state |= SS_LADDR_VALID; 1027 break; 1028 default: 1029 /* 1030 * Don't mark SS_LADDR_VALID, as we cannot be 1031 * sure that the returned address is the real 1032 * bound address when talking to an unknown 1033 * transport. 1034 */ 1035 break; 1036 } 1037 } 1038 1039 if (nl7c == NULL && (so->so_nl7c_flags & NL7C_AF_NCA) && 1040 (so->so_nl7c_flags & NL7C_ENABLED)) { 1041 /* 1042 * Was an AF_NCA bind() so add it to the addr list for 1043 * reporting purposes. 
1044 */ 1045 nl7c = nl7c_add_addr(addr, addrlen); 1046 } 1047 if (nl7c != NULL) { 1048 nl7c_listener_addr(nl7c, strvp2wq(SOTOV(so))); 1049 } 1050 1051 freemsg(mp); 1052 1053 done: 1054 if (error) { 1055 /* reset state & backlog to values held on entry */ 1056 if (clear_acceptconn_on_err == B_TRUE) 1057 so->so_state &= ~SS_ACCEPTCONN; 1058 if (restore_backlog_on_err == B_TRUE) 1059 so->so_backlog = save_so_backlog; 1060 1061 if (unbind_on_err && so->so_state & SS_ISBOUND) { 1062 int err; 1063 1064 err = sotpi_unbind(so, 0); 1065 /* LINTED - statement has no consequent: if */ 1066 if (err) { 1067 eprintsoline(so, error); 1068 } else { 1069 ASSERT(!(so->so_state & SS_ISBOUND)); 1070 } 1071 } 1072 } 1073 if (!(flags & _SOBIND_LOCK_HELD)) { 1074 so_unlock_single(so, SOLOCKED); 1075 mutex_exit(&so->so_lock); 1076 } else { 1077 /* If the caller held the lock don't release it here */ 1078 ASSERT(MUTEX_HELD(&so->so_lock)); 1079 ASSERT(so->so_flag & SOLOCKED); 1080 } 1081 return (error); 1082 } 1083 1084 /* bind the socket */ 1085 static int 1086 sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 1087 int flags) 1088 { 1089 if ((flags & _SOBIND_SOCKETPAIR) == 0) 1090 return (sotpi_bindlisten(so, name, namelen, 0, flags)); 1091 1092 flags &= ~_SOBIND_SOCKETPAIR; 1093 return (sotpi_bindlisten(so, name, namelen, 1, flags)); 1094 } 1095 1096 /* 1097 * Unbind a socket - used when bind() fails, when bind() specifies a NULL 1098 * address, or when listen needs to unbind and bind. 1099 * If the _SOUNBIND_REBIND flag is specified the addresses are retained 1100 * so that a sobind can pick them up. 
1101 */ 1102 static int 1103 sotpi_unbind(struct sonode *so, int flags) 1104 { 1105 struct T_unbind_req unbind_req; 1106 int error = 0; 1107 mblk_t *mp; 1108 1109 dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n", 1110 so, flags, pr_state(so->so_state, so->so_mode))); 1111 1112 ASSERT(MUTEX_HELD(&so->so_lock)); 1113 ASSERT(so->so_flag & SOLOCKED); 1114 1115 if (!(so->so_state & SS_ISBOUND)) { 1116 error = EINVAL; 1117 eprintsoline(so, error); 1118 goto done; 1119 } 1120 1121 mutex_exit(&so->so_lock); 1122 1123 /* 1124 * Flush the read and write side (except stream head read queue) 1125 * and send down T_UNBIND_REQ. 1126 */ 1127 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW); 1128 1129 unbind_req.PRIM_type = T_UNBIND_REQ; 1130 mp = soallocproto1(&unbind_req, sizeof (unbind_req), 1131 0, _ALLOC_SLEEP); 1132 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1133 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1134 mutex_enter(&so->so_lock); 1135 if (error) { 1136 eprintsoline(so, error); 1137 goto done; 1138 } 1139 1140 error = sowaitokack(so, T_UNBIND_REQ); 1141 if (error) { 1142 eprintsoline(so, error); 1143 goto done; 1144 } 1145 1146 /* 1147 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1148 * strsock_proto while the lock was dropped above, the unbind 1149 * is allowed to complete. 1150 */ 1151 if (!(flags & _SOUNBIND_REBIND)) { 1152 /* 1153 * Clear out bound address. 1154 */ 1155 vnode_t *vp; 1156 1157 if ((vp = so->so_ux_bound_vp) != NULL) { 1158 ASSERT(vp->v_stream); 1159 so->so_ux_bound_vp = NULL; 1160 vn_rele_stream(vp); 1161 } 1162 /* Clear out address */ 1163 so->so_laddr_len = 0; 1164 } 1165 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN|SS_LADDR_VALID); 1166 done: 1167 /* If the caller held the lock don't release it here */ 1168 ASSERT(MUTEX_HELD(&so->so_lock)); 1169 ASSERT(so->so_flag & SOLOCKED); 1170 1171 return (error); 1172 } 1173 1174 /* 1175 * listen on the socket. 
1176 * For TPI conforming transports this has to first unbind with the transport 1177 * and then bind again using the new backlog. 1178 */ 1179 int 1180 sotpi_listen(struct sonode *so, int backlog) 1181 { 1182 int error = 0; 1183 1184 dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n", 1185 so, backlog, pr_state(so->so_state, so->so_mode))); 1186 1187 if (so->so_serv_type == T_CLTS) 1188 return (EOPNOTSUPP); 1189 1190 /* 1191 * If the socket is ready to accept connections already, then 1192 * return without doing anything. This avoids a problem where 1193 * a second listen() call fails if a connection is pending and 1194 * leaves the socket unbound. Only when we are not unbinding 1195 * with the transport can we safely increase the backlog. 1196 */ 1197 if (so->so_state & SS_ACCEPTCONN && 1198 !((so->so_family == AF_INET || so->so_family == AF_INET6) && 1199 /*CONSTCOND*/ 1200 !solisten_tpi_tcp)) 1201 return (0); 1202 1203 if (so->so_state & SS_ISCONNECTED) 1204 return (EINVAL); 1205 1206 mutex_enter(&so->so_lock); 1207 so_lock_single(so); /* Set SOLOCKED */ 1208 1209 if (backlog < 0) 1210 backlog = 0; 1211 /* 1212 * Use the same qlimit as in BSD. BSD checks the qlimit 1213 * before queuing the next connection implying that a 1214 * listen(sock, 0) allows one connection to be queued. 1215 * BSD also uses 1.5 times the requested backlog. 1216 * 1217 * XNS Issue 4 required a strict interpretation of the backlog. 1218 * This has been waived subsequently for Issue 4 and the change 1219 * incorporated in XNS Issue 5. So we aren't required to do 1220 * anything special for XPG apps. 1221 */ 1222 if (backlog >= (INT_MAX - 1) / 3) 1223 backlog = INT_MAX; 1224 else 1225 backlog = backlog * 3 / 2 + 1; 1226 1227 /* 1228 * If the listen doesn't change the backlog we do nothing. 1229 * This avoids an EPROTO error from the transport. 
1230 */ 1231 if ((so->so_state & SS_ACCEPTCONN) && 1232 so->so_backlog == backlog) 1233 goto done; 1234 1235 if (!(so->so_state & SS_ISBOUND)) { 1236 /* 1237 * Must have been explicitly bound in the UNIX domain. 1238 */ 1239 if (so->so_family == AF_UNIX) { 1240 error = EINVAL; 1241 goto done; 1242 } 1243 error = sotpi_bindlisten(so, NULL, 0, backlog, 1244 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN); 1245 } else if (backlog > 0) { 1246 /* 1247 * AF_INET{,6} hack to avoid losing the port. 1248 * Assumes that all AF_INET{,6} transports can handle a 1249 * O_T_BIND_REQ with a non-zero CONIND_number when the TPI 1250 * has already bound thus it is possible to avoid the unbind. 1251 */ 1252 if (!((so->so_family == AF_INET || so->so_family == AF_INET6) && 1253 /*CONSTCOND*/ 1254 !solisten_tpi_tcp)) { 1255 error = sotpi_unbind(so, _SOUNBIND_REBIND); 1256 if (error) 1257 goto done; 1258 } 1259 error = sotpi_bindlisten(so, NULL, 0, backlog, 1260 _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN); 1261 } else { 1262 so->so_state |= SS_ACCEPTCONN; 1263 so->so_backlog = backlog; 1264 } 1265 if (error) 1266 goto done; 1267 ASSERT(so->so_state & SS_ACCEPTCONN); 1268 done: 1269 so_unlock_single(so, SOLOCKED); 1270 mutex_exit(&so->so_lock); 1271 return (error); 1272 } 1273 1274 /* 1275 * Disconnect either a specified seqno or all (-1). 1276 * The former is used on listening sockets only. 1277 * 1278 * When seqno == -1 sodisconnect could call sotpi_unbind. However, 1279 * the current use of sodisconnect(seqno == -1) is only for shutdown 1280 * so there is no point (and potentially incorrect) to unbind. 
1281 */ 1282 int 1283 sodisconnect(struct sonode *so, t_scalar_t seqno, int flags) 1284 { 1285 struct T_discon_req discon_req; 1286 int error = 0; 1287 mblk_t *mp; 1288 1289 dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n", 1290 so, seqno, flags, pr_state(so->so_state, so->so_mode))); 1291 1292 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1293 mutex_enter(&so->so_lock); 1294 so_lock_single(so); /* Set SOLOCKED */ 1295 } else { 1296 ASSERT(MUTEX_HELD(&so->so_lock)); 1297 ASSERT(so->so_flag & SOLOCKED); 1298 } 1299 1300 if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ACCEPTCONN))) { 1301 error = EINVAL; 1302 eprintsoline(so, error); 1303 goto done; 1304 } 1305 1306 mutex_exit(&so->so_lock); 1307 /* 1308 * Flush the write side (unless this is a listener) 1309 * and then send down a T_DISCON_REQ. 1310 * (Don't flush on listener since it could flush {O_}T_CONN_RES 1311 * and other messages.) 1312 */ 1313 if (!(so->so_state & SS_ACCEPTCONN)) 1314 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHW); 1315 1316 discon_req.PRIM_type = T_DISCON_REQ; 1317 discon_req.SEQ_number = seqno; 1318 mp = soallocproto1(&discon_req, sizeof (discon_req), 1319 0, _ALLOC_SLEEP); 1320 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1321 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1322 mutex_enter(&so->so_lock); 1323 if (error) { 1324 eprintsoline(so, error); 1325 goto done; 1326 } 1327 1328 error = sowaitokack(so, T_DISCON_REQ); 1329 if (error) { 1330 eprintsoline(so, error); 1331 goto done; 1332 } 1333 /* 1334 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1335 * strsock_proto while the lock was dropped above, the disconnect 1336 * is allowed to complete. However, it is not possible to 1337 * assert that SS_ISCONNECTED|SS_ISCONNECTING are set. 
1338 */ 1339 so->so_state &= 1340 ~(SS_ISCONNECTED|SS_ISCONNECTING|SS_LADDR_VALID|SS_FADDR_VALID); 1341 done: 1342 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1343 so_unlock_single(so, SOLOCKED); 1344 mutex_exit(&so->so_lock); 1345 } else { 1346 /* If the caller held the lock don't release it here */ 1347 ASSERT(MUTEX_HELD(&so->so_lock)); 1348 ASSERT(so->so_flag & SOLOCKED); 1349 } 1350 return (error); 1351 } 1352 1353 int 1354 sotpi_accept(struct sonode *so, int fflag, struct sonode **nsop) 1355 { 1356 struct T_conn_ind *conn_ind; 1357 struct T_conn_res *conn_res; 1358 int error = 0; 1359 mblk_t *mp; 1360 struct sonode *nso; 1361 vnode_t *nvp; 1362 void *src; 1363 t_uscalar_t srclen; 1364 void *opt; 1365 t_uscalar_t optlen; 1366 t_scalar_t PRIM_type; 1367 t_scalar_t SEQ_number; 1368 1369 dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n", 1370 so, fflag, nsop, pr_state(so->so_state, so->so_mode))); 1371 1372 /* 1373 * Defer single-threading the accepting socket until 1374 * the T_CONN_IND has been received and parsed and the 1375 * new sonode has been opened. 1376 */ 1377 1378 /* Check that we are not already connected */ 1379 if ((so->so_state & SS_ACCEPTCONN) == 0) 1380 goto conn_bad; 1381 again: 1382 if ((error = sowaitconnind(so, fflag, &mp)) != 0) 1383 goto e_bad; 1384 1385 ASSERT(mp); 1386 conn_ind = (struct T_conn_ind *)mp->b_rptr; 1387 /* 1388 * Save SEQ_number for error paths. 
1389 */ 1390 SEQ_number = conn_ind->SEQ_number; 1391 1392 srclen = conn_ind->SRC_length; 1393 src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1); 1394 if (src == NULL) { 1395 error = EPROTO; 1396 freemsg(mp); 1397 eprintsoline(so, error); 1398 goto disconnect_unlocked; 1399 } 1400 optlen = conn_ind->OPT_length; 1401 switch (so->so_family) { 1402 case AF_INET: 1403 case AF_INET6: 1404 if ((optlen == sizeof (intptr_t)) && 1405 ((so->so_state & SS_DIRECT) != 0)) { 1406 bcopy(mp->b_rptr + conn_ind->OPT_offset, 1407 &opt, conn_ind->OPT_length); 1408 } else { 1409 /* 1410 * The transport (in this case TCP) hasn't sent up 1411 * a pointer to an instance for the accept fast-path. 1412 * Disable fast-path completely because the call to 1413 * sotpi_create() below would otherwise create an 1414 * incomplete TCP instance, which would lead to 1415 * problems when sockfs sends a normal T_CONN_RES 1416 * message down the new stream. 1417 */ 1418 if (so->so_state & SS_DIRECT) { 1419 int rval; 1420 /* 1421 * For consistency we inform tcp to disable 1422 * direct interface on the listener, though 1423 * we can certainly live without doing this 1424 * because no data will ever travel upstream 1425 * on the listening socket. 1426 */ 1427 so->so_state &= ~SS_DIRECT; 1428 (void) strioctl(SOTOV(so), _SIOCSOCKFALLBACK, 1429 0, 0, K_TO_K, CRED(), &rval); 1430 } 1431 opt = NULL; 1432 optlen = 0; 1433 } 1434 break; 1435 case AF_UNIX: 1436 default: 1437 if (optlen != 0) { 1438 opt = sogetoff(mp, conn_ind->OPT_offset, optlen, 1439 __TPI_ALIGN_SIZE); 1440 if (opt == NULL) { 1441 error = EPROTO; 1442 freemsg(mp); 1443 eprintsoline(so, error); 1444 goto disconnect_unlocked; 1445 } 1446 } 1447 if (so->so_family == AF_UNIX) { 1448 if (!(so->so_state & SS_FADDR_NOXLATE)) { 1449 src = NULL; 1450 srclen = 0; 1451 } 1452 /* Extract src address from options */ 1453 if (optlen != 0) 1454 so_getopt_srcaddr(opt, optlen, &src, &srclen); 1455 } 1456 break; 1457 } 1458 1459 /* 1460 * Create the new socket. 
1461 */ 1462 VN_HOLD(so->so_accessvp); 1463 nso = sotpi_create(so->so_accessvp, so->so_family, so->so_type, 1464 so->so_protocol, so->so_version, so, &error); 1465 if (nso == NULL) { 1466 ASSERT(error != 0); 1467 /* 1468 * Accept can not fail with ENOBUFS. sotpi_create 1469 * sleeps waiting for memory until a signal is caught 1470 * so return EINTR. 1471 */ 1472 freemsg(mp); 1473 if (error == ENOBUFS) 1474 error = EINTR; 1475 goto e_disc_unl; 1476 } 1477 nvp = SOTOV(nso); 1478 1479 #ifdef DEBUG 1480 /* 1481 * SO_DEBUG is used to trigger the dprint* and eprint* macros thus 1482 * it's inherited early to allow debugging of the accept code itself. 1483 */ 1484 nso->so_options |= so->so_options & SO_DEBUG; 1485 #endif /* DEBUG */ 1486 1487 /* 1488 * Save the SRC address from the T_CONN_IND 1489 * for getpeername to work on AF_UNIX and on transports that do not 1490 * support TI_GETPEERNAME. 1491 * 1492 * NOTE: AF_UNIX NUL termination is ensured by the sender's 1493 * copyin_name(). 1494 */ 1495 if (srclen > (t_uscalar_t)nso->so_faddr_maxlen) { 1496 error = EINVAL; 1497 freemsg(mp); 1498 eprintsoline(so, error); 1499 goto disconnect_vp_unlocked; 1500 } 1501 nso->so_faddr_len = (socklen_t)srclen; 1502 ASSERT(so->so_faddr_len <= so->so_faddr_maxlen); 1503 bcopy(src, nso->so_faddr_sa, srclen); 1504 nso->so_state |= SS_FADDR_VALID; 1505 1506 if ((DB_REF(mp) > 1) || MBLKSIZE(mp) < 1507 (sizeof (struct T_conn_res) + sizeof (intptr_t))) { 1508 cred_t *cr; 1509 1510 if ((cr = DB_CRED(mp)) != NULL) { 1511 crhold(cr); 1512 nso->so_peercred = cr; 1513 nso->so_cpid = DB_CPID(mp); 1514 } 1515 freemsg(mp); 1516 1517 mp = soallocproto1(NULL, sizeof (struct T_conn_res) + 1518 sizeof (intptr_t), 0, _ALLOC_INTR); 1519 if (mp == NULL) { 1520 /* 1521 * Accept can not fail with ENOBUFS. 1522 * A signal was caught so return EINTR. 
1523 */ 1524 error = EINTR; 1525 eprintsoline(so, error); 1526 goto disconnect_vp_unlocked; 1527 } 1528 conn_res = (struct T_conn_res *)mp->b_rptr; 1529 } else { 1530 nso->so_peercred = DB_CRED(mp); 1531 nso->so_cpid = DB_CPID(mp); 1532 DB_CRED(mp) = NULL; 1533 1534 mp->b_rptr = DB_BASE(mp); 1535 conn_res = (struct T_conn_res *)mp->b_rptr; 1536 mp->b_wptr = mp->b_rptr + sizeof (struct T_conn_res); 1537 } 1538 1539 /* 1540 * New socket must be bound at least in sockfs and, except for AF_INET, 1541 * (or AF_INET6) it also has to be bound in the transport provider. 1542 * After accepting the connection on nso so_laddr_sa will be set to 1543 * contain the same address as the listener's local address 1544 * so the address we bind to isn't important. 1545 */ 1546 if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) && 1547 /*CONSTCOND*/ 1548 nso->so_type == SOCK_STREAM && !soaccept_tpi_tcp) { 1549 /* 1550 * Optimization for AF_INET{,6} transports 1551 * that can handle a T_CONN_RES without being bound. 1552 */ 1553 mutex_enter(&nso->so_lock); 1554 so_automatic_bind(nso); 1555 mutex_exit(&nso->so_lock); 1556 } else { 1557 /* Perform NULL bind with the transport provider. */ 1558 if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC)) != 0) { 1559 ASSERT(error != ENOBUFS); 1560 freemsg(mp); 1561 eprintsoline(nso, error); 1562 goto disconnect_vp_unlocked; 1563 } 1564 } 1565 1566 /* 1567 * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES 1568 * so that any data arriving on the new socket will cause the 1569 * appropriate signals to be delivered for the new socket. 1570 * 1571 * No other thread (except strsock_proto and strsock_misc) 1572 * can access the new socket thus we relax the locking. 
1573 */ 1574 nso->so_pgrp = so->so_pgrp; 1575 nso->so_state |= so->so_state & (SS_ASYNC|SS_FADDR_NOXLATE); 1576 1577 if (nso->so_pgrp != 0) { 1578 if ((error = so_set_events(nso, nvp, CRED())) != 0) { 1579 eprintsoline(nso, error); 1580 error = 0; 1581 nso->so_pgrp = 0; 1582 } 1583 } 1584 1585 /* 1586 * Make note of the socket level options. TCP and IP level options 1587 * are already inherited. We could do all this after accept is 1588 * successful but doing it here simplifies code and no harm done 1589 * for error case. 1590 */ 1591 nso->so_options = so->so_options & (SO_DEBUG|SO_REUSEADDR|SO_KEEPALIVE| 1592 SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK| 1593 SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER); 1594 nso->so_sndbuf = so->so_sndbuf; 1595 nso->so_rcvbuf = so->so_rcvbuf; 1596 if (nso->so_options & SO_LINGER) 1597 nso->so_linger = so->so_linger; 1598 1599 if ((so->so_state & SS_DIRECT) != 0) { 1600 mblk_t *ack_mp; 1601 1602 ASSERT(nso->so_state & SS_DIRECT); 1603 ASSERT(opt != NULL); 1604 1605 conn_res->OPT_length = optlen; 1606 conn_res->OPT_offset = MBLKL(mp); 1607 bcopy(&opt, mp->b_wptr, optlen); 1608 mp->b_wptr += optlen; 1609 conn_res->PRIM_type = T_CONN_RES; 1610 conn_res->ACCEPTOR_id = 0; 1611 PRIM_type = T_CONN_RES; 1612 1613 /* Send down the T_CONN_RES on acceptor STREAM */ 1614 error = kstrputmsg(SOTOV(nso), mp, NULL, 1615 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1616 if (error) { 1617 mutex_enter(&so->so_lock); 1618 so_lock_single(so); 1619 eprintsoline(so, error); 1620 goto disconnect_vp; 1621 } 1622 mutex_enter(&nso->so_lock); 1623 error = sowaitprim(nso, T_CONN_RES, T_OK_ACK, 1624 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 1625 if (error) { 1626 mutex_exit(&nso->so_lock); 1627 mutex_enter(&so->so_lock); 1628 so_lock_single(so); 1629 eprintsoline(so, error); 1630 goto disconnect_vp; 1631 } 1632 if (nso->so_family == AF_INET) { 1633 sin_t *sin; 1634 1635 sin = (sin_t *)(ack_mp->b_rptr + 1636 sizeof (struct T_ok_ack)); 1637 bcopy(sin, 
nso->so_laddr_sa, sizeof (sin_t)); 1638 nso->so_laddr_len = sizeof (sin_t); 1639 } else { 1640 sin6_t *sin6; 1641 1642 sin6 = (sin6_t *)(ack_mp->b_rptr + 1643 sizeof (struct T_ok_ack)); 1644 bcopy(sin6, nso->so_laddr_sa, sizeof (sin6_t)); 1645 nso->so_laddr_len = sizeof (sin6_t); 1646 } 1647 freemsg(ack_mp); 1648 1649 nso->so_state |= SS_ISCONNECTED | SS_LADDR_VALID; 1650 nso->so_priv = opt; 1651 1652 if (so->so_nl7c_flags & NL7C_ENABLED) { 1653 /* 1654 * An NL7C marked listen()er so the new socket 1655 * inherits the listen()er's NL7C state. 1656 * 1657 * When calling NL7C to process the new socket 1658 * pass the nonblocking i/o state of the listen 1659 * socket as this is the context we are in. 1660 */ 1661 nso->so_nl7c_flags = so->so_nl7c_flags; 1662 if (nl7c_process(nso, 1663 (nso->so_state & (SS_NONBLOCK|SS_NDELAY)), 1664 (int)((tcp_t *)nso->so_priv)->tcp_mss)) { 1665 /* 1666 * NL7C has completed processing on the 1667 * socket, close the socket and back to 1668 * the top to await the next T_CONN_IND. 1669 */ 1670 mutex_exit(&nso->so_lock); 1671 (void) VOP_CLOSE(nvp, 0, 1, (offset_t)0, 1672 CRED()); 1673 VN_RELE(nvp); 1674 goto again; 1675 } 1676 /* Pass the new socket out */ 1677 } 1678 1679 mutex_exit(&nso->so_lock); 1680 1681 /* 1682 * Pass out new socket. 1683 */ 1684 if (nsop != NULL) 1685 *nsop = nso; 1686 1687 return (0); 1688 } 1689 1690 /* 1691 * Copy local address from listener. 1692 */ 1693 nso->so_laddr_len = so->so_laddr_len; 1694 ASSERT(nso->so_laddr_len <= nso->so_laddr_maxlen); 1695 bcopy(so->so_laddr_sa, nso->so_laddr_sa, nso->so_laddr_len); 1696 nso->so_state |= SS_LADDR_VALID; 1697 1698 /* 1699 * This is the non-performance case for sockets (e.g. AF_UNIX sockets) 1700 * which don't support the FireEngine accept fast-path. It is also 1701 * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd 1702 * again. 
Neither sockfs nor TCP attempt to find out if some other 1703 * random module has been inserted in between (in which case we 1704 * should follow TLI accept behaviour). We blindly assume the worst 1705 * case and revert back to old behaviour i.e. TCP will not send us 1706 * any option (eager) and the accept should happen on the listener 1707 * queue. Any queued T_conn_ind have already got their options removed 1708 * by so_sock2_stream() when "sockmod" was I_POP'd. 1709 */ 1710 /* 1711 * Fill in the {O_}T_CONN_RES before getting SOLOCKED. 1712 */ 1713 if ((nso->so_mode & SM_ACCEPTOR_ID) == 0) { 1714 #ifdef _ILP32 1715 queue_t *q; 1716 1717 /* 1718 * Find read queue in driver 1719 * Can safely do this since we "own" nso/nvp. 1720 */ 1721 q = strvp2wq(nvp)->q_next; 1722 while (SAMESTR(q)) 1723 q = q->q_next; 1724 q = RD(q); 1725 conn_res->ACCEPTOR_id = (t_uscalar_t)q; 1726 #else 1727 conn_res->ACCEPTOR_id = (t_uscalar_t)getminor(nvp->v_rdev); 1728 #endif /* _ILP32 */ 1729 conn_res->PRIM_type = O_T_CONN_RES; 1730 PRIM_type = O_T_CONN_RES; 1731 } else { 1732 conn_res->ACCEPTOR_id = nso->so_acceptor_id; 1733 conn_res->PRIM_type = T_CONN_RES; 1734 PRIM_type = T_CONN_RES; 1735 } 1736 conn_res->SEQ_number = SEQ_number; 1737 conn_res->OPT_length = 0; 1738 conn_res->OPT_offset = 0; 1739 1740 mutex_enter(&so->so_lock); 1741 so_lock_single(so); /* Set SOLOCKED */ 1742 mutex_exit(&so->so_lock); 1743 1744 error = kstrputmsg(SOTOV(so), mp, NULL, 1745 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1746 mutex_enter(&so->so_lock); 1747 if (error) { 1748 eprintsoline(so, error); 1749 goto disconnect_vp; 1750 } 1751 error = sowaitokack(so, PRIM_type); 1752 if (error) { 1753 eprintsoline(so, error); 1754 goto disconnect_vp; 1755 } 1756 so_unlock_single(so, SOLOCKED); 1757 mutex_exit(&so->so_lock); 1758 1759 nso->so_state |= SS_ISCONNECTED; 1760 1761 /* 1762 * Pass out new socket. 
1763 */ 1764 if (nsop != NULL) 1765 *nsop = nso; 1766 1767 return (0); 1768 1769 1770 eproto_disc_unl: 1771 error = EPROTO; 1772 e_disc_unl: 1773 eprintsoline(so, error); 1774 goto disconnect_unlocked; 1775 1776 pr_disc_vp_unl: 1777 eprintsoline(so, error); 1778 disconnect_vp_unlocked: 1779 (void) VOP_CLOSE(nvp, 0, 1, 0, CRED()); 1780 VN_RELE(nvp); 1781 disconnect_unlocked: 1782 (void) sodisconnect(so, SEQ_number, 0); 1783 return (error); 1784 1785 pr_disc_vp: 1786 eprintsoline(so, error); 1787 disconnect_vp: 1788 (void) sodisconnect(so, SEQ_number, _SODISCONNECT_LOCK_HELD); 1789 so_unlock_single(so, SOLOCKED); 1790 mutex_exit(&so->so_lock); 1791 (void) VOP_CLOSE(nvp, 0, 1, 0, CRED()); 1792 VN_RELE(nvp); 1793 return (error); 1794 1795 conn_bad: /* Note: SunOS 4/BSD unconditionally returns EINVAL here */ 1796 error = (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) 1797 ? EOPNOTSUPP : EINVAL; 1798 e_bad: 1799 eprintsoline(so, error); 1800 return (error); 1801 } 1802 1803 /* 1804 * connect a socket. 1805 * 1806 * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to 1807 * unconnect (by specifying a null address). 1808 */ 1809 int 1810 sotpi_connect(struct sonode *so, 1811 const struct sockaddr *name, 1812 socklen_t namelen, 1813 int fflag, 1814 int flags) 1815 { 1816 struct T_conn_req conn_req; 1817 int error = 0; 1818 mblk_t *mp; 1819 void *src; 1820 socklen_t srclen; 1821 void *addr; 1822 socklen_t addrlen; 1823 boolean_t need_unlock; 1824 1825 dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n", 1826 so, name, namelen, fflag, flags, 1827 pr_state(so->so_state, so->so_mode))); 1828 1829 /* 1830 * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to 1831 * avoid sleeping for memory with SOLOCKED held. 1832 * We know that the T_CONN_REQ can't be larger than 2 * so_faddr_maxlen 1833 * + sizeof (struct T_opthdr). 1834 * (the AF_UNIX so_ux_addr_xlate() does not make the address 1835 * exceed so_faddr_maxlen). 
1836 */ 1837 mp = soallocproto(sizeof (struct T_conn_req) + 1838 2 * so->so_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR); 1839 if (mp == NULL) { 1840 /* 1841 * Connect can not fail with ENOBUFS. A signal was 1842 * caught so return EINTR. 1843 */ 1844 error = EINTR; 1845 eprintsoline(so, error); 1846 return (error); 1847 } 1848 1849 mutex_enter(&so->so_lock); 1850 /* 1851 * Make sure that there is a preallocated unbind_req 1852 * message before any binding. This message allocated when 1853 * the socket is created but it might be have been 1854 * consumed. 1855 */ 1856 if (so->so_unbind_mp == NULL) { 1857 dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n")); 1858 /* NOTE: holding so_lock while sleeping */ 1859 so->so_unbind_mp = 1860 soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR); 1861 if (so->so_unbind_mp == NULL) { 1862 error = EINTR; 1863 need_unlock = B_FALSE; 1864 goto done; 1865 } 1866 } 1867 1868 so_lock_single(so); /* Set SOLOCKED */ 1869 need_unlock = B_TRUE; 1870 1871 /* 1872 * Can't have done a listen before connecting. 1873 */ 1874 if (so->so_state & SS_ACCEPTCONN) { 1875 error = EOPNOTSUPP; 1876 goto done; 1877 } 1878 1879 /* 1880 * Must be bound with the transport 1881 */ 1882 if (!(so->so_state & SS_ISBOUND)) { 1883 if ((so->so_family == AF_INET || so->so_family == AF_INET6) && 1884 /*CONSTCOND*/ 1885 so->so_type == SOCK_STREAM && !soconnect_tpi_tcp) { 1886 /* 1887 * Optimization for AF_INET{,6} transports 1888 * that can handle a T_CONN_REQ without being bound. 1889 */ 1890 so_automatic_bind(so); 1891 } else { 1892 error = sotpi_bind(so, NULL, 0, 1893 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD); 1894 if (error) 1895 goto done; 1896 } 1897 ASSERT(so->so_state & SS_ISBOUND); 1898 flags |= _SOCONNECT_DID_BIND; 1899 } 1900 1901 /* 1902 * Handle a connect to a name parameter of type AF_UNSPEC like a 1903 * connect to a null address. This is the portable method to 1904 * unconnect a socket. 
1905 */ 1906 if ((namelen >= sizeof (sa_family_t)) && 1907 (name->sa_family == AF_UNSPEC)) { 1908 name = NULL; 1909 namelen = 0; 1910 } 1911 1912 /* 1913 * Check that we are not already connected. 1914 * A connection-oriented socket cannot be reconnected. 1915 * A connected connection-less socket can be 1916 * - connected to a different address by a subsequent connect 1917 * - "unconnected" by a connect to the NULL address 1918 */ 1919 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) { 1920 ASSERT(!(flags & _SOCONNECT_DID_BIND)); 1921 if (so->so_mode & SM_CONNREQUIRED) { 1922 /* Connection-oriented socket */ 1923 error = so->so_state & SS_ISCONNECTED ? 1924 EISCONN : EALREADY; 1925 goto done; 1926 } 1927 /* Connection-less socket */ 1928 if (name == NULL) { 1929 /* 1930 * Remove the connected state and clear SO_DGRAM_ERRIND 1931 * since it was set when the socket was connected. 1932 * If this is UDP also send down a T_DISCON_REQ. 1933 */ 1934 int val; 1935 1936 if ((so->so_family == AF_INET || 1937 so->so_family == AF_INET6) && 1938 (so->so_type == SOCK_DGRAM || 1939 so->so_type == SOCK_RAW) && 1940 /*CONSTCOND*/ 1941 !soconnect_tpi_udp) { 1942 /* XXX What about implicitly unbinding here? */ 1943 error = sodisconnect(so, -1, 1944 _SODISCONNECT_LOCK_HELD); 1945 } else { 1946 so->so_state &= 1947 ~(SS_ISCONNECTED | SS_ISCONNECTING | 1948 SS_FADDR_VALID); 1949 so->so_faddr_len = 0; 1950 } 1951 1952 so_unlock_single(so, SOLOCKED); 1953 mutex_exit(&so->so_lock); 1954 1955 val = 0; 1956 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, 1957 &val, (t_uscalar_t)sizeof (val)); 1958 1959 mutex_enter(&so->so_lock); 1960 so_lock_single(so); /* Set SOLOCKED */ 1961 goto done; 1962 } 1963 } 1964 ASSERT(so->so_state & SS_ISBOUND); 1965 1966 if (name == NULL || namelen == 0) { 1967 error = EINVAL; 1968 goto done; 1969 } 1970 /* 1971 * Mark the socket if so_faddr_sa represents the transport level 1972 * address. 
1973 */ 1974 if (flags & _SOCONNECT_NOXLATE) { 1975 struct sockaddr_ux *soaddr_ux; 1976 1977 ASSERT(so->so_family == AF_UNIX); 1978 if (namelen != sizeof (struct sockaddr_ux)) { 1979 error = EINVAL; 1980 goto done; 1981 } 1982 soaddr_ux = (struct sockaddr_ux *)name; 1983 name = (struct sockaddr *)&soaddr_ux->sou_addr; 1984 namelen = sizeof (soaddr_ux->sou_addr); 1985 so->so_state |= SS_FADDR_NOXLATE; 1986 } 1987 1988 /* 1989 * Length and family checks. 1990 */ 1991 error = so_addr_verify(so, name, namelen); 1992 if (error) 1993 goto bad; 1994 1995 /* 1996 * Save foreign address. Needed for AF_UNIX as well as 1997 * transport providers that do not support TI_GETPEERNAME. 1998 * Also used for cached foreign address for TCP and UDP. 1999 */ 2000 if (namelen > (t_uscalar_t)so->so_faddr_maxlen) { 2001 error = EINVAL; 2002 goto done; 2003 } 2004 so->so_faddr_len = (socklen_t)namelen; 2005 ASSERT(so->so_faddr_len <= so->so_faddr_maxlen); 2006 bcopy(name, so->so_faddr_sa, namelen); 2007 so->so_state |= SS_FADDR_VALID; 2008 2009 if (so->so_family == AF_UNIX) { 2010 if (so->so_state & SS_FADDR_NOXLATE) { 2011 /* 2012 * Already have a transport internal address. Do not 2013 * pass any (transport internal) source address. 2014 */ 2015 addr = so->so_faddr_sa; 2016 addrlen = (t_uscalar_t)so->so_faddr_len; 2017 src = NULL; 2018 srclen = 0; 2019 } else { 2020 /* 2021 * Pass the sockaddr_un source address as an option 2022 * and translate the remote address. 2023 * Holding so_lock thus so_laddr_sa can not change. 
2024 */ 2025 src = so->so_laddr_sa; 2026 srclen = (t_uscalar_t)so->so_laddr_len; 2027 dprintso(so, 1, 2028 ("sotpi_connect UNIX: srclen %d, src %p\n", 2029 srclen, src)); 2030 error = so_ux_addr_xlate(so, 2031 so->so_faddr_sa, (socklen_t)so->so_faddr_len, 2032 (flags & _SOCONNECT_XPG4_2), 2033 &addr, &addrlen); 2034 if (error) 2035 goto bad; 2036 } 2037 } else { 2038 addr = so->so_faddr_sa; 2039 addrlen = (t_uscalar_t)so->so_faddr_len; 2040 src = NULL; 2041 srclen = 0; 2042 } 2043 /* 2044 * When connecting a datagram socket we issue the SO_DGRAM_ERRIND 2045 * option which asks the transport provider to send T_UDERR_IND 2046 * messages. These T_UDERR_IND messages are used to return connected 2047 * style errors (e.g. ECONNRESET) for connected datagram sockets. 2048 * 2049 * In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets) 2050 * we send down a T_CONN_REQ. This is needed to let the 2051 * transport assign a local address that is consistent with 2052 * the remote address. Applications depend on a getsockname() 2053 * after a connect() to retrieve the "source" IP address for 2054 * the connected socket. Invalidate the cached local address 2055 * to force getsockname() to enquire of the transport. 2056 */ 2057 if (!(so->so_mode & SM_CONNREQUIRED)) { 2058 /* 2059 * Datagram socket. 2060 */ 2061 int32_t val; 2062 2063 so_unlock_single(so, SOLOCKED); 2064 mutex_exit(&so->so_lock); 2065 2066 val = 1; 2067 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, 2068 &val, (t_uscalar_t)sizeof (val)); 2069 2070 mutex_enter(&so->so_lock); 2071 so_lock_single(so); /* Set SOLOCKED */ 2072 if ((so->so_family != AF_INET && so->so_family != AF_INET6) || 2073 (so->so_type != SOCK_DGRAM && so->so_type != SOCK_RAW) || 2074 soconnect_tpi_udp) { 2075 soisconnected(so); 2076 goto done; 2077 } 2078 /* 2079 * Send down T_CONN_REQ etc. 2080 * Clear fflag to avoid returning EWOULDBLOCK. 
2081 */ 2082 fflag = 0; 2083 ASSERT(so->so_family != AF_UNIX); 2084 so->so_state &= ~SS_LADDR_VALID; 2085 } else if (so->so_laddr_len != 0) { 2086 /* 2087 * If the local address or port was "any" then it may be 2088 * changed by the transport as a result of the 2089 * connect. Invalidate the cached version if we have one. 2090 */ 2091 switch (so->so_family) { 2092 case AF_INET: 2093 ASSERT(so->so_laddr_len == (socklen_t)sizeof (sin_t)); 2094 if (((sin_t *)so->so_laddr_sa)->sin_addr.s_addr == 2095 INADDR_ANY || 2096 ((sin_t *)so->so_laddr_sa)->sin_port == 0) 2097 so->so_state &= ~SS_LADDR_VALID; 2098 break; 2099 2100 case AF_INET6: 2101 ASSERT(so->so_laddr_len == (socklen_t)sizeof (sin6_t)); 2102 if (IN6_IS_ADDR_UNSPECIFIED( 2103 &((sin6_t *)so->so_laddr_sa) ->sin6_addr) || 2104 IN6_IS_ADDR_V4MAPPED_ANY( 2105 &((sin6_t *)so->so_laddr_sa)->sin6_addr) || 2106 ((sin6_t *)so->so_laddr_sa)->sin6_port == 0) 2107 so->so_state &= ~SS_LADDR_VALID; 2108 break; 2109 2110 default: 2111 break; 2112 } 2113 } 2114 2115 /* 2116 * Check for failure of an earlier call 2117 */ 2118 if (so->so_error != 0) 2119 goto so_bad; 2120 2121 /* 2122 * Send down T_CONN_REQ. Message was allocated above. 2123 */ 2124 conn_req.PRIM_type = T_CONN_REQ; 2125 conn_req.DEST_length = addrlen; 2126 conn_req.DEST_offset = (t_scalar_t)sizeof (conn_req); 2127 if (srclen == 0) { 2128 conn_req.OPT_length = 0; 2129 conn_req.OPT_offset = 0; 2130 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2131 soappendmsg(mp, addr, addrlen); 2132 } else { 2133 /* 2134 * There is a AF_UNIX sockaddr_un to include as a source 2135 * address option. 
2136 */ 2137 struct T_opthdr toh; 2138 2139 toh.level = SOL_SOCKET; 2140 toh.name = SO_SRCADDR; 2141 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 2142 toh.status = 0; 2143 conn_req.OPT_length = 2144 (t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen)); 2145 conn_req.OPT_offset = (t_scalar_t)(sizeof (conn_req) + 2146 _TPI_ALIGN_TOPT(addrlen)); 2147 2148 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2149 soappendmsg(mp, addr, addrlen); 2150 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2151 soappendmsg(mp, &toh, sizeof (toh)); 2152 soappendmsg(mp, src, srclen); 2153 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2154 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2155 } 2156 /* 2157 * Set SS_ISCONNECTING before sending down the T_CONN_REQ 2158 * in order to have the right state when the T_CONN_CON shows up. 2159 */ 2160 soisconnecting(so); 2161 mutex_exit(&so->so_lock); 2162 2163 #ifdef C2_AUDIT 2164 if (audit_active) 2165 audit_sock(T_CONN_REQ, strvp2wq(SOTOV(so)), mp, 0); 2166 #endif /* C2_AUDIT */ 2167 2168 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2169 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2170 mp = NULL; 2171 mutex_enter(&so->so_lock); 2172 if (error != 0) 2173 goto bad; 2174 2175 if ((error = sowaitokack(so, T_CONN_REQ)) != 0) 2176 goto bad; 2177 2178 /* Allow other threads to access the socket */ 2179 so_unlock_single(so, SOLOCKED); 2180 need_unlock = B_FALSE; 2181 2182 /* 2183 * Wait until we get a T_CONN_CON or an error 2184 */ 2185 if ((error = sowaitconnected(so, fflag, 0)) != 0) { 2186 so_lock_single(so); /* Set SOLOCKED */ 2187 need_unlock = B_TRUE; 2188 } 2189 2190 done: 2191 freemsg(mp); 2192 switch (error) { 2193 case EINPROGRESS: 2194 case EALREADY: 2195 case EISCONN: 2196 case EINTR: 2197 /* Non-fatal errors */ 2198 so->so_state &= ~SS_LADDR_VALID; 2199 /* FALLTHRU */ 2200 case 0: 2201 break; 2202 2203 case EHOSTUNREACH: 2204 if (flags & _SOCONNECT_XPG4_2) { 2205 /* 2206 * X/Open specification contains a requirement that 2207 * 
ENETUNREACH be returned but does not require 2208 * EHOSTUNREACH. In order to keep the test suite 2209 * happy we mess with the errno here. 2210 */ 2211 error = ENETUNREACH; 2212 } 2213 /* FALLTHRU */ 2214 2215 default: 2216 ASSERT(need_unlock); 2217 /* 2218 * Fatal errors: clear SS_ISCONNECTING in case it was set, 2219 * and invalidate local-address cache 2220 */ 2221 so->so_state &= ~(SS_ISCONNECTING | SS_LADDR_VALID); 2222 /* A discon_ind might have already unbound us */ 2223 if ((flags & _SOCONNECT_DID_BIND) && 2224 (so->so_state & SS_ISBOUND)) { 2225 int err; 2226 2227 err = sotpi_unbind(so, 0); 2228 /* LINTED - statement has no conseq */ 2229 if (err) { 2230 eprintsoline(so, err); 2231 } 2232 } 2233 break; 2234 } 2235 if (need_unlock) 2236 so_unlock_single(so, SOLOCKED); 2237 mutex_exit(&so->so_lock); 2238 return (error); 2239 2240 so_bad: error = sogeterr(so); 2241 bad: eprintsoline(so, error); 2242 goto done; 2243 } 2244 2245 int 2246 sotpi_shutdown(struct sonode *so, int how) 2247 { 2248 struct T_ordrel_req ordrel_req; 2249 mblk_t *mp; 2250 uint_t old_state, state_change; 2251 int error = 0; 2252 2253 dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n", 2254 so, how, pr_state(so->so_state, so->so_mode))); 2255 2256 mutex_enter(&so->so_lock); 2257 so_lock_single(so); /* Set SOLOCKED */ 2258 2259 /* 2260 * SunOS 4.X has no check for datagram sockets. 2261 * 5.X checks that it is connected (ENOTCONN) 2262 * X/Open requires that we check the connected state. 2263 */ 2264 if (!(so->so_state & SS_ISCONNECTED)) { 2265 if (!xnet_skip_checks) { 2266 error = ENOTCONN; 2267 if (xnet_check_print) { 2268 printf("sockfs: X/Open shutdown check " 2269 "caused ENOTCONN\n"); 2270 } 2271 } 2272 goto done; 2273 } 2274 /* 2275 * Record the current state and then perform any state changes. 2276 * Then use the difference between the old and new states to 2277 * determine which messages need to be sent. 2278 * This prevents e.g. 
duplicate T_ORDREL_REQ when there are 2279 * duplicate calls to shutdown(). 2280 */ 2281 old_state = so->so_state; 2282 2283 switch (how) { 2284 case 0: 2285 socantrcvmore(so); 2286 break; 2287 case 1: 2288 socantsendmore(so); 2289 break; 2290 case 2: 2291 socantsendmore(so); 2292 socantrcvmore(so); 2293 break; 2294 default: 2295 error = EINVAL; 2296 goto done; 2297 } 2298 2299 /* 2300 * Assumes that the SS_CANT* flags are never cleared in the above code. 2301 */ 2302 state_change = (so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) - 2303 (old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)); 2304 ASSERT((state_change & ~(SS_CANTRCVMORE|SS_CANTSENDMORE)) == 0); 2305 2306 switch (state_change) { 2307 case 0: 2308 dprintso(so, 1, 2309 ("sotpi_shutdown: nothing to send in state 0x%x\n", 2310 so->so_state)); 2311 goto done; 2312 2313 case SS_CANTRCVMORE: 2314 mutex_exit(&so->so_lock); 2315 strseteof(SOTOV(so), 1); 2316 /* 2317 * strseteof takes care of read side wakeups, 2318 * pollwakeups, and signals. 2319 */ 2320 /* 2321 * Get the read lock before flushing data to avoid problems 2322 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 2323 */ 2324 mutex_enter(&so->so_lock); 2325 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2326 mutex_exit(&so->so_lock); 2327 2328 /* Flush read side queue */ 2329 strflushrq(SOTOV(so), FLUSHALL); 2330 2331 mutex_enter(&so->so_lock); 2332 so_unlock_read(so); /* Clear SOREADLOCKED */ 2333 break; 2334 2335 case SS_CANTSENDMORE: 2336 mutex_exit(&so->so_lock); 2337 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2338 mutex_enter(&so->so_lock); 2339 break; 2340 2341 case SS_CANTSENDMORE|SS_CANTRCVMORE: 2342 mutex_exit(&so->so_lock); 2343 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2344 strseteof(SOTOV(so), 1); 2345 /* 2346 * strseteof takes care of read side wakeups, 2347 * pollwakeups, and signals. 2348 */ 2349 /* 2350 * Get the read lock before flushing data to avoid problems 2351 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 
2352 */ 2353 mutex_enter(&so->so_lock); 2354 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2355 mutex_exit(&so->so_lock); 2356 2357 /* Flush read side queue */ 2358 strflushrq(SOTOV(so), FLUSHALL); 2359 2360 mutex_enter(&so->so_lock); 2361 so_unlock_read(so); /* Clear SOREADLOCKED */ 2362 break; 2363 } 2364 2365 ASSERT(MUTEX_HELD(&so->so_lock)); 2366 2367 /* 2368 * If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them 2369 * was set due to this call and the new state has both of them set: 2370 * Send the AF_UNIX close indication 2371 * For T_COTS send a discon_ind 2372 * 2373 * If cantsend was set due to this call: 2374 * For T_COTSORD send an ordrel_ind 2375 * 2376 * Note that for T_CLTS there is no message sent here. 2377 */ 2378 if ((so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) == 2379 (SS_CANTRCVMORE|SS_CANTSENDMORE)) { 2380 /* 2381 * For SunOS 4.X compatibility we tell the other end 2382 * that we are unable to receive at this point. 2383 */ 2384 if (so->so_family == AF_UNIX && so->so_serv_type != T_CLTS) 2385 so_unix_close(so); 2386 2387 if (so->so_serv_type == T_COTS) 2388 error = sodisconnect(so, -1, _SODISCONNECT_LOCK_HELD); 2389 } 2390 if ((state_change & SS_CANTSENDMORE) && 2391 (so->so_serv_type == T_COTS_ORD)) { 2392 /* Send an orderly release */ 2393 ordrel_req.PRIM_type = T_ORDREL_REQ; 2394 2395 mutex_exit(&so->so_lock); 2396 mp = soallocproto1(&ordrel_req, sizeof (ordrel_req), 2397 0, _ALLOC_SLEEP); 2398 /* 2399 * Send down the T_ORDREL_REQ even if there is flow control. 2400 * This prevents shutdown from blocking. 2401 * Note that there is no T_OK_ACK for ordrel_req. 
2402 */ 2403 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2404 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2405 mutex_enter(&so->so_lock); 2406 if (error) { 2407 eprintsoline(so, error); 2408 goto done; 2409 } 2410 } 2411 2412 done: 2413 so_unlock_single(so, SOLOCKED); 2414 mutex_exit(&so->so_lock); 2415 return (error); 2416 } 2417 2418 /* 2419 * For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send 2420 * a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer 2421 * that we have closed. 2422 * Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length 2423 * T_UNITDATA_REQ containing the same option. 2424 * 2425 * For SOCK_DGRAM half-connections (somebody connected to this end 2426 * but this end is not connect) we don't know where to send any 2427 * SO_UNIX_CLOSE. 2428 * 2429 * We have to ignore stream head errors just in case there has been 2430 * a shutdown(output). 2431 * Ignore any flow control to try to get the message more quickly to the peer. 2432 * While locally ignoring flow control solves the problem when there 2433 * is only the loopback transport on the stream it would not provide 2434 * the correct AF_UNIX socket semantics when one or more modules have 2435 * been pushed. 
2436 */ 2437 void 2438 so_unix_close(struct sonode *so) 2439 { 2440 int error; 2441 struct T_opthdr toh; 2442 mblk_t *mp; 2443 2444 ASSERT(MUTEX_HELD(&so->so_lock)); 2445 2446 ASSERT(so->so_family == AF_UNIX); 2447 2448 if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) != 2449 (SS_ISCONNECTED|SS_ISBOUND)) 2450 return; 2451 2452 dprintso(so, 1, ("so_unix_close(%p) %s\n", 2453 so, pr_state(so->so_state, so->so_mode))); 2454 2455 toh.level = SOL_SOCKET; 2456 toh.name = SO_UNIX_CLOSE; 2457 2458 /* zero length + header */ 2459 toh.len = (t_uscalar_t)sizeof (struct T_opthdr); 2460 toh.status = 0; 2461 2462 if (so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) { 2463 struct T_optdata_req tdr; 2464 2465 tdr.PRIM_type = T_OPTDATA_REQ; 2466 tdr.DATA_flag = 0; 2467 2468 tdr.OPT_length = (t_scalar_t)sizeof (toh); 2469 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 2470 2471 /* NOTE: holding so_lock while sleeping */ 2472 mp = soallocproto2(&tdr, sizeof (tdr), 2473 &toh, sizeof (toh), 0, _ALLOC_SLEEP); 2474 } else { 2475 struct T_unitdata_req tudr; 2476 void *addr; 2477 socklen_t addrlen; 2478 void *src; 2479 socklen_t srclen; 2480 struct T_opthdr toh2; 2481 t_scalar_t size; 2482 2483 /* Connecteded DGRAM socket */ 2484 2485 /* 2486 * For AF_UNIX the destination address is translated to 2487 * an internal name and the source address is passed as 2488 * an option. 2489 */ 2490 /* 2491 * Length and family checks. 2492 */ 2493 error = so_addr_verify(so, so->so_faddr_sa, 2494 (t_uscalar_t)so->so_faddr_len); 2495 if (error) { 2496 eprintsoline(so, error); 2497 return; 2498 } 2499 if (so->so_state & SS_FADDR_NOXLATE) { 2500 /* 2501 * Already have a transport internal address. Do not 2502 * pass any (transport internal) source address. 2503 */ 2504 addr = so->so_faddr_sa; 2505 addrlen = (t_uscalar_t)so->so_faddr_len; 2506 src = NULL; 2507 srclen = 0; 2508 } else { 2509 /* 2510 * Pass the sockaddr_un source address as an option 2511 * and translate the remote address. 
2512 * Holding so_lock thus so_laddr_sa can not change. 2513 */ 2514 src = so->so_laddr_sa; 2515 srclen = (socklen_t)so->so_laddr_len; 2516 dprintso(so, 1, 2517 ("so_ux_close: srclen %d, src %p\n", 2518 srclen, src)); 2519 error = so_ux_addr_xlate(so, 2520 so->so_faddr_sa, 2521 (socklen_t)so->so_faddr_len, 0, 2522 &addr, &addrlen); 2523 if (error) { 2524 eprintsoline(so, error); 2525 return; 2526 } 2527 } 2528 tudr.PRIM_type = T_UNITDATA_REQ; 2529 tudr.DEST_length = addrlen; 2530 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 2531 if (srclen == 0) { 2532 tudr.OPT_length = (t_scalar_t)sizeof (toh); 2533 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2534 _TPI_ALIGN_TOPT(addrlen)); 2535 2536 size = tudr.OPT_offset + tudr.OPT_length; 2537 /* NOTE: holding so_lock while sleeping */ 2538 mp = soallocproto2(&tudr, sizeof (tudr), 2539 addr, addrlen, size, _ALLOC_SLEEP); 2540 mp->b_wptr += (_TPI_ALIGN_TOPT(addrlen) - addrlen); 2541 soappendmsg(mp, &toh, sizeof (toh)); 2542 } else { 2543 /* 2544 * There is a AF_UNIX sockaddr_un to include as a 2545 * source address option. 
2546 */ 2547 tudr.OPT_length = (t_scalar_t)(2 * sizeof (toh) + 2548 _TPI_ALIGN_TOPT(srclen)); 2549 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2550 _TPI_ALIGN_TOPT(addrlen)); 2551 2552 toh2.level = SOL_SOCKET; 2553 toh2.name = SO_SRCADDR; 2554 toh2.len = (t_uscalar_t)(srclen + 2555 sizeof (struct T_opthdr)); 2556 toh2.status = 0; 2557 2558 size = tudr.OPT_offset + tudr.OPT_length; 2559 2560 /* NOTE: holding so_lock while sleeping */ 2561 mp = soallocproto2(&tudr, sizeof (tudr), 2562 addr, addrlen, size, _ALLOC_SLEEP); 2563 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2564 soappendmsg(mp, &toh, sizeof (toh)); 2565 soappendmsg(mp, &toh2, sizeof (toh2)); 2566 soappendmsg(mp, src, srclen); 2567 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2568 } 2569 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2570 } 2571 mutex_exit(&so->so_lock); 2572 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2573 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2574 mutex_enter(&so->so_lock); 2575 } 2576 2577 /* 2578 * Handle recv* calls that set MSG_OOB or MSG_OOB together with MSG_PEEK. 2579 */ 2580 int 2581 sorecvoob(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, int flags) 2582 { 2583 mblk_t *mp, *nmp; 2584 int error; 2585 2586 dprintso(so, 1, ("sorecvoob(%p, %p, 0x%x)\n", so, msg, flags)); 2587 2588 /* 2589 * There is never any oob data with addresses or control since 2590 * the T_EXDATA_IND does not carry any options. 
2591 */ 2592 msg->msg_controllen = 0; 2593 msg->msg_namelen = 0; 2594 2595 mutex_enter(&so->so_lock); 2596 ASSERT(so_verify_oobstate(so)); 2597 if ((so->so_options & SO_OOBINLINE) || 2598 (so->so_state & (SS_OOBPEND|SS_HADOOBDATA)) != SS_OOBPEND) { 2599 dprintso(so, 1, ("sorecvoob: inline or data consumed\n")); 2600 mutex_exit(&so->so_lock); 2601 return (EINVAL); 2602 } 2603 if (!(so->so_state & SS_HAVEOOBDATA)) { 2604 dprintso(so, 1, ("sorecvoob: no data yet\n")); 2605 mutex_exit(&so->so_lock); 2606 return (EWOULDBLOCK); 2607 } 2608 ASSERT(so->so_oobmsg != NULL); 2609 mp = so->so_oobmsg; 2610 if (flags & MSG_PEEK) { 2611 /* 2612 * Since recv* can not return ENOBUFS we can not use dupmsg. 2613 * Instead we revert to the consolidation private 2614 * allocb_wait plus bcopy. 2615 */ 2616 mblk_t *mp1; 2617 2618 mp1 = allocb_wait(msgdsize(mp), BPRI_MED, STR_NOSIG, NULL); 2619 ASSERT(mp1); 2620 2621 while (mp != NULL) { 2622 ssize_t size; 2623 2624 size = MBLKL(mp); 2625 bcopy(mp->b_rptr, mp1->b_wptr, size); 2626 mp1->b_wptr += size; 2627 ASSERT(mp1->b_wptr <= mp1->b_datap->db_lim); 2628 mp = mp->b_cont; 2629 } 2630 mp = mp1; 2631 } else { 2632 /* 2633 * Update the state indicating that the data has been consumed. 2634 * Keep SS_OOBPEND set until data is consumed past the mark. 
2635 */ 2636 so->so_oobmsg = NULL; 2637 so->so_state ^= SS_HAVEOOBDATA|SS_HADOOBDATA; 2638 } 2639 dprintso(so, 1, 2640 ("after recvoob(%p): counts %d/%d state %s\n", 2641 so, so->so_oobsigcnt, 2642 so->so_oobcnt, pr_state(so->so_state, so->so_mode))); 2643 ASSERT(so_verify_oobstate(so)); 2644 mutex_exit(&so->so_lock); 2645 2646 error = 0; 2647 nmp = mp; 2648 while (nmp != NULL && uiop->uio_resid > 0) { 2649 ssize_t n = MBLKL(nmp); 2650 2651 n = MIN(n, uiop->uio_resid); 2652 if (n > 0) 2653 error = uiomove(nmp->b_rptr, n, 2654 UIO_READ, uiop); 2655 if (error) 2656 break; 2657 nmp = nmp->b_cont; 2658 } 2659 freemsg(mp); 2660 return (error); 2661 } 2662 2663 /* 2664 * Called by sotpi_recvmsg when reading a non-zero amount of data. 2665 * In addition, the caller typically verifies that there is some 2666 * potential state to clear by checking 2667 * if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) 2668 * before calling this routine. 2669 * Note that such a check can be made without holding so_lock since 2670 * sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg 2671 * decrements so_oobsigcnt. 2672 * 2673 * When data is read *after* the point that all pending 2674 * oob data has been consumed the oob indication is cleared. 2675 * 2676 * This logic keeps select/poll returning POLLRDBAND and 2677 * SIOCATMARK returning true until we have read past 2678 * the mark. 
 */
static void
sorecv_update_oobstate(struct sonode *so)
{
	mutex_enter(&so->so_lock);
	ASSERT(so_verify_oobstate(so));
	dprintso(so, 1,
		("sorecv_update_oobstate: counts %d/%d state %s\n",
		so->so_oobsigcnt,
		so->so_oobcnt, pr_state(so->so_state, so->so_mode)));
	if (so->so_oobsigcnt == 0) {
		/*
		 * No more pending oob indications: drop all oob state bits
		 * and release any saved oob message.
		 */
		so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK);
		freemsg(so->so_oobmsg);
		so->so_oobmsg = NULL;
	}
	ASSERT(so_verify_oobstate(so));
	mutex_exit(&so->so_lock);
}

/*
 * Handle recv* calls for an so which has NL7C saved recv mblk_t(s).
 *
 * Walks the chain saved in so->so_nl7c_rcv_mp while the caller still has
 * room (uiop->uio_resid > 0):
 *   - M_DATA mblks are copied out with uiomove(); fully consumed mblks
 *     are freed before returning.
 *   - the first mblk of any other type is unlinked and handed back
 *     through *rmp for the caller to process.
 *
 * On return so->so_nl7c_rcv_mp holds what is left of the chain.  When
 * nothing is left, *rp receives the rval saved in so_nl7c_rcv_rval (which
 * is then reset); otherwise rp->r_vals is set to 0.
 *
 * Returns 0 or the error from uiomove().
 */
static int
nl7c_sorecv(struct sonode *so, mblk_t **rmp, uio_t *uiop, rval_t *rp)
{
	int	error = 0;
	mblk_t *tmp = NULL;	/* last mblk saved for the caller */
	mblk_t *pmp = NULL;	/* last fully consumed M_DATA mblk */
	mblk_t *nmp = so->so_nl7c_rcv_mp;	/* current mblk */

	ASSERT(nmp != NULL);

	while (nmp != NULL && uiop->uio_resid > 0) {
		ssize_t n;

		if (DB_TYPE(nmp) == M_DATA) {
			/*
			 * We have some data, uiomove up to resid bytes.
			 */
			n = MIN(MBLKL(nmp), uiop->uio_resid);
			if (n > 0)
				error = uiomove(nmp->b_rptr, n, UIO_READ, uiop);
			if (error)
				break;
			nmp->b_rptr += n;
			if (nmp->b_rptr == nmp->b_wptr) {
				/* Fully consumed; step to the next mblk */
				pmp = nmp;
				nmp = nmp->b_cont;
			}
		} else {
			/*
			 * We only handle data, save for caller to handle.
			 */
			if (pmp != NULL) {
				pmp->b_cont = nmp->b_cont;
			}
			nmp->b_cont = NULL;
			if (*rmp == NULL) {
				*rmp = nmp;
			} else {
				tmp->b_next = nmp;
			}
			/*
			 * NOTE(review): nmp->b_cont was set to NULL just
			 * above, so the next two assignments always leave
			 * both nmp and tmp NULL.  The loop therefore ends
			 * after the first non-M_DATA mblk, and whatever
			 * followed it in the chain is no longer referenced
			 * from here (so_nl7c_rcv_mp becomes NULL below).
			 * This looks unintended -- confirm against what
			 * NL7C can queue before changing it.
			 */
			nmp = nmp->b_cont;
			tmp = nmp;
		}
	}
	if (pmp != NULL) {
		/* Free any mblk_t(s) which we have consumed */
		pmp->b_cont = NULL;
		freemsg(so->so_nl7c_rcv_mp);
	}
	if ((so->so_nl7c_rcv_mp = nmp) == NULL) {
		/* Last mblk_t so return the saved rval from kstrgetmsg() */
		rp->r_vals = so->so_nl7c_rcv_rval;
		so->so_nl7c_rcv_rval = 0;
	} else {
		/* More mblk_t(s) to process so no rval to return */
		rp->r_vals = 0;
	}
	return (error);
}

/*
 * Receive the next message on the queue.
 *
 * On entry msg->msg_flags holds the recv* flags (MSG_OOB, MSG_PEEK,
 * MSG_WAITALL, MSG_DONTWAIT, MSG_XPG4_2); on return it holds the
 * resulting MSG_TRUNC/MSG_EOR/MSG_CTRUNC indications.
 * If msg_controllen is non-zero when called the caller is interested in
 * any received control info (options).
 * If msg_namelen is non-zero when called the caller is interested in
 * any received source address.
 * The routine returns with msg_control and msg_name pointing to
 * kmem_alloc'ed memory which the caller has to free; both lengths are
 * zero when nothing is returned.
 *
 * Data is copied out through uiop.  Only one reader at a time is let
 * through (SOREADLOCKED), and the read lock is dropped on every return
 * path that acquired it.
 *
 * Returns 0 on success, otherwise an errno:
 *	ENOTCONN	connection-mode socket that was never connected
 *	EOPNOTSUPP	MSG_OOB on a transport without expedited data
 *	EWOULDBLOCK	the kstrgetmsg timeout expired (ETIME) before any
 *			data was received
 *	EPROTO		malformed or unexpected TPI message
 *	or whatever sorecvoob, so_lock_read_intr, kstrgetmsg/nl7c_sorecv
 *	or so_opt2cmsg returned.
 * EINTR, EWOULDBLOCK and ETIME are turned into success once a
 * MSG_WAITALL retry has already delivered some data.
 */
int
sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
{
	union T_primitives	*tpr;
	mblk_t			*mp;
	uchar_t			pri;
	int			pflag, opflag;
	void			*control;
	t_uscalar_t		controllen;
	t_uscalar_t		namelen;
	int			so_state = so->so_state;	/* Snapshot */
	ssize_t			saved_resid;
	int			error;
	rval_t			rval;
	int			flags;
	clock_t			timout;
	int			first;

	/* msg_flags is in/out: take the request flags, start output at 0 */
	flags = msg->msg_flags;
	msg->msg_flags = 0;

	dprintso(so, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n",
		so, msg, flags,
		pr_state(so->so_state, so->so_mode), so->so_error));

	/*
	 * If we are not connected because we have never been connected
	 * we return ENOTCONN. If we have been connected (but are no longer
	 * connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return
	 * the EOF.
	 *
	 * An alternative would be to post an ENOTCONN error in stream head
	 * (read+write) and clear it when we're connected. However, that error
	 * would cause incorrect poll/select behavior!
	 */
	if ((so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 &&
	    (so->so_mode & SM_CONNREQUIRED)) {
		return (ENOTCONN);
	}

	/*
	 * Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but
	 * after checking that the read queue is empty) and returns zero.
	 * This implementation will sleep (in kstrgetmsg) even if uio_resid
	 * is zero.
	 */

	if (flags & MSG_OOB) {
		/* Check that the transport supports OOB */
		if (!(so->so_mode & SM_EXDATA))
			return (EOPNOTSUPP);
		return (sorecvoob(so, msg, uiop, flags));
	}

	/*
	 * Set msg_controllen and msg_namelen to zero here to make it
	 * simpler in the cases that no control or name is returned.
	 * The caller's original values are kept in the locals.
	 */
	controllen = msg->msg_controllen;
	namelen = msg->msg_namelen;
	msg->msg_controllen = 0;
	msg->msg_namelen = 0;

	dprintso(so, 1, ("sotpi_recvmsg: namelen %d controllen %d\n",
		namelen, controllen));

	/*
	 * If an NL7C enabled socket and not waiting for write data.
	 */
	mutex_enter(&so->so_lock);
	if ((so->so_nl7c_flags & (NL7C_ENABLED|NL7C_WAITWRITE)) ==
	    NL7C_ENABLED) {
		if (so->so_nl7c_uri) {
			/*
			 * Close uri processing for a previous request.
			 */
			nl7c_close(so);
		}
		if (nl7c_process(so,
		    (so->so_state & (SS_NONBLOCK|SS_NDELAY)),
		    (int)((tcp_t *)so->so_priv)->tcp_mss)) {
			/*
			 * NL7C has completed processing on the socket,
			 * clear the enabled bit as no further NL7C
			 * processing will be needed.
			 */
			so->so_nl7c_flags = 0;
		}
	}

	/*
	 * Only one reader is allowed at any given time. This is needed
	 * for T_EXDATA handling and, in the future, MSG_WAITALL.
	 *
	 * This is slightly different than BSD behavior in that it fails with
	 * EWOULDBLOCK when using nonblocking io. In BSD the read queue access
	 * is single-threaded using sblock(), which is dropped while waiting
	 * for data to appear. The difference shows up e.g. if one
	 * file descriptor does not have O_NONBLOCK but a dup'ed file descriptor
	 * does use nonblocking io and different threads are reading each
	 * file descriptor. In BSD there would never be an EWOULDBLOCK error
	 * in this case as long as the read queue doesn't get empty.
	 * In this implementation the thread using nonblocking io can
	 * get an EWOULDBLOCK error due to the blocking thread executing
	 * e.g. in the uiomove in kstrgetmsg.
	 * This difference is not believed to be significant.
	 */
	error = so_lock_read_intr(so, uiop->uio_fmode);	/* Set SOREADLOCKED */
	mutex_exit(&so->so_lock);
	if (error)
		return (error);

	/*
	 * Tell kstrgetmsg to not inspect the stream head errors until all
	 * queued data has been consumed.
	 * Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set.
	 * Also, If uio_fmode indicates nonblocking kstrgetmsg will not block.
	 *
	 * MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and
	 * to T_OPTDATA_IND that do not contain any user-visible control msg.
	 * Note that MSG_WAITALL set with MSG_PEEK is a noop.
	 */
	pflag = MSG_ANY | MSG_DELAYERROR;
	if (flags & MSG_PEEK) {
		pflag |= MSG_IPEEK;
		flags &= ~MSG_WAITALL;
	}
	if (so->so_mode & SM_ATOMIC)
		pflag |= MSG_DISCARDTAIL;

	if (flags & MSG_DONTWAIT)
		timout = 0;
	else
		timout = -1;
	opflag = pflag;		/* remembered so each retry starts afresh */
	first = 1;		/* cleared once a MSG_WAITALL retry begins */

	/*
	 * If so saved NL7C rcv mblk_t(s) uiomove them first
	 * else get'm from the streamhead.
	 */
retry:
	saved_resid = uiop->uio_resid;
	pri = 0;
	mp = NULL;
	if (so->so_nl7c_rcv_mp != NULL) {
		error = nl7c_sorecv(so, &mp, uiop, &rval);
	} else {
		error = kstrgetmsg(SOTOV(so), &mp, uiop, &pri, &pflag,
		    timout, &rval);
	}
	if (error) {
		/*
		 * Once a previous pass has delivered data (!first), an
		 * interruption or timeout is reported as success.
		 */
		switch (error) {
		case EINTR:
		case EWOULDBLOCK:
			if (!first)
				error = 0;
			break;
		case ETIME:
			/* Returned from kstrgetmsg when timeout expires */
			if (!first)
				error = 0;
			else
				error = EWOULDBLOCK;
			break;
		default:
			eprintsoline(so, error);
			break;
		}
		mutex_enter(&so->so_lock);
		so_unlock_read(so);	/* Clear SOREADLOCKED */
		mutex_exit(&so->so_lock);
		return (error);
	}
	/*
	 * For datagrams the MOREDATA flag is used to set MSG_TRUNC.
	 * For non-datagrams MOREDATA is used to set MSG_EOR.
	 */
	ASSERT(!(rval.r_val1 & MORECTL));
	if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC))
		msg->msg_flags |= MSG_TRUNC;

	if (mp == NULL) {
		/* No protocol part was returned: plain data only */
		dprintso(so, 1, ("sotpi_recvmsg: got M_DATA\n"));
		/*
		 * 4.3BSD and 4.4BSD clears the mark when peeking across it.
		 * The draft Posix socket spec states that the mark should
		 * not be cleared when peeking. We follow the latter.
		 */
		if ((so->so_state &
		    (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) &&
		    (uiop->uio_resid != saved_resid) &&
		    !(flags & MSG_PEEK)) {
			sorecv_update_oobstate(so);
		}

		mutex_enter(&so->so_lock);
		/* Set MSG_EOR based on MOREDATA */
		if (!(rval.r_val1 & MOREDATA)) {
			if (so->so_state & SS_SAVEDEOR) {
				msg->msg_flags |= MSG_EOR;
				so->so_state &= ~SS_SAVEDEOR;
			}
		}
		/*
		 * If some data was received (i.e. not EOF) and the
		 * read/recv* has not been satisfied wait for some more.
		 */
		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
			mutex_exit(&so->so_lock);
			first = 0;
			pflag = opflag | MSG_NOMARK;
			goto retry;
		}
		so_unlock_read(so);	/* Clear SOREADLOCKED */
		mutex_exit(&so->so_lock);
		return (0);
	}

	/* strsock_proto has already verified length and alignment */
	tpr = (union T_primitives *)mp->b_rptr;
	dprintso(so, 1, ("sotpi_recvmsg: type %d\n", tpr->type));

	switch (tpr->type) {
	case T_DATA_IND: {
		if ((so->so_state &
		    (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) &&
		    (uiop->uio_resid != saved_resid) &&
		    !(flags & MSG_PEEK)) {
			sorecv_update_oobstate(so);
		}

		/*
		 * Set msg_flags to MSG_EOR based on
		 * MORE_flag and MOREDATA.
		 * When the record ended but part of it is still queued
		 * (MOREDATA) the EOR is remembered in SS_SAVEDEOR and
		 * reported by the read that drains the rest.
		 */
		mutex_enter(&so->so_lock);
		so->so_state &= ~SS_SAVEDEOR;
		if (!(tpr->data_ind.MORE_flag & 1)) {
			if (!(rval.r_val1 & MOREDATA))
				msg->msg_flags |= MSG_EOR;
			else
				so->so_state |= SS_SAVEDEOR;
		}
		freemsg(mp);
		/*
		 * If some data was received (i.e. not EOF) and the
		 * read/recv* has not been satisfied wait for some more.
		 */
		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
			mutex_exit(&so->so_lock);
			first = 0;
			pflag = opflag | MSG_NOMARK;
			goto retry;
		}
		so_unlock_read(so);	/* Clear SOREADLOCKED */
		mutex_exit(&so->so_lock);
		return (0);
	}
	case T_UNITDATA_IND: {
		void *addr;
		t_uscalar_t addrlen;
		void *abuf;
		t_uscalar_t optlen;
		void *opt;

		if ((so->so_state &
		    (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) &&
		    (uiop->uio_resid != saved_resid) &&
		    !(flags & MSG_PEEK)) {
			sorecv_update_oobstate(so);
		}

		if (namelen != 0) {
			/* Caller wants source address */
			addrlen = tpr->unitdata_ind.SRC_length;
			addr = sogetoff(mp,
				tpr->unitdata_ind.SRC_offset,
				addrlen, 1);
			if (addr == NULL) {
				freemsg(mp);
				error = EPROTO;
				eprintsoline(so, error);
				goto err;
			}
			if (so->so_family == AF_UNIX) {
				/*
				 * Can not use the transport level address.
				 * If there is a SO_SRCADDR option carrying
				 * the socket level address it will be
				 * extracted below.
				 */
				addr = NULL;
				addrlen = 0;
			}
		}
		optlen = tpr->unitdata_ind.OPT_length;
		if (optlen != 0) {
			t_uscalar_t ncontrollen;

			/*
			 * Extract any source address option.
			 * Determine how large cmsg buffer is needed.
			 */
			opt = sogetoff(mp,
				tpr->unitdata_ind.OPT_offset,
				optlen, __TPI_ALIGN_SIZE);

			if (opt == NULL) {
				freemsg(mp);
				error = EPROTO;
				eprintsoline(so, error);
				goto err;
			}
			if (so->so_family == AF_UNIX)
				so_getopt_srcaddr(opt, optlen, &addr, &addrlen);
			ncontrollen = so_cmsglen(mp, opt, optlen,
				!(flags & MSG_XPG4_2));
			/*
			 * Either size the buffer to what is needed, or flag
			 * truncation when the caller asked for no control
			 * data but some is present.
			 */
			if (controllen != 0)
				controllen = ncontrollen;
			else if (ncontrollen != 0)
				msg->msg_flags |= MSG_CTRUNC;
		} else {
			controllen = 0;
		}

		if (namelen != 0) {
			/*
			 * Return address to caller.
			 * Caller handles truncation if length
			 * exceeds msg_namelen.
			 * NOTE: AF_UNIX NUL termination is ensured by
			 * the sender's copyin_name().
			 */
			abuf = kmem_alloc(addrlen, KM_SLEEP);

			bcopy(addr, abuf, addrlen);
			msg->msg_name = abuf;
			msg->msg_namelen = addrlen;
		}

		if (controllen != 0) {
			/*
			 * Return control msg to caller.
			 * Caller handles truncation if length
			 * exceeds msg_controllen.
			 */
			control = kmem_alloc(controllen, KM_SLEEP);

			error = so_opt2cmsg(mp, opt, optlen,
					!(flags & MSG_XPG4_2),
					control, controllen);
			if (error) {
				/* Undo both allocations made above */
				freemsg(mp);
				if (msg->msg_namelen != 0)
					kmem_free(msg->msg_name,
						msg->msg_namelen);
				kmem_free(control, controllen);
				eprintsoline(so, error);
				goto err;
			}
			msg->msg_control = control;
			msg->msg_controllen = controllen;
		}

		freemsg(mp);
		mutex_enter(&so->so_lock);
		so_unlock_read(so);	/* Clear SOREADLOCKED */
		mutex_exit(&so->so_lock);
		return (0);
	}
	case T_OPTDATA_IND: {
		struct T_optdata_req *tdr;
		void *opt;
		t_uscalar_t optlen;

		if ((so->so_state &
		    (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) &&
		    (uiop->uio_resid != saved_resid) &&
		    !(flags & MSG_PEEK)) {
			sorecv_update_oobstate(so);
		}

		/*
		 * NOTE(review): the indication is read through the
		 * T_optdata_req layout here; presumably the request and
		 * indication structures share field offsets -- confirm.
		 */
		tdr = (struct T_optdata_req *)mp->b_rptr;
		optlen = tdr->OPT_length;
		if (optlen != 0) {
			t_uscalar_t ncontrollen;
			/*
			 * Determine how large cmsg buffer is needed.
			 */
			opt = sogetoff(mp,
				tpr->optdata_ind.OPT_offset,
				optlen, __TPI_ALIGN_SIZE);

			if (opt == NULL) {
				freemsg(mp);
				error = EPROTO;
				eprintsoline(so, error);
				goto err;
			}

			ncontrollen = so_cmsglen(mp, opt, optlen,
				!(flags & MSG_XPG4_2));
			if (controllen != 0)
				controllen = ncontrollen;
			else if (ncontrollen != 0)
				msg->msg_flags |= MSG_CTRUNC;
		} else {
			controllen = 0;
		}

		if (controllen != 0) {
			/*
			 * Return control msg to caller.
			 * Caller handles truncation if length
			 * exceeds msg_controllen.
			 */
			control = kmem_alloc(controllen, KM_SLEEP);

			error = so_opt2cmsg(mp, opt, optlen,
					!(flags & MSG_XPG4_2),
					control, controllen);
			if (error) {
				freemsg(mp);
				kmem_free(control, controllen);
				eprintsoline(so, error);
				goto err;
			}
			msg->msg_control = control;
			msg->msg_controllen = controllen;
		}

		/*
		 * Set msg_flags to MSG_EOR based on
		 * DATA_flag and MOREDATA.
		 * NOTE(review): the flag is read via the data_ind member
		 * (MORE_flag); presumably it overlays DATA_flag of the
		 * optdata indication -- confirm against the TPI headers.
		 */
		mutex_enter(&so->so_lock);
		so->so_state &= ~SS_SAVEDEOR;
		if (!(tpr->data_ind.MORE_flag & 1)) {
			if (!(rval.r_val1 & MOREDATA))
				msg->msg_flags |= MSG_EOR;
			else
				so->so_state |= SS_SAVEDEOR;
		}
		freemsg(mp);
		/*
		 * If some data was received (i.e. not EOF) and the
		 * read/recv* has not been satisfied wait for some more.
		 * Not possible to wait if control info was received.
		 */
		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
		    controllen == 0 &&
		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
			mutex_exit(&so->so_lock);
			first = 0;
			pflag = opflag | MSG_NOMARK;
			goto retry;
		}
		so_unlock_read(so);	/* Clear SOREADLOCKED */
		mutex_exit(&so->so_lock);
		return (0);
	}
	case T_EXDATA_IND: {
		dprintso(so, 1,
			("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld "
			"state %s\n",
			so->so_oobsigcnt, so->so_oobcnt,
			saved_resid - uiop->uio_resid,
			pr_state(so->so_state, so->so_mode)));
		/*
		 * kstrgetmsg handles MSGMARK so there is nothing to
		 * inspect in the T_EXDATA_IND.
		 * strsock_proto makes the stream head queue the T_EXDATA_IND
		 * as a separate message with no M_DATA component. Furthermore,
		 * the stream head does not consolidate M_DATA messages onto
		 * an MSGMARK'ed message ensuring that the T_EXDATA_IND
		 * remains a message by itself. This is needed since MSGMARK
		 * marks both the whole message as well as the last byte
		 * of the message.
		 */
		freemsg(mp);
		ASSERT(uiop->uio_resid == saved_resid);	/* No data */
		if (flags & MSG_PEEK) {
			/*
			 * Even though we are peeking we consume the
			 * T_EXDATA_IND thereby moving the mark information
			 * to SS_RCVATMARK. Then the oob code below will
			 * retry the peeking kstrgetmsg.
			 * Note that the stream head read queue is
			 * never flushed without holding SOREADLOCKED
			 * thus the T_EXDATA_IND can not disappear
			 * underneath us.
			 */
			dprintso(so, 1,
				("sotpi_recvmsg: consume EXDATA_IND "
				"counts %d/%d state %s\n",
				so->so_oobsigcnt,
				so->so_oobcnt,
				pr_state(so->so_state, so->so_mode)));

			/* Same flags as the normal read, but without IPEEK */
			pflag = MSG_ANY | MSG_DELAYERROR;
			if (so->so_mode & SM_ATOMIC)
				pflag |= MSG_DISCARDTAIL;

			pri = 0;
			mp = NULL;

			error = kstrgetmsg(SOTOV(so), &mp, uiop,
				&pri, &pflag, (clock_t)-1, &rval);
			ASSERT(uiop->uio_resid == saved_resid);

			if (error) {
#ifdef SOCK_DEBUG
				if (error != EWOULDBLOCK && error != EINTR) {
					eprintsoline(so, error);
				}
#endif /* SOCK_DEBUG */
				mutex_enter(&so->so_lock);
				so_unlock_read(so);	/* Clear SOREADLOCKED */
				mutex_exit(&so->so_lock);
				return (error);
			}
			ASSERT(mp);
			tpr = (union T_primitives *)mp->b_rptr;
			ASSERT(tpr->type == T_EXDATA_IND);
			freemsg(mp);
		} /* end "if (flags & MSG_PEEK)" */

		/*
		 * Decrement the number of queued and pending oob.
		 *
		 * SS_RCVATMARK is cleared when we read past a mark.
		 * SS_HAVEOOBDATA is cleared when we've read past the
		 * last mark.
		 * SS_OOBPEND is cleared if we've read past the last
		 * mark and no (new) SIGURG has been posted.
		 */
		mutex_enter(&so->so_lock);
		ASSERT(so_verify_oobstate(so));
		ASSERT(so->so_oobsigcnt >= so->so_oobcnt);
		ASSERT(so->so_oobsigcnt > 0);
		so->so_oobsigcnt--;
		ASSERT(so->so_oobcnt > 0);
		so->so_oobcnt--;
		/*
		 * Since the T_EXDATA_IND has been removed from the stream
		 * head, but we have not read data past the mark,
		 * sockfs needs to track that the socket is still at the mark.
		 *
		 * Since no data was received call kstrgetmsg again to wait
		 * for data.
		 */
		so->so_state |= SS_RCVATMARK;
		mutex_exit(&so->so_lock);
		dprintso(so, 1,
		    ("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n",
		    so->so_oobsigcnt, so->so_oobcnt,
		    pr_state(so->so_state, so->so_mode)));
		pflag = opflag;
		goto retry;
	}
	default:
		/* Any other primitive is unexpected on the read side */
		ASSERT(0);
		freemsg(mp);
		error = EPROTO;
		eprintsoline(so, error);
		goto err;
	}
	/* NOTREACHED */
err:
	/* Common failure exit: release the read lock and report error */
	mutex_enter(&so->so_lock);
	so_unlock_read(so);	/* Clear SOREADLOCKED */
	mutex_exit(&so->so_lock);
	return (error);
}

/*
 * Sending data with options on a datagram socket.
 * Assumes caller has verified that SS_ISBOUND etc. are set.
*/
/*
 * Builds one T_UNITDATA_REQ message laid out as
 *	T_unitdata_req | destination address (padded) | options
 * and passes it, together with the user data described by uiop, to
 * kstrputmsg().  The options are appended in this order:
 *	- SO_FILEP carrying the fdbuf, only when so_getfdopt() reports file
 *	  descriptors in the control data;
 *	- SO_SRCADDR carrying so_laddr_sa, only when a non-empty source
 *	  address was selected (AF_UNIX without SS_FADDR_NOXLATE);
 *	- whatever so_cmsg2opt() generates from the caller's control data.
 *
 * Returns 0 or an errno value: EMSGSIZE when the data exceeds
 * so_tidu_size, EOPNOTSUPP when descriptors are passed on a socket without
 * SM_FDPASSING, EINTR when soallocproto() returns NULL, or whatever
 * so_addr_verify(), so_ux_addr_xlate(), so_getfdopt(), fdbuf_create() or
 * kstrputmsg() returned.
 */
static int
sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen,
    struct uio *uiop, void *control, t_uscalar_t controllen, int flags)
{
	struct T_unitdata_req	tudr;
	mblk_t			*mp;
	int			error;
	void			*addr;
	socklen_t		addrlen;
	void			*src;
	socklen_t		srclen;
	ssize_t			len;
	int			size;
	struct T_opthdr		toh;
	struct fdbuf		*fdbuf;
	t_uscalar_t		optlen;
	void			*fds;
	int			fdlen;

	ASSERT(name && namelen);
	ASSERT(control && controllen);

	/* A datagram is sent as one message, so it must fit in one TIDU. */
	len = uiop->uio_resid;
	if (len > (ssize_t)so->so_tidu_size) {
		return (EMSGSIZE);
	}

	/*
	 * For AF_UNIX the destination address is translated to an internal
	 * name and the source address is passed as an option.
	 * Also, file descriptors are passed as file pointers in an
	 * option.
	 */

	/*
	 * Length and family checks.
	 */
	error = so_addr_verify(so, name, namelen);
	if (error) {
		eprintsoline(so, error);
		return (error);
	}
	if (so->so_family == AF_UNIX) {
		if (so->so_state & SS_FADDR_NOXLATE) {
			/*
			 * Already have a transport internal address. Do not
			 * pass any (transport internal) source address.
			 */
			addr = name;
			addrlen = namelen;
			src = NULL;
			srclen = 0;
		} else {
			/*
			 * Pass the sockaddr_un source address as an option
			 * and translate the remote address.
			 *
			 * Note that this code does not prevent so_laddr_sa
			 * from changing while it is being used. Thus
			 * if an unbind+bind occurs concurrently with this
			 * send the peer might see a partially new and a
			 * partially old "from" address.
			 */
			src = so->so_laddr_sa;
			srclen = (t_uscalar_t)so->so_laddr_len;
			dprintso(so, 1,
			    ("sosend_dgramcmsg UNIX: srclen %d, src %p\n",
			    srclen, src));
			error = so_ux_addr_xlate(so, name, namelen,
			    (flags & MSG_XPG4_2),
			    &addr, &addrlen);
			if (error) {
				eprintsoline(so, error);
				return (error);
			}
		}
	} else {
		/* Other families: address used as is, no source option. */
		addr = name;
		addrlen = namelen;
		src = NULL;
		srclen = 0;
	}

	/*
	 * Compute the header fields.  The option area starts after the
	 * padded destination address; when a source address is sent its
	 * option header and padded value are added to the option length.
	 *
	 * NOTE(review): the SO_FILEP option appended further down is not
	 * added to OPT_length here; presumably so_optlen() already accounts
	 * for it - confirm against so_optlen().
	 */
	optlen = so_optlen(control, controllen,
	    !(flags & MSG_XPG4_2));
	tudr.PRIM_type = T_UNITDATA_REQ;
	tudr.DEST_length = addrlen;
	tudr.DEST_offset = (t_scalar_t)sizeof (tudr);
	if (srclen != 0)
		tudr.OPT_length = (t_scalar_t)(optlen + sizeof (toh) +
		    _TPI_ALIGN_TOPT(srclen));
	else
		tudr.OPT_length = optlen;
	tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) +
	    _TPI_ALIGN_TOPT(addrlen));

	/* Total size of the protocol part of the message. */
	size = tudr.OPT_offset + tudr.OPT_length;

	/*
	 * File descriptors only when SM_FDPASSING set.
	 */
	error = so_getfdopt(control, controllen,
	    !(flags & MSG_XPG4_2), &fds, &fdlen);
	if (error)
		return (error);
	if (fdlen != -1) {
		/* fdlen != -1 means the control data carries descriptors. */
		if (!(so->so_mode & SM_FDPASSING))
			return (EOPNOTSUPP);

		error = fdbuf_create(fds, fdlen, &fdbuf);
		if (error)
			return (error);
		/*
		 * NOTE(review): the result of fdbuf_allocmsg() is used
		 * without a NULL check; presumably it cannot fail - confirm.
		 */
		mp = fdbuf_allocmsg(size, fdbuf);
	} else {
		mp = soallocproto(size, _ALLOC_INTR);
		if (mp == NULL) {
			/*
			 * Caught a signal waiting for memory.
			 * Let send* return EINTR.
			 */
			return (EINTR);
		}
	}

	/* Header, then the destination address padded to option alignment. */
	soappendmsg(mp, &tudr, sizeof (tudr));
	soappendmsg(mp, addr, addrlen);
	mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen;

	if (fdlen != -1) {
		/* File descriptors travel as an SO_FILEP option. */
		ASSERT(fdbuf != NULL);
		toh.level = SOL_SOCKET;
		toh.name = SO_FILEP;
		toh.len = fdbuf->fd_size +
		    (t_uscalar_t)sizeof (struct T_opthdr);
		toh.status = 0;
		soappendmsg(mp, &toh, sizeof (toh));
		soappendmsg(mp, fdbuf, fdbuf->fd_size);
		ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr));
	}
	if (srclen != 0) {
		/*
		 * There is a AF_UNIX sockaddr_un to include as a source
		 * address option.
		 */
		toh.level = SOL_SOCKET;
		toh.name = SO_SRCADDR;
		toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr));
		toh.status = 0;
		soappendmsg(mp, &toh, sizeof (toh));
		soappendmsg(mp, src, srclen);
		mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen;
		ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr));
	}
	ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
	/* Finally the options derived from the caller's control data. */
	so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp);
	/* At most 3 bytes left in the message */
	ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE));
	ASSERT(MBLKL(mp) <= (ssize_t)size);

	ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
#ifdef C2_AUDIT
	if (audit_active)
		audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0);
#endif /* C2_AUDIT */

	/* Send the header together with len bytes of user data. */
	error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0);
#ifdef SOCK_DEBUG
	if (error) {
		eprintsoline(so, error);
	}
#endif /* SOCK_DEBUG */
	return (error);
}

/*
 * Sending data with options on a connected stream socket.
 * Assumes caller has verified that SS_ISCONNECTED is set.
3528 */ 3529 static int 3530 sosend_svccmsg(struct sonode *so, 3531 struct uio *uiop, 3532 int more, 3533 void *control, 3534 t_uscalar_t controllen, 3535 int flags) 3536 { 3537 struct T_optdata_req tdr; 3538 mblk_t *mp; 3539 int error; 3540 ssize_t iosize; 3541 int first = 1; 3542 int size; 3543 struct fdbuf *fdbuf; 3544 t_uscalar_t optlen; 3545 void *fds; 3546 int fdlen; 3547 struct T_opthdr toh; 3548 3549 dprintso(so, 1, 3550 ("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid)); 3551 3552 /* 3553 * Has to be bound and connected. However, since no locks are 3554 * held the state could have changed after sotpi_sendmsg checked it 3555 * thus it is not possible to ASSERT on the state. 3556 */ 3557 3558 /* Options on connection-oriented only when SM_OPTDATA set. */ 3559 if (!(so->so_mode & SM_OPTDATA)) 3560 return (EOPNOTSUPP); 3561 3562 do { 3563 /* 3564 * Set the MORE flag if uio_resid does not fit in this 3565 * message or if the caller passed in "more". 3566 * Error for transports with zero tidu_size. 3567 */ 3568 tdr.PRIM_type = T_OPTDATA_REQ; 3569 iosize = so->so_tidu_size; 3570 if (iosize <= 0) 3571 return (EMSGSIZE); 3572 if (uiop->uio_resid > iosize) { 3573 tdr.DATA_flag = 1; 3574 } else { 3575 if (more) 3576 tdr.DATA_flag = 1; 3577 else 3578 tdr.DATA_flag = 0; 3579 iosize = uiop->uio_resid; 3580 } 3581 dprintso(so, 1, ("sosend_svccmsg: sending %d, %ld bytes\n", 3582 tdr.DATA_flag, iosize)); 3583 3584 optlen = so_optlen(control, controllen, !(flags & MSG_XPG4_2)); 3585 tdr.OPT_length = optlen; 3586 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 3587 3588 size = (int)sizeof (tdr) + optlen; 3589 /* 3590 * File descriptors only when SM_FDPASSING set. 
3591 */ 3592 error = so_getfdopt(control, controllen, 3593 !(flags & MSG_XPG4_2), &fds, &fdlen); 3594 if (error) 3595 return (error); 3596 if (fdlen != -1) { 3597 if (!(so->so_mode & SM_FDPASSING)) 3598 return (EOPNOTSUPP); 3599 3600 error = fdbuf_create(fds, fdlen, &fdbuf); 3601 if (error) 3602 return (error); 3603 mp = fdbuf_allocmsg(size, fdbuf); 3604 } else { 3605 mp = soallocproto(size, _ALLOC_INTR); 3606 if (mp == NULL) { 3607 /* 3608 * Caught a signal waiting for memory. 3609 * Let send* return EINTR. 3610 */ 3611 return (first ? EINTR : 0); 3612 } 3613 } 3614 soappendmsg(mp, &tdr, sizeof (tdr)); 3615 3616 if (fdlen != -1) { 3617 ASSERT(fdbuf != NULL); 3618 toh.level = SOL_SOCKET; 3619 toh.name = SO_FILEP; 3620 toh.len = fdbuf->fd_size + 3621 (t_uscalar_t)sizeof (struct T_opthdr); 3622 toh.status = 0; 3623 soappendmsg(mp, &toh, sizeof (toh)); 3624 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3625 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3626 } 3627 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3628 /* At most 3 bytes left in the message */ 3629 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3630 ASSERT(MBLKL(mp) <= (ssize_t)size); 3631 3632 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3633 3634 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 3635 0, MSG_BAND, 0); 3636 if (error) { 3637 if (!first && error == EWOULDBLOCK) 3638 return (0); 3639 eprintsoline(so, error); 3640 return (error); 3641 } 3642 control = NULL; 3643 first = 0; 3644 if (uiop->uio_resid > 0) { 3645 /* 3646 * Recheck for fatal errors. Fail write even though 3647 * some data have been written. This is consistent 3648 * with strwrite semantics and BSD sockets semantics. 
3649 */ 3650 if (so->so_state & SS_CANTSENDMORE) { 3651 tsignal(curthread, SIGPIPE); 3652 eprintsoline(so, error); 3653 return (EPIPE); 3654 } 3655 if (so->so_error != 0) { 3656 mutex_enter(&so->so_lock); 3657 error = sogeterr(so); 3658 mutex_exit(&so->so_lock); 3659 if (error != 0) { 3660 eprintsoline(so, error); 3661 return (error); 3662 } 3663 } 3664 } 3665 } while (uiop->uio_resid > 0); 3666 return (0); 3667 } 3668 3669 /* 3670 * Sending data on a datagram socket. 3671 * Assumes caller has verified that SS_ISBOUND etc. are set. 3672 * 3673 * For AF_UNIX the destination address is translated to an internal 3674 * name and the source address is passed as an option. 3675 */ 3676 int 3677 sosend_dgram(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3678 struct uio *uiop, int flags) 3679 { 3680 struct T_unitdata_req tudr; 3681 mblk_t *mp; 3682 int error; 3683 void *addr; 3684 socklen_t addrlen; 3685 void *src; 3686 socklen_t srclen; 3687 ssize_t len; 3688 3689 ASSERT(name != NULL && namelen != 0); 3690 3691 len = uiop->uio_resid; 3692 if (len > so->so_tidu_size) { 3693 error = EMSGSIZE; 3694 goto done; 3695 } 3696 3697 /* Length and family checks */ 3698 error = so_addr_verify(so, name, namelen); 3699 if (error != 0) 3700 goto done; 3701 3702 if (so->so_state & SS_DIRECT) 3703 return (sodgram_direct(so, name, namelen, uiop, flags)); 3704 3705 if (so->so_family == AF_UNIX) { 3706 if (so->so_state & SS_FADDR_NOXLATE) { 3707 /* 3708 * Already have a transport internal address. Do not 3709 * pass any (transport internal) source address. 3710 */ 3711 addr = name; 3712 addrlen = namelen; 3713 src = NULL; 3714 srclen = 0; 3715 } else { 3716 /* 3717 * Pass the sockaddr_un source address as an option 3718 * and translate the remote address. 3719 * 3720 * Note that this code does not prevent so_laddr_sa 3721 * from changing while it is being used. 
Thus 3722 * if an unbind+bind occurs concurrently with this 3723 * send the peer might see a partially new and a 3724 * partially old "from" address. 3725 */ 3726 src = so->so_laddr_sa; 3727 srclen = (socklen_t)so->so_laddr_len; 3728 dprintso(so, 1, 3729 ("sosend_dgram UNIX: srclen %d, src %p\n", 3730 srclen, src)); 3731 error = so_ux_addr_xlate(so, name, namelen, 3732 (flags & MSG_XPG4_2), 3733 &addr, &addrlen); 3734 if (error) { 3735 eprintsoline(so, error); 3736 goto done; 3737 } 3738 } 3739 } else { 3740 addr = name; 3741 addrlen = namelen; 3742 src = NULL; 3743 srclen = 0; 3744 } 3745 tudr.PRIM_type = T_UNITDATA_REQ; 3746 tudr.DEST_length = addrlen; 3747 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3748 if (srclen == 0) { 3749 tudr.OPT_length = 0; 3750 tudr.OPT_offset = 0; 3751 3752 mp = soallocproto2(&tudr, sizeof (tudr), 3753 addr, addrlen, 0, _ALLOC_INTR); 3754 if (mp == NULL) { 3755 /* 3756 * Caught a signal waiting for memory. 3757 * Let send* return EINTR. 3758 */ 3759 error = EINTR; 3760 goto done; 3761 } 3762 } else { 3763 /* 3764 * There is a AF_UNIX sockaddr_un to include as a source 3765 * address option. 3766 */ 3767 struct T_opthdr toh; 3768 ssize_t size; 3769 3770 tudr.OPT_length = (t_scalar_t)(sizeof (toh) + 3771 _TPI_ALIGN_TOPT(srclen)); 3772 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3773 _TPI_ALIGN_TOPT(addrlen)); 3774 3775 toh.level = SOL_SOCKET; 3776 toh.name = SO_SRCADDR; 3777 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3778 toh.status = 0; 3779 3780 size = tudr.OPT_offset + tudr.OPT_length; 3781 mp = soallocproto2(&tudr, sizeof (tudr), 3782 addr, addrlen, size, _ALLOC_INTR); 3783 if (mp == NULL) { 3784 /* 3785 * Caught a signal waiting for memory. 3786 * Let send* return EINTR. 
3787 */ 3788 error = EINTR; 3789 goto done; 3790 } 3791 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3792 soappendmsg(mp, &toh, sizeof (toh)); 3793 soappendmsg(mp, src, srclen); 3794 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3795 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3796 } 3797 3798 #ifdef C2_AUDIT 3799 if (audit_active) 3800 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3801 #endif /* C2_AUDIT */ 3802 3803 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3804 done: 3805 #ifdef SOCK_DEBUG 3806 if (error) { 3807 eprintsoline(so, error); 3808 } 3809 #endif /* SOCK_DEBUG */ 3810 return (error); 3811 } 3812 3813 /* 3814 * Sending data on a connected stream socket. 3815 * Assumes caller has verified that SS_ISCONNECTED is set. 3816 */ 3817 int 3818 sosend_svc(struct sonode *so, 3819 struct uio *uiop, 3820 t_scalar_t prim, 3821 int more, 3822 int sflag) 3823 { 3824 struct T_data_req tdr; 3825 mblk_t *mp; 3826 int error; 3827 ssize_t iosize; 3828 int first = 1; 3829 3830 dprintso(so, 1, 3831 ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n", 3832 so, uiop->uio_resid, prim, sflag)); 3833 3834 /* 3835 * Has to be bound and connected. However, since no locks are 3836 * held the state could have changed after sotpi_sendmsg checked it 3837 * thus it is not possible to ASSERT on the state. 3838 */ 3839 3840 do { 3841 /* 3842 * Set the MORE flag if uio_resid does not fit in this 3843 * message or if the caller passed in "more". 3844 * Error for transports with zero tidu_size. 
3845 */ 3846 tdr.PRIM_type = prim; 3847 iosize = so->so_tidu_size; 3848 if (iosize <= 0) 3849 return (EMSGSIZE); 3850 if (uiop->uio_resid > iosize) { 3851 tdr.MORE_flag = 1; 3852 } else { 3853 if (more) 3854 tdr.MORE_flag = 1; 3855 else 3856 tdr.MORE_flag = 0; 3857 iosize = uiop->uio_resid; 3858 } 3859 dprintso(so, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n", 3860 prim, tdr.MORE_flag, iosize)); 3861 mp = soallocproto1(&tdr, sizeof (tdr), 0, _ALLOC_INTR); 3862 if (mp == NULL) { 3863 /* 3864 * Caught a signal waiting for memory. 3865 * Let send* return EINTR. 3866 */ 3867 if (first) 3868 return (EINTR); 3869 else 3870 return (0); 3871 } 3872 3873 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 3874 0, sflag | MSG_BAND, 0); 3875 if (error) { 3876 if (!first && error == EWOULDBLOCK) 3877 return (0); 3878 eprintsoline(so, error); 3879 return (error); 3880 } 3881 first = 0; 3882 if (uiop->uio_resid > 0) { 3883 /* 3884 * Recheck for fatal errors. Fail write even though 3885 * some data have been written. This is consistent 3886 * with strwrite semantics and BSD sockets semantics. 3887 */ 3888 if (so->so_state & SS_CANTSENDMORE) { 3889 tsignal(curthread, SIGPIPE); 3890 eprintsoline(so, error); 3891 return (EPIPE); 3892 } 3893 if (so->so_error != 0) { 3894 mutex_enter(&so->so_lock); 3895 error = sogeterr(so); 3896 mutex_exit(&so->so_lock); 3897 if (error != 0) { 3898 eprintsoline(so, error); 3899 return (error); 3900 } 3901 } 3902 } 3903 } while (uiop->uio_resid > 0); 3904 return (0); 3905 } 3906 3907 /* 3908 * Check the state for errors and call the appropriate send function. 3909 * 3910 * If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set) 3911 * this function issues a setsockopt to toggle SO_DONTROUTE before and 3912 * after sending the message. 
*/
/*
 * Outline:
 *   1. Under so_lock: fail with EPIPE (plus SIGPIPE) if SS_CANTSENDMORE is
 *      set, return any pending so_error, and pick the destination - the
 *      connected peer when msg_name is NULL, otherwise msg_name, doing an
 *      unspecified bind first when the socket is not bound and consuming
 *      any delayed datagram error.
 *   2. Without the lock: optionally turn SO_DONTROUTE on, dispatch to
 *      sosend_dgramcmsg/sosend_svccmsg (control data present),
 *      sosend_dgram, strwrite/sostream_direct (byte stream) or sosend_svc,
 *      and turn SO_DONTROUTE off again if it was turned on here.
 *
 * Returns 0 or an errno value.
 */
static int
sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
{
	int		so_state;
	int		so_mode;
	int		error;
	struct sockaddr	*name;
	t_uscalar_t	namelen;
	int		dontroute;
	int		flags;

	dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n",
	    so, msg, msg->msg_flags,
	    pr_state(so->so_state, so->so_mode), so->so_error));

	mutex_enter(&so->so_lock);
	/* Snapshot the state; the checks below use this copy. */
	so_state = so->so_state;

	if (so_state & SS_CANTSENDMORE) {
		mutex_exit(&so->so_lock);
		tsignal(curthread, SIGPIPE);
		return (EPIPE);
	}

	if (so->so_error != 0) {
		error = sogeterr(so);
		if (error != 0) {
			mutex_exit(&so->so_lock);
			return (error);
		}
	}

	name = (struct sockaddr *)msg->msg_name;
	namelen = msg->msg_namelen;

	so_mode = so->so_mode;

	if (name == NULL) {
		/* No destination supplied: the socket must be connected. */
		if (!(so_state & SS_ISCONNECTED)) {
			mutex_exit(&so->so_lock);
			if (so_mode & SM_CONNREQUIRED)
				return (ENOTCONN);
			else
				return (EDESTADDRREQ);
		}
		if (so_mode & SM_CONNREQUIRED) {
			name = NULL;
			namelen = 0;
		} else {
			/*
			 * Note that this code does not prevent so_faddr_sa
			 * from changing while it is being used. Thus
			 * if an "unconnect"+connect occurs concurrently with
			 * this send the datagram might be delivered to a
			 * garbled address.
			 */
			ASSERT(so->so_faddr_sa);
			name = so->so_faddr_sa;
			namelen = (t_uscalar_t)so->so_faddr_len;
		}
	} else {
		if (!(so_state & SS_ISCONNECTED) &&
		    (so_mode & SM_CONNREQUIRED)) {
			/* Required but not connected */
			mutex_exit(&so->so_lock);
			return (ENOTCONN);
		}
		/*
		 * Ignore the address on connection-oriented sockets.
		 * Just like BSD this code does not generate an error for
		 * TCP (a CONNREQUIRED socket) when sending to an address
		 * passed in with sendto/sendmsg. Instead the data is
		 * delivered on the connection as if no address had been
		 * supplied.
		 *
		 * A connected socket that does not require a connection
		 * gets EISCONN when an address is supplied.
		 */
		if ((so_state & SS_ISCONNECTED) &&
		    !(so_mode & SM_CONNREQUIRED)) {
			mutex_exit(&so->so_lock);
			return (EISCONN);
		}
		if (!(so_state & SS_ISBOUND)) {
			/* Bind to an unspecified address before sending. */
			so_lock_single(so);	/* Set SOLOCKED */
			error = sotpi_bind(so, NULL, 0,
			    _SOBIND_UNSPEC|_SOBIND_LOCK_HELD);
			so_unlock_single(so, SOLOCKED);
			if (error) {
				mutex_exit(&so->so_lock);
				eprintsoline(so, error);
				return (error);
			}
		}
		/*
		 * Handle delayed datagram errors. These are only queued
		 * when the application sets SO_DGRAM_ERRIND.
		 * Return the error if we are sending to the address
		 * that was returned in the last T_UDERROR_IND.
		 * If sending to some other address discard the delayed
		 * error indication.
		 */
		if (so->so_delayed_error) {
			struct T_uderror_ind	*tudi;
			void			*addr;
			t_uscalar_t		addrlen;
			boolean_t		match = B_FALSE;

			ASSERT(so->so_eaddr_mp);
			/* Consume the error whether or not it matches. */
			error = so->so_delayed_error;
			so->so_delayed_error = 0;
			tudi = (struct T_uderror_ind *)so->so_eaddr_mp->b_rptr;
			addrlen = tudi->DEST_length;
			addr = sogetoff(so->so_eaddr_mp,
			    tudi->DEST_offset,
			    addrlen, 1);
			ASSERT(addr);	/* Checked by strsock_proto */
			switch (so->so_family) {
			case AF_INET: {
				/* Compare just IP address and port */
				sin_t *sin1 = (sin_t *)name;
				sin_t *sin2 = (sin_t *)addr;

				if (addrlen == sizeof (sin_t) &&
				    namelen == addrlen &&
				    sin1->sin_port == sin2->sin_port &&
				    sin1->sin_addr.s_addr ==
				    sin2->sin_addr.s_addr)
					match = B_TRUE;
				break;
			}
			case AF_INET6: {
				/* Compare just IP address and port. Not flow */
				sin6_t *sin1 = (sin6_t *)name;
				sin6_t *sin2 = (sin6_t *)addr;

				if (addrlen == sizeof (sin6_t) &&
				    namelen == addrlen &&
				    sin1->sin6_port == sin2->sin6_port &&
				    IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
				    &sin2->sin6_addr))
					match = B_TRUE;
				break;
			}
			case AF_UNIX:
			default:
				/* Other families: compare the raw bytes. */
				if (namelen == addrlen &&
				    bcmp(name, addr, namelen) == 0)
					match = B_TRUE;
			}
			if (match) {
				freemsg(so->so_eaddr_mp);
				so->so_eaddr_mp = NULL;
				mutex_exit(&so->so_lock);
#ifdef DEBUG
				dprintso(so, 0,
				    ("sockfs delayed error %d for %s\n",
				    error,
				    pr_addr(so->so_family, name, namelen)));
#endif /* DEBUG */
				return (error);
			}
			/* Different destination: drop the saved address. */
			freemsg(so->so_eaddr_mp);
			so->so_eaddr_mp = NULL;
		}
	}
	mutex_exit(&so->so_lock);

	flags = msg->msg_flags;
	dontroute = 0;
	if ((flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE)) {
		uint32_t	val;

		/* Turn SO_DONTROUTE on for this send; undone at "done". */
		val = 1;
		error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE,
		    &val, (t_uscalar_t)sizeof (val));
		if (error)
			return (error);
		dontroute = 1;
	}

	if ((flags & MSG_OOB) && !(so_mode & SM_EXDATA)) {
		error = EOPNOTSUPP;
		goto done;
	}
	if (msg->msg_controllen != 0) {
		/* Control data present: use the option-carrying senders. */
		if (!(so_mode & SM_CONNREQUIRED)) {
			error = sosend_dgramcmsg(so, name, namelen, uiop,
			    msg->msg_control, msg->msg_controllen, flags);
		} else {
			if (flags & MSG_OOB) {
				/* Can't generate T_EXDATA_REQ with options */
				error = EOPNOTSUPP;
				goto done;
			}
			error = sosend_svccmsg(so, uiop,
			    !(flags & MSG_EOR),
			    msg->msg_control, msg->msg_controllen,
			    flags);
		}
		goto done;
	}

	if (!(so_mode & SM_CONNREQUIRED)) {
		/*
		 * If there is no SO_DONTROUTE to turn off return immediately
		 * from send_dgram. This can allow tail-call optimizations.
		 */
		if (!dontroute) {
			return (sosend_dgram(so, name, namelen, uiop, flags));
		}
		error = sosend_dgram(so, name, namelen, uiop, flags);
	} else {
		t_scalar_t	prim;
		int		sflag;

		/* Ignore msg_name in the connected state */
		if (flags & MSG_OOB) {
			prim = T_EXDATA_REQ;
			/*
			 * Send down T_EXDATA_REQ even if there is flow
			 * control for data.
			 */
			sflag = MSG_IGNFLOW;
		} else {
			if (so_mode & SM_BYTESTREAM) {
				/* Byte stream transport - use write */

				dprintso(so, 1, ("sotpi_sendmsg: write\n"));
				/*
				 * If there is no SO_DONTROUTE to turn off,
				 * SS_DIRECT is on, and there is no flow
				 * control, we can take the fast path.
				 */
				if (!dontroute &&
				    (so_state & SS_DIRECT) &&
				    canputnext(SOTOV(so)->v_stream->sd_wrq)) {
					return (sostream_direct(so, uiop,
					    NULL, CRED()));
				}
				error = strwrite(SOTOV(so), uiop, CRED());
				goto done;
			}
			prim = T_DATA_REQ;
			sflag = 0;
		}
		/*
		 * If there is no SO_DONTROUTE to turn off return immediately
		 * from sosend_svc. This can allow tail-call optimizations.
		 */
		if (!dontroute)
			return (sosend_svc(so, uiop, prim,
			    !(flags & MSG_EOR), sflag));
		error = sosend_svc(so, uiop, prim,
		    !(flags & MSG_EOR), sflag);
	}
	/* Falling through here only happens when dontroute was set. */
	ASSERT(dontroute);
done:
	if (dontroute) {
		uint32_t	val;

		/* Best effort: a failure to clear the option is ignored. */
		val = 0;
		(void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE,
		    &val, (t_uscalar_t)sizeof (val));
	}
	return (error);
}

/*
 * Sending data on a datagram socket.
 * Assumes caller has verified that SS_ISBOUND etc. are set.
*/
/*
 * Variant used for AF_INET/AF_INET6 datagram sockets (see the ASSERTs
 * below); sosend_dgram() calls it when SS_DIRECT is set.  When the queue
 * below the stream head write queue can accept data and the user data
 * can be copied in, the data is handed directly to udp_wput_data().
 * Otherwise the send falls back to strwrite() (connected socket) or to a
 * T_UNITDATA_REQ sent with kstrputmsg().
 *
 * Returns 0 or an errno value.
 */
/* ARGSUSED */
static int
sodgram_direct(struct sonode *so, struct sockaddr *name,
    socklen_t namelen, struct uio *uiop, int flags)
{
	struct T_unitdata_req	tudr;
	mblk_t			*mp;
	int			error = 0;
	void			*addr;
	socklen_t		addrlen;
	ssize_t			len;
	struct stdata		*stp = SOTOV(so)->v_stream;
	int			so_state;
	queue_t			*udp_wq;

	ASSERT(name != NULL && namelen != 0);
	ASSERT(!(so->so_mode & SM_CONNREQUIRED));
	ASSERT(!(so->so_mode & SM_EXDATA));
	ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6);
	ASSERT(SOTOV(so)->v_type == VSOCK);

	/* Caller checked for proper length */
	len = uiop->uio_resid;
	ASSERT(len <= so->so_tidu_size);

	/* Length and family checks have been done by caller */
	ASSERT(name->sa_family == so->so_family);
	ASSERT(so->so_family == AF_INET ||
	    (namelen == (socklen_t)sizeof (struct sockaddr_in6)));
	ASSERT(so->so_family == AF_INET6 ||
	    (namelen == (socklen_t)sizeof (struct sockaddr_in)));

	addr = name;
	addrlen = namelen;

	/* straccess() is only consulted when sd_sidp is set. */
	if (stp->sd_sidp != NULL &&
	    (error = straccess(stp, JCWRITE)) != 0)
		goto done;

	/* NOTE(review): so_state is read here without taking so_lock. */
	so_state = so->so_state;

	/*
	 * For UDP we don't break up the copyin into smaller pieces
	 * as in the TCP case.  That means if ENOMEM is returned by
	 * mcopyinuio() then the uio vector has not been modified at
	 * all and we fallback to either strwrite() or kstrputmsg()
	 * below.  Note also that we never generate priority messages
	 * from here.
	 */
	udp_wq = stp->sd_wrq->q_next;
	if (canput(udp_wq) &&
	    (mp = mcopyinuio(stp, uiop, -1, -1, &error)) != NULL) {
		ASSERT(DB_TYPE(mp) == M_DATA);
		ASSERT(uiop->uio_resid == 0);
#ifdef C2_AUDIT
		if (audit_active)
			audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0);
#endif /* C2_AUDIT */
		udp_wput_data(udp_wq, mp, addr, addrlen);
		return (0);
	}
	/* Any copyin failure other than ENOMEM is returned as is. */
	if (error != 0 && error != ENOMEM)
		return (error);

	/*
	 * For connected, let strwrite() handle the blocking case.
	 * Otherwise we fall thru and use kstrputmsg().
	 */
	if (so_state & SS_ISCONNECTED)
		return (strwrite(SOTOV(so), uiop, CRED()));

	/* Slow path: T_UNITDATA_REQ without options. */
	tudr.PRIM_type = T_UNITDATA_REQ;
	tudr.DEST_length = addrlen;
	tudr.DEST_offset = (t_scalar_t)sizeof (tudr);
	tudr.OPT_length = 0;
	tudr.OPT_offset = 0;

	mp = soallocproto2(&tudr, sizeof (tudr), addr, addrlen, 0, _ALLOC_INTR);
	if (mp == NULL) {
		/*
		 * Caught a signal waiting for memory.
		 * Let send* return EINTR.
		 */
		error = EINTR;
		goto done;
	}

#ifdef C2_AUDIT
	if (audit_active)
		audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0);
#endif /* C2_AUDIT */

	error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0);
done:
#ifdef SOCK_DEBUG
	if (error != 0) {
		eprintsoline(so, error);
	}
#endif /* SOCK_DEBUG */
	return (error);
}

/*
 * Direct send path for byte stream sockets (SM_BYTESTREAM is asserted).
 *
 * With uiop == NULL the caller supplies a ready message in mp, which is
 * passed straight to tcp_wput().  Otherwise the user data described by
 * uiop is copied in pieces of at most the stream's maximum packet size
 * (sd_qn_maxpsz) and each piece is passed to tcp_wput(); the mp argument
 * is not used as input in that case.  Whenever the direct path cannot be
 * used (stream head flags set, ENOMEM during copyin, or the queue below
 * cannot accept more data) the remaining data is handed to strwrite() or
 * strwrite_common().
 *
 * Returns 0 or an errno value.
 */
int
sostream_direct(struct sonode *so, struct uio *uiop, mblk_t *mp, cred_t *cr)
{
	struct stdata	*stp = SOTOV(so)->v_stream;
	ssize_t		iosize, rmax, maxblk;
	queue_t		*tcp_wq = stp->sd_wrq->q_next;
	int		error = 0, wflag = 0;

	ASSERT(so->so_mode & SM_BYTESTREAM);
	ASSERT(SOTOV(so)->v_type == VSOCK);

	/* straccess() is only consulted when sd_sidp is set. */
	if (stp->sd_sidp != NULL &&
	    (error = straccess(stp, JCWRITE)) != 0)
		return (error);

	if (uiop == NULL) {
		/*
		 * kstrwritemp() should have checked sd_flag and
		 * flow-control before coming here.  If we end up
		 * here it means that we can simply pass down the
		 * data to tcp.
		 */
		ASSERT(mp != NULL);
		tcp_wput(tcp_wq, mp);
		return (0);
	}

	/* Fallback to strwrite() to do proper error handling */
	if (stp->sd_flag & (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))
		return (strwrite(SOTOV(so), uiop, cr));

	rmax = stp->sd_qn_maxpsz;
	ASSERT(rmax >= 0 || rmax == INFPSZ);
	/* Nothing can, or needs to, be sent. */
	if (rmax == 0 || uiop->uio_resid <= 0)
		return (0);

	/* No packet size limit: try to send everything in one piece. */
	if (rmax == INFPSZ)
		rmax = uiop->uio_resid;

	maxblk = stp->sd_maxblk;

	for (;;) {
		iosize = MIN(uiop->uio_resid, rmax);

		mp = mcopyinuio(stp, uiop, iosize, maxblk, &error);
		if (mp == NULL) {
			/*
			 * Fallback to strwrite() for ENOMEM; if this
			 * is our first time in this routine and the uio
			 * vector has not been modified, we will end up
			 * calling strwrite() without any flag set.
			 */
			if (error == ENOMEM)
				goto slow_send;
			else
				return (error);
		}
		ASSERT(uiop->uio_resid >= 0);
		/*
		 * If mp is non-NULL and ENOMEM is set, it means that
		 * mcopyinuio() was able to break down some of the user
		 * data into one or more mblks.  Send the partial data
		 * to tcp and let the rest be handled in strwrite().
		 */
		ASSERT(error == 0 || error == ENOMEM);
		tcp_wput(tcp_wq, mp);

		/* Some data has been sent; remember that for strwrite. */
		wflag |= NOINTR;

		if (uiop->uio_resid == 0) {	/* No more data; we're done */
			ASSERT(error == 0);
			break;
		} else if (error == ENOMEM || !canput(tcp_wq) || (stp->sd_flag &
		    (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))) {
slow_send:
			/*
			 * We were able to send down partial data using
			 * the direct call interface, but are now relying
			 * on strwrite() to handle the non-fastpath cases.
			 * If the socket is blocking we will sleep in
			 * strwaitq() until write is permitted, otherwise,
			 * we will need to return the amount of bytes
			 * written so far back to the app.  This is the
			 * reason why we pass NOINTR flag to strwrite()
			 * for non-blocking socket, because we don't want
			 * to return EAGAIN when portion of the user data
			 * has actually been sent down.
			 */
			return (strwrite_common(SOTOV(so), uiop, cr, wflag));
		}
	}
	return (0);
}

/*
 * Update so_faddr by asking the transport (unless AF_UNIX).
4381 */ 4382 int 4383 sotpi_getpeername(struct sonode *so) 4384 { 4385 struct strbuf strbuf; 4386 int error = 0, res; 4387 void *addr; 4388 t_uscalar_t addrlen; 4389 k_sigset_t smask; 4390 4391 dprintso(so, 1, ("sotpi_getpeername(%p) %s\n", 4392 so, pr_state(so->so_state, so->so_mode))); 4393 4394 mutex_enter(&so->so_lock); 4395 so_lock_single(so); /* Set SOLOCKED */ 4396 if (!(so->so_state & SS_ISCONNECTED)) { 4397 error = ENOTCONN; 4398 goto done; 4399 } 4400 /* Added this check for X/Open */ 4401 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 4402 error = EINVAL; 4403 if (xnet_check_print) { 4404 printf("sockfs: X/Open getpeername check => EINVAL\n"); 4405 } 4406 goto done; 4407 } 4408 #ifdef DEBUG 4409 dprintso(so, 1, ("sotpi_getpeername (local): %s\n", 4410 pr_addr(so->so_family, so->so_faddr_sa, 4411 (t_uscalar_t)so->so_faddr_len))); 4412 #endif /* DEBUG */ 4413 4414 if (so->so_family == AF_UNIX || so->so_family == AF_NCA) { 4415 /* Transport has different name space - return local info */ 4416 error = 0; 4417 goto done; 4418 } 4419 4420 ASSERT(so->so_faddr_sa); 4421 /* Allocate local buffer to use with ioctl */ 4422 addrlen = (t_uscalar_t)so->so_faddr_maxlen; 4423 mutex_exit(&so->so_lock); 4424 addr = kmem_alloc(addrlen, KM_SLEEP); 4425 4426 /* 4427 * Issue TI_GETPEERNAME with signals masked. 4428 * Put the result in so_faddr_sa so that getpeername works after 4429 * a shutdown(output). 4430 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 4431 * back to the socket. 4432 */ 4433 strbuf.buf = addr; 4434 strbuf.maxlen = addrlen; 4435 strbuf.len = 0; 4436 4437 sigintr(&smask, 0); 4438 res = 0; 4439 ASSERT(CRED()); 4440 error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf, 4441 0, K_TO_K, CRED(), &res); 4442 sigunintr(&smask); 4443 4444 mutex_enter(&so->so_lock); 4445 /* 4446 * If there is an error record the error in so_error put don't fail 4447 * the getpeername. 
Instead fallback on the recorded 4448 * so->so_faddr_sa. 4449 */ 4450 if (error) { 4451 /* 4452 * Various stream head errors can be returned to the ioctl. 4453 * However, it is impossible to determine which ones of 4454 * these are really socket level errors that were incorrectly 4455 * consumed by the ioctl. Thus this code silently ignores the 4456 * error - to code explicitly does not reinstate the error 4457 * using soseterror(). 4458 * Experiments have shows that at least this set of 4459 * errors are reported and should not be reinstated on the 4460 * socket: 4461 * EINVAL E.g. if an I_LINK was in effect when 4462 * getpeername was called. 4463 * EPIPE The ioctl error semantics prefer the write 4464 * side error over the read side error. 4465 * ENOTCONN The transport just got disconnected but 4466 * sockfs had not yet seen the T_DISCON_IND 4467 * when issuing the ioctl. 4468 */ 4469 error = 0; 4470 } else if (res == 0 && strbuf.len > 0 && 4471 (so->so_state & SS_ISCONNECTED)) { 4472 ASSERT(strbuf.len <= (int)so->so_faddr_maxlen); 4473 so->so_faddr_len = (socklen_t)strbuf.len; 4474 bcopy(addr, so->so_faddr_sa, so->so_faddr_len); 4475 so->so_state |= SS_FADDR_VALID; 4476 } 4477 kmem_free(addr, addrlen); 4478 #ifdef DEBUG 4479 dprintso(so, 1, ("sotpi_getpeername (tp): %s\n", 4480 pr_addr(so->so_family, so->so_faddr_sa, 4481 (t_uscalar_t)so->so_faddr_len))); 4482 #endif /* DEBUG */ 4483 done: 4484 so_unlock_single(so, SOLOCKED); 4485 mutex_exit(&so->so_lock); 4486 return (error); 4487 } 4488 4489 /* 4490 * Update so_laddr by asking the transport (unless AF_UNIX). 
4491 */ 4492 int 4493 sotpi_getsockname(struct sonode *so) 4494 { 4495 struct strbuf strbuf; 4496 int error = 0, res; 4497 void *addr; 4498 t_uscalar_t addrlen; 4499 k_sigset_t smask; 4500 4501 dprintso(so, 1, ("sotpi_getsockname(%p) %s\n", 4502 so, pr_state(so->so_state, so->so_mode))); 4503 4504 mutex_enter(&so->so_lock); 4505 so_lock_single(so); /* Set SOLOCKED */ 4506 if (!(so->so_state & SS_ISBOUND) && so->so_family != AF_UNIX) { 4507 /* Return an all zero address except for the family */ 4508 if (so->so_family == AF_INET) 4509 so->so_laddr_len = (socklen_t)sizeof (sin_t); 4510 else if (so->so_family == AF_INET6) 4511 so->so_laddr_len = (socklen_t)sizeof (sin6_t); 4512 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 4513 bzero(so->so_laddr_sa, so->so_laddr_len); 4514 /* 4515 * Can not assume there is a sa_family for all 4516 * protocol families. 4517 */ 4518 if (so->so_family == AF_INET || so->so_family == AF_INET6) 4519 so->so_laddr_sa->sa_family = so->so_family; 4520 } 4521 #ifdef DEBUG 4522 dprintso(so, 1, ("sotpi_getsockname (local): %s\n", 4523 pr_addr(so->so_family, so->so_laddr_sa, 4524 (t_uscalar_t)so->so_laddr_len))); 4525 #endif /* DEBUG */ 4526 if (so->so_family == AF_UNIX) { 4527 /* Transport has different name space - return local info */ 4528 error = 0; 4529 goto done; 4530 } 4531 /* Allocate local buffer to use with ioctl */ 4532 addrlen = (t_uscalar_t)so->so_laddr_maxlen; 4533 mutex_exit(&so->so_lock); 4534 addr = kmem_alloc(addrlen, KM_SLEEP); 4535 4536 /* 4537 * Issue TI_GETMYNAME with signals masked. 4538 * Put the result in so_laddr_sa so that getsockname works after 4539 * a shutdown(output). 4540 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 4541 * back to the socket. 
4542 */ 4543 strbuf.buf = addr; 4544 strbuf.maxlen = addrlen; 4545 strbuf.len = 0; 4546 4547 sigintr(&smask, 0); 4548 res = 0; 4549 ASSERT(CRED()); 4550 error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf, 4551 0, K_TO_K, CRED(), &res); 4552 sigunintr(&smask); 4553 4554 mutex_enter(&so->so_lock); 4555 /* 4556 * If there is an error record the error in so_error put don't fail 4557 * the getsockname. Instead fallback on the recorded 4558 * so->so_laddr_sa. 4559 */ 4560 if (error) { 4561 /* 4562 * Various stream head errors can be returned to the ioctl. 4563 * However, it is impossible to determine which ones of 4564 * these are really socket level errors that were incorrectly 4565 * consumed by the ioctl. Thus this code silently ignores the 4566 * error - to code explicitly does not reinstate the error 4567 * using soseterror(). 4568 * Experiments have shows that at least this set of 4569 * errors are reported and should not be reinstated on the 4570 * socket: 4571 * EINVAL E.g. if an I_LINK was in effect when 4572 * getsockname was called. 4573 * EPIPE The ioctl error semantics prefer the write 4574 * side error over the read side error. 4575 */ 4576 error = 0; 4577 } else if (res == 0 && strbuf.len > 0 && 4578 (so->so_state & SS_ISBOUND)) { 4579 ASSERT(strbuf.len <= (int)so->so_laddr_maxlen); 4580 so->so_laddr_len = (socklen_t)strbuf.len; 4581 bcopy(addr, so->so_laddr_sa, so->so_laddr_len); 4582 so->so_state |= SS_LADDR_VALID; 4583 } 4584 kmem_free(addr, addrlen); 4585 #ifdef DEBUG 4586 dprintso(so, 1, ("sotpi_getsockname (tp): %s\n", 4587 pr_addr(so->so_family, so->so_laddr_sa, 4588 (t_uscalar_t)so->so_laddr_len))); 4589 #endif /* DEBUG */ 4590 done: 4591 so_unlock_single(so, SOLOCKED); 4592 mutex_exit(&so->so_lock); 4593 return (error); 4594 } 4595 4596 /* 4597 * Get socket options. For SOL_SOCKET options some options are handled 4598 * by the sockfs while others use the value recorded in the sonode as a 4599 * fallback should the T_SVR4_OPTMGMT_REQ fail. 
4600 * 4601 * On the return most *optlenp bytes are copied to optval. 4602 */ 4603 int 4604 sotpi_getsockopt(struct sonode *so, int level, int option_name, 4605 void *optval, socklen_t *optlenp, int flags) 4606 { 4607 struct T_optmgmt_req optmgmt_req; 4608 struct T_optmgmt_ack *optmgmt_ack; 4609 struct opthdr oh; 4610 struct opthdr *opt_res; 4611 mblk_t *mp = NULL; 4612 int error = 0; 4613 void *option = NULL; /* Set if fallback value */ 4614 t_uscalar_t maxlen = *optlenp; 4615 t_uscalar_t len; 4616 uint32_t value; 4617 4618 dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n", 4619 so, level, option_name, optval, optlenp, 4620 pr_state(so->so_state, so->so_mode))); 4621 4622 mutex_enter(&so->so_lock); 4623 so_lock_single(so); /* Set SOLOCKED */ 4624 4625 /* 4626 * Check for SOL_SOCKET options. 4627 * Certain SOL_SOCKET options are returned directly whereas 4628 * others only provide a default (fallback) value should 4629 * the T_SVR4_OPTMGMT_REQ fail. 4630 */ 4631 if (level == SOL_SOCKET) { 4632 /* Check parameters */ 4633 switch (option_name) { 4634 case SO_TYPE: 4635 case SO_ERROR: 4636 case SO_DEBUG: 4637 case SO_ACCEPTCONN: 4638 case SO_REUSEADDR: 4639 case SO_KEEPALIVE: 4640 case SO_DONTROUTE: 4641 case SO_BROADCAST: 4642 case SO_USELOOPBACK: 4643 case SO_OOBINLINE: 4644 case SO_SNDBUF: 4645 case SO_RCVBUF: 4646 #ifdef notyet 4647 case SO_SNDLOWAT: 4648 case SO_RCVLOWAT: 4649 case SO_SNDTIMEO: 4650 case SO_RCVTIMEO: 4651 #endif /* notyet */ 4652 case SO_DGRAM_ERRIND: 4653 if (maxlen < (t_uscalar_t)sizeof (int32_t)) { 4654 error = EINVAL; 4655 eprintsoline(so, error); 4656 goto done2; 4657 } 4658 break; 4659 case SO_LINGER: 4660 if (maxlen < (t_uscalar_t)sizeof (struct linger)) { 4661 error = EINVAL; 4662 eprintsoline(so, error); 4663 goto done2; 4664 } 4665 break; 4666 } 4667 4668 len = (t_uscalar_t)sizeof (uint32_t); /* Default */ 4669 4670 switch (option_name) { 4671 case SO_TYPE: 4672 value = so->so_type; 4673 option = &value; 4674 goto 
copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4675 4676 case SO_ERROR: 4677 value = sogeterr(so); 4678 option = &value; 4679 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4680 4681 case SO_ACCEPTCONN: 4682 if (so->so_state & SS_ACCEPTCONN) 4683 value = SO_ACCEPTCONN; 4684 else 4685 value = 0; 4686 #ifdef DEBUG 4687 if (value) { 4688 dprintso(so, 1, 4689 ("sotpi_getsockopt: 0x%x is set\n", 4690 option_name)); 4691 } else { 4692 dprintso(so, 1, 4693 ("sotpi_getsockopt: 0x%x not set\n", 4694 option_name)); 4695 } 4696 #endif /* DEBUG */ 4697 option = &value; 4698 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4699 4700 case SO_DEBUG: 4701 case SO_REUSEADDR: 4702 case SO_KEEPALIVE: 4703 case SO_DONTROUTE: 4704 case SO_BROADCAST: 4705 case SO_USELOOPBACK: 4706 case SO_OOBINLINE: 4707 case SO_DGRAM_ERRIND: 4708 value = (so->so_options & option_name); 4709 #ifdef DEBUG 4710 if (value) { 4711 dprintso(so, 1, 4712 ("sotpi_getsockopt: 0x%x is set\n", 4713 option_name)); 4714 } else { 4715 dprintso(so, 1, 4716 ("sotpi_getsockopt: 0x%x not set\n", 4717 option_name)); 4718 } 4719 #endif /* DEBUG */ 4720 option = &value; 4721 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4722 4723 /* 4724 * The following options are only returned by sockfs when the 4725 * T_SVR4_OPTMGMT_REQ fails. 4726 */ 4727 case SO_LINGER: 4728 option = &so->so_linger; 4729 len = (t_uscalar_t)sizeof (struct linger); 4730 break; 4731 case SO_SNDBUF: { 4732 ssize_t lvalue; 4733 4734 /* 4735 * If the option has not been set then get a default 4736 * value from the read queue. This value is 4737 * returned if the transport fails 4738 * the T_SVR4_OPTMGMT_REQ. 
4739 */ 4740 lvalue = so->so_sndbuf; 4741 if (lvalue == 0) { 4742 mutex_exit(&so->so_lock); 4743 (void) strqget(strvp2wq(SOTOV(so))->q_next, 4744 QHIWAT, 0, &lvalue); 4745 mutex_enter(&so->so_lock); 4746 dprintso(so, 1, 4747 ("got SO_SNDBUF %ld from q\n", lvalue)); 4748 } 4749 value = (int)lvalue; 4750 option = &value; 4751 len = (t_uscalar_t)sizeof (so->so_sndbuf); 4752 break; 4753 } 4754 case SO_RCVBUF: { 4755 ssize_t lvalue; 4756 4757 /* 4758 * If the option has not been set then get a default 4759 * value from the read queue. This value is 4760 * returned if the transport fails 4761 * the T_SVR4_OPTMGMT_REQ. 4762 * 4763 * XXX If SO_RCVBUF has been set and this is an 4764 * XPG 4.2 application then do not ask the transport 4765 * since the transport might adjust the value and not 4766 * return exactly what was set by the application. 4767 * For non-XPG 4.2 application we return the value 4768 * that the transport is actually using. 4769 */ 4770 lvalue = so->so_rcvbuf; 4771 if (lvalue == 0) { 4772 mutex_exit(&so->so_lock); 4773 (void) strqget(RD(strvp2wq(SOTOV(so))), 4774 QHIWAT, 0, &lvalue); 4775 mutex_enter(&so->so_lock); 4776 dprintso(so, 1, 4777 ("got SO_RCVBUF %ld from q\n", lvalue)); 4778 } else if (flags & _SOGETSOCKOPT_XPG4_2) { 4779 value = (int)lvalue; 4780 option = &value; 4781 goto copyout; /* skip asking transport */ 4782 } 4783 value = (int)lvalue; 4784 option = &value; 4785 len = (t_uscalar_t)sizeof (so->so_rcvbuf); 4786 break; 4787 } 4788 #ifdef notyet 4789 /* 4790 * We do not implement the semantics of these options 4791 * thus we shouldn't implement the options either. 
4792 */ 4793 case SO_SNDLOWAT: 4794 value = so->so_sndlowat; 4795 option = &value; 4796 break; 4797 case SO_RCVLOWAT: 4798 value = so->so_rcvlowat; 4799 option = &value; 4800 break; 4801 case SO_SNDTIMEO: 4802 value = so->so_sndtimeo; 4803 option = &value; 4804 break; 4805 case SO_RCVTIMEO: 4806 value = so->so_rcvtimeo; 4807 option = &value; 4808 break; 4809 #endif /* notyet */ 4810 } 4811 } 4812 4813 if (so->so_family == AF_NCA) { 4814 goto done2; 4815 } 4816 4817 mutex_exit(&so->so_lock); 4818 4819 /* Send request */ 4820 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 4821 optmgmt_req.MGMT_flags = T_CHECK; 4822 optmgmt_req.OPT_length = (t_scalar_t)(sizeof (oh) + maxlen); 4823 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 4824 4825 oh.level = level; 4826 oh.name = option_name; 4827 oh.len = maxlen; 4828 4829 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 4830 &oh, sizeof (oh), NULL, maxlen, 0, _ALLOC_SLEEP); 4831 /* Let option management work in the presence of data flow control */ 4832 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 4833 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 4834 mp = NULL; 4835 mutex_enter(&so->so_lock); 4836 if (error) { 4837 eprintsoline(so, error); 4838 goto done2; 4839 } 4840 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 4841 (t_uscalar_t)(sizeof (*optmgmt_ack) + sizeof (*opt_res)), &mp, 0); 4842 if (error) { 4843 if (option != NULL) { 4844 /* We have a fallback value */ 4845 error = 0; 4846 goto copyout; 4847 } 4848 eprintsoline(so, error); 4849 goto done2; 4850 } 4851 ASSERT(mp); 4852 optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr; 4853 opt_res = (struct opthdr *)sogetoff(mp, optmgmt_ack->OPT_offset, 4854 optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE); 4855 if (opt_res == NULL) { 4856 if (option != NULL) { 4857 /* We have a fallback value */ 4858 error = 0; 4859 goto copyout; 4860 } 4861 error = EPROTO; 4862 eprintsoline(so, error); 4863 goto done; 4864 } 4865 option = &opt_res[1]; 4866 4867 /* check 
to ensure that the option is within bounds */ 4868 if (((uintptr_t)option + opt_res->len < (uintptr_t)option) || 4869 (uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) { 4870 if (option != NULL) { 4871 /* We have a fallback value */ 4872 error = 0; 4873 goto copyout; 4874 } 4875 error = EPROTO; 4876 eprintsoline(so, error); 4877 goto done; 4878 } 4879 4880 len = opt_res->len; 4881 4882 copyout: { 4883 t_uscalar_t size = MIN(len, maxlen); 4884 bcopy(option, optval, size); 4885 bcopy(&size, optlenp, sizeof (size)); 4886 } 4887 done: 4888 freemsg(mp); 4889 done2: 4890 so_unlock_single(so, SOLOCKED); 4891 mutex_exit(&so->so_lock); 4892 return (error); 4893 } 4894 4895 /* 4896 * Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ. 4897 * SOL_SOCKET options are also recorded in the sonode. A setsockopt for 4898 * SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails - 4899 * setsockopt has to work even if the transport does not support the option. 
4900 */ 4901 int 4902 sotpi_setsockopt(struct sonode *so, int level, int option_name, 4903 const void *optval, t_uscalar_t optlen) 4904 { 4905 struct T_optmgmt_req optmgmt_req; 4906 struct opthdr oh; 4907 mblk_t *mp; 4908 int error = 0; 4909 boolean_t handled = B_FALSE; 4910 4911 dprintso(so, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n", 4912 so, level, option_name, optval, optlen, 4913 pr_state(so->so_state, so->so_mode))); 4914 4915 4916 /* X/Open requires this check */ 4917 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 4918 if (xnet_check_print) 4919 printf("sockfs: X/Open setsockopt check => EINVAL\n"); 4920 return (EINVAL); 4921 } 4922 4923 /* Caller allocates aligned optval, or passes null */ 4924 ASSERT(((uintptr_t)optval & (sizeof (t_scalar_t) - 1)) == 0); 4925 /* If optval is null optlen is 0, and vice-versa */ 4926 ASSERT(optval != NULL || optlen == 0); 4927 ASSERT(optlen != 0 || optval == NULL); 4928 4929 mutex_enter(&so->so_lock); 4930 so_lock_single(so); /* Set SOLOCKED */ 4931 mutex_exit(&so->so_lock); 4932 4933 if (so->so_family == AF_NCA) { 4934 /* Ignore any flow control problems with the transport. */ 4935 mutex_enter(&so->so_lock); 4936 goto done; 4937 } 4938 4939 /* 4940 * For SOCKET or TCP level options, try to set it here itself 4941 * provided socket has not been popped and we know the tcp 4942 * structure (stored in so_priv). 
4943 */ 4944 if ((level == SOL_SOCKET || level == IPPROTO_TCP) && 4945 (so->so_family == AF_INET || so->so_family == AF_INET6) && 4946 (so->so_version == SOV_SOCKSTREAM) && (so->so_priv != NULL)) { 4947 tcp_t *tcp = so->so_priv; 4948 boolean_t onoff; 4949 4950 #define intvalue (*(int32_t *)optval) 4951 4952 switch (level) { 4953 case SOL_SOCKET: 4954 switch (option_name) { /* Check length param */ 4955 case SO_DEBUG: 4956 case SO_REUSEADDR: 4957 case SO_DONTROUTE: 4958 case SO_BROADCAST: 4959 case SO_USELOOPBACK: 4960 case SO_OOBINLINE: 4961 case SO_DGRAM_ERRIND: 4962 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 4963 error = EINVAL; 4964 eprintsoline(so, error); 4965 mutex_enter(&so->so_lock); 4966 goto done2; 4967 } 4968 ASSERT(optval); 4969 onoff = intvalue != 0; 4970 handled = B_TRUE; 4971 break; 4972 case SO_LINGER: 4973 if (optlen != 4974 (t_uscalar_t)sizeof (struct linger)) { 4975 error = EINVAL; 4976 eprintsoline(so, error); 4977 mutex_enter(&so->so_lock); 4978 goto done2; 4979 } 4980 ASSERT(optval); 4981 handled = B_TRUE; 4982 break; 4983 } 4984 4985 switch (option_name) { /* Do actions */ 4986 case SO_LINGER: { 4987 struct linger *lgr = (struct linger *)optval; 4988 4989 if (lgr->l_onoff) { 4990 tcp->tcp_linger = 1; 4991 tcp->tcp_lingertime = lgr->l_linger; 4992 so->so_linger.l_onoff = SO_LINGER; 4993 so->so_options |= SO_LINGER; 4994 } else { 4995 tcp->tcp_linger = 0; 4996 tcp->tcp_lingertime = 0; 4997 so->so_linger.l_onoff = 0; 4998 so->so_options &= ~SO_LINGER; 4999 } 5000 so->so_linger.l_linger = lgr->l_linger; 5001 handled = B_TRUE; 5002 break; 5003 } 5004 case SO_DEBUG: 5005 tcp->tcp_debug = onoff; 5006 #ifdef SOCK_TEST 5007 if (intvalue & 2) 5008 sock_test_timelimit = 10 * hz; 5009 else 5010 sock_test_timelimit = 0; 5011 5012 if (intvalue & 4) 5013 do_useracc = 0; 5014 else 5015 do_useracc = 1; 5016 #endif /* SOCK_TEST */ 5017 break; 5018 case SO_DONTROUTE: 5019 /* 5020 * SO_DONTROUTE, SO_USELOOPBACK and 5021 * SO_BROADCAST are only of interest to 
IP. 5022 * We track them here only so 5023 * that we can report their current value. 5024 */ 5025 tcp->tcp_dontroute = onoff; 5026 if (onoff) 5027 so->so_options |= option_name; 5028 else 5029 so->so_options &= ~option_name; 5030 break; 5031 case SO_USELOOPBACK: 5032 tcp->tcp_useloopback = onoff; 5033 if (onoff) 5034 so->so_options |= option_name; 5035 else 5036 so->so_options &= ~option_name; 5037 break; 5038 case SO_BROADCAST: 5039 tcp->tcp_broadcast = onoff; 5040 if (onoff) 5041 so->so_options |= option_name; 5042 else 5043 so->so_options &= ~option_name; 5044 break; 5045 case SO_REUSEADDR: 5046 tcp->tcp_reuseaddr = onoff; 5047 if (onoff) 5048 so->so_options |= option_name; 5049 else 5050 so->so_options &= ~option_name; 5051 break; 5052 case SO_OOBINLINE: 5053 tcp->tcp_oobinline = onoff; 5054 if (onoff) 5055 so->so_options |= option_name; 5056 else 5057 so->so_options &= ~option_name; 5058 break; 5059 case SO_DGRAM_ERRIND: 5060 tcp->tcp_dgram_errind = onoff; 5061 if (onoff) 5062 so->so_options |= option_name; 5063 else 5064 so->so_options &= ~option_name; 5065 break; 5066 } 5067 break; 5068 case IPPROTO_TCP: 5069 switch (option_name) { 5070 case TCP_NODELAY: 5071 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5072 error = EINVAL; 5073 eprintsoline(so, error); 5074 mutex_enter(&so->so_lock); 5075 goto done2; 5076 } 5077 ASSERT(optval); 5078 tcp->tcp_naglim = intvalue ? 
1 : tcp->tcp_mss; 5079 handled = B_TRUE; 5080 break; 5081 } 5082 break; 5083 default: 5084 handled = B_FALSE; 5085 break; 5086 } 5087 } 5088 5089 if (handled) { 5090 mutex_enter(&so->so_lock); 5091 goto done2; 5092 } 5093 5094 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5095 optmgmt_req.MGMT_flags = T_NEGOTIATE; 5096 optmgmt_req.OPT_length = (t_scalar_t)sizeof (oh) + optlen; 5097 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5098 5099 oh.level = level; 5100 oh.name = option_name; 5101 oh.len = optlen; 5102 5103 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5104 &oh, sizeof (oh), optval, optlen, 0, _ALLOC_SLEEP); 5105 /* Let option management work in the presence of data flow control */ 5106 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5107 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5108 mp = NULL; 5109 mutex_enter(&so->so_lock); 5110 if (error) { 5111 eprintsoline(so, error); 5112 goto done; 5113 } 5114 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5115 (t_uscalar_t)sizeof (struct T_optmgmt_ack), &mp, 0); 5116 if (error) { 5117 eprintsoline(so, error); 5118 goto done; 5119 } 5120 ASSERT(mp); 5121 /* No need to verify T_optmgmt_ack */ 5122 freemsg(mp); 5123 done: 5124 /* 5125 * Check for SOL_SOCKET options and record their values. 5126 * If we know about a SOL_SOCKET parameter and the transport 5127 * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or 5128 * EPROTO) we let the setsockopt succeed. 
5129 */ 5130 if (level == SOL_SOCKET) { 5131 /* Check parameters */ 5132 switch (option_name) { 5133 case SO_DEBUG: 5134 case SO_REUSEADDR: 5135 case SO_KEEPALIVE: 5136 case SO_DONTROUTE: 5137 case SO_BROADCAST: 5138 case SO_USELOOPBACK: 5139 case SO_OOBINLINE: 5140 case SO_SNDBUF: 5141 case SO_RCVBUF: 5142 #ifdef notyet 5143 case SO_SNDLOWAT: 5144 case SO_RCVLOWAT: 5145 case SO_SNDTIMEO: 5146 case SO_RCVTIMEO: 5147 #endif /* notyet */ 5148 case SO_DGRAM_ERRIND: 5149 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5150 error = EINVAL; 5151 eprintsoline(so, error); 5152 goto done2; 5153 } 5154 ASSERT(optval); 5155 handled = B_TRUE; 5156 break; 5157 case SO_LINGER: 5158 if (optlen != (t_uscalar_t)sizeof (struct linger)) { 5159 error = EINVAL; 5160 eprintsoline(so, error); 5161 goto done2; 5162 } 5163 ASSERT(optval); 5164 handled = B_TRUE; 5165 break; 5166 } 5167 5168 #define intvalue (*(int32_t *)optval) 5169 5170 switch (option_name) { 5171 case SO_TYPE: 5172 case SO_ERROR: 5173 case SO_ACCEPTCONN: 5174 /* Can't be set */ 5175 error = ENOPROTOOPT; 5176 goto done2; 5177 case SO_LINGER: { 5178 struct linger *l = (struct linger *)optval; 5179 5180 so->so_linger.l_linger = l->l_linger; 5181 if (l->l_onoff) { 5182 so->so_linger.l_onoff = SO_LINGER; 5183 so->so_options |= SO_LINGER; 5184 } else { 5185 so->so_linger.l_onoff = 0; 5186 so->so_options &= ~SO_LINGER; 5187 } 5188 break; 5189 } 5190 5191 case SO_DEBUG: 5192 #ifdef SOCK_TEST 5193 if (intvalue & 2) 5194 sock_test_timelimit = 10 * hz; 5195 else 5196 sock_test_timelimit = 0; 5197 5198 if (intvalue & 4) 5199 do_useracc = 0; 5200 else 5201 do_useracc = 1; 5202 #endif /* SOCK_TEST */ 5203 /* FALLTHRU */ 5204 case SO_REUSEADDR: 5205 case SO_KEEPALIVE: 5206 case SO_DONTROUTE: 5207 case SO_BROADCAST: 5208 case SO_USELOOPBACK: 5209 case SO_OOBINLINE: 5210 case SO_DGRAM_ERRIND: 5211 if (intvalue != 0) { 5212 dprintso(so, 1, 5213 ("sotpi_setsockopt: setting 0x%x\n", 5214 option_name)); 5215 so->so_options |= option_name; 5216 
} else { 5217 dprintso(so, 1, 5218 ("sotpi_setsockopt: clearing 0x%x\n", 5219 option_name)); 5220 so->so_options &= ~option_name; 5221 } 5222 break; 5223 /* 5224 * The following options are only returned by us when the 5225 * T_SVR4_OPTMGMT_REQ fails. 5226 * XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs 5227 * since the transport might adjust the value and not 5228 * return exactly what was set by the application. 5229 */ 5230 case SO_SNDBUF: 5231 so->so_sndbuf = intvalue; 5232 break; 5233 case SO_RCVBUF: 5234 so->so_rcvbuf = intvalue; 5235 break; 5236 #ifdef notyet 5237 /* 5238 * We do not implement the semantics of these options 5239 * thus we shouldn't implement the options either. 5240 */ 5241 case SO_SNDLOWAT: 5242 so->so_sndlowat = intvalue; 5243 break; 5244 case SO_RCVLOWAT: 5245 so->so_rcvlowat = intvalue; 5246 break; 5247 case SO_SNDTIMEO: 5248 so->so_sndtimeo = intvalue; 5249 break; 5250 case SO_RCVTIMEO: 5251 so->so_rcvtimeo = intvalue; 5252 break; 5253 #endif /* notyet */ 5254 } 5255 #undef intvalue 5256 5257 if (error) { 5258 if ((error == ENOPROTOOPT || error == EPROTO || 5259 error == EINVAL) && handled) { 5260 dprintso(so, 1, 5261 ("setsockopt: ignoring error %d for 0x%x\n", 5262 error, option_name)); 5263 error = 0; 5264 } 5265 } 5266 } 5267 done2: 5268 ret: 5269 so_unlock_single(so, SOLOCKED); 5270 mutex_exit(&so->so_lock); 5271 return (error); 5272 } 5273