1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/t_lock.h> 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/buf.h> 34 #include <sys/conf.h> 35 #include <sys/cred.h> 36 #include <sys/kmem.h> 37 #include <sys/sysmacros.h> 38 #include <sys/vfs.h> 39 #include <sys/vnode.h> 40 #include <sys/debug.h> 41 #include <sys/errno.h> 42 #include <sys/time.h> 43 #include <sys/file.h> 44 #include <sys/open.h> 45 #include <sys/user.h> 46 #include <sys/termios.h> 47 #include <sys/stream.h> 48 #include <sys/strsubr.h> 49 #include <sys/strsun.h> 50 #include <sys/ddi.h> 51 #include <sys/esunddi.h> 52 #include <sys/flock.h> 53 #include <sys/modctl.h> 54 #include <sys/vtrace.h> 55 #include <sys/cmn_err.h> 56 #include <sys/pathname.h> 57 58 #include <sys/socket.h> 59 #include <sys/socketvar.h> 60 #include <netinet/in.h> 61 #include <sys/un.h> 62 #include <sys/strsun.h> 63 64 #include <sys/tiuser.h> 65 #define _SUN_TPI_VERSION 2 66 #include <sys/tihdr.h> 67 
#include <sys/timod.h> /* TI_GETMYNAME, TI_GETPEERNAME */ 68 69 #include <c2/audit.h> 70 71 #include <inet/common.h> 72 #include <inet/ip.h> 73 #include <inet/ip6.h> 74 #include <inet/tcp.h> 75 76 #include <fs/sockfs/nl7c.h> 77 #include <sys/zone.h> 78 79 /* 80 * Possible failures when memory can't be allocated. The documented behavior: 81 * 82 * 5.5: 4.X: XNET: 83 * accept: ENOMEM/ENOSR/EINTR - (EINTR) ENOMEM/ENOBUFS/ENOSR/ 84 * EINTR 85 * (4.X does not document EINTR but returns it) 86 * bind: ENOSR - ENOBUFS/ENOSR 87 * connect: EINTR EINTR ENOBUFS/ENOSR/EINTR 88 * getpeername: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR 89 * getsockname: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR 90 * (4.X getpeername and getsockname do not fail in practice) 91 * getsockopt: ENOMEM/ENOSR - ENOBUFS/ENOSR 92 * listen: - - ENOBUFS 93 * recv: ENOMEM/ENOSR/EINTR EINTR ENOBUFS/ENOMEM/ENOSR/ 94 * EINTR 95 * send: ENOMEM/ENOSR/EINTR ENOBUFS/EINTR ENOBUFS/ENOMEM/ENOSR/ 96 * EINTR 97 * setsockopt: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR 98 * shutdown: ENOMEM/ENOSR - ENOBUFS/ENOSR 99 * socket: ENOMEM/ENOSR ENOBUFS ENOBUFS/ENOMEM/ENOSR 100 * socketpair: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR 101 * 102 * Resolution. When allocation fails: 103 * recv: return EINTR 104 * send: return EINTR 105 * connect, accept: EINTR 106 * bind, listen, shutdown (unbind, unix_close, disconnect): sleep 107 * socket, socketpair: ENOBUFS 108 * getpeername, getsockname: sleep 109 * getsockopt, setsockopt: sleep 110 */ 111 112 #ifdef SOCK_TEST 113 /* 114 * Variables that make sockfs do something other than the standard TPI 115 * for the AF_INET transports. 116 * 117 * solisten_tpi_tcp: 118 * TCP can handle a O_T_BIND_REQ with an increased backlog even though 119 * the transport is already bound. This is needed to avoid losing the 120 * port number should listen() do a T_UNBIND_REQ followed by a 121 * O_T_BIND_REQ. 122 * 123 * soconnect_tpi_udp: 124 * UDP and ICMP can handle a T_CONN_REQ. 
125 * This is needed to make the sequence of connect(), getsockname() 126 * return the local IP address used to send packets to the connected to 127 * destination. 128 * 129 * soconnect_tpi_tcp: 130 * TCP can handle a T_CONN_REQ without seeing a O_T_BIND_REQ. 131 * Set this to non-zero to send TPI conformant messages to TCP in this 132 * respect. This is a performance optimization. 133 * 134 * soaccept_tpi_tcp: 135 * TCP can handle a T_CONN_REQ without the acceptor being bound. 136 * This is a performance optimization that has been picked up in XTI. 137 * 138 * soaccept_tpi_multioptions: 139 * When inheriting SOL_SOCKET options from the listener to the accepting 140 * socket send them as a single message for AF_INET{,6}. 141 */ 142 int solisten_tpi_tcp = 0; 143 int soconnect_tpi_udp = 0; 144 int soconnect_tpi_tcp = 0; 145 int soaccept_tpi_tcp = 0; 146 int soaccept_tpi_multioptions = 1; 147 #else /* SOCK_TEST */ 148 #define soconnect_tpi_tcp 0 149 #define soconnect_tpi_udp 0 150 #define solisten_tpi_tcp 0 151 #define soaccept_tpi_tcp 0 152 #define soaccept_tpi_multioptions 1 153 #endif /* SOCK_TEST */ 154 155 #ifdef SOCK_TEST 156 extern int do_useracc; 157 extern clock_t sock_test_timelimit; 158 #endif /* SOCK_TEST */ 159 160 /* 161 * Some X/Open added checks might have to be backed out to keep SunOS 4.X 162 * applications working. Turn on this flag to disable these checks. 
163 */ 164 int xnet_skip_checks = 0; 165 int xnet_check_print = 0; /* non-zero: sotpi_bindlisten() printf()s when its X/Open bind state check fails with EINVAL */ 166 int xnet_truncate_print = 0; 167 168 extern void sigintr(k_sigset_t *, int); 169 extern void sigunintr(k_sigset_t *); 170 171 extern void *nl7c_lookup_addr(void *, t_uscalar_t); 172 extern void *nl7c_add_addr(void *, t_uscalar_t); 173 extern void nl7c_listener_addr(void *, queue_t *); 174 175 static int sotpi_unbind(struct sonode *, int); 176 177 /* TPI sockfs sonode operations */ 178 static int sotpi_accept(struct sonode *, int, struct sonode **); 179 static int sotpi_bind(struct sonode *, struct sockaddr *, socklen_t, 180 int); 181 static int sotpi_connect(struct sonode *, const struct sockaddr *, 182 socklen_t, int, int); 183 static int sotpi_listen(struct sonode *, int); 184 static int sotpi_sendmsg(struct sonode *, struct nmsghdr *, 185 struct uio *); 186 static int sotpi_shutdown(struct sonode *, int); 187 static int sotpi_getsockname(struct sonode *); 188 /* Positional initializer: the trailing comment on each entry names the sonodeops_t slot that entry is meant to fill, so the order here must follow the member order of sonodeops_t. */ 189 sonodeops_t sotpi_sonodeops = { 190 sotpi_accept, /* sop_accept */ 191 sotpi_bind, /* sop_bind */ 192 sotpi_listen, /* sop_listen */ 193 sotpi_connect, /* sop_connect */ 194 sotpi_recvmsg, /* sop_recvmsg */ 195 sotpi_sendmsg, /* sop_sendmsg */ 196 sotpi_getpeername, /* sop_getpeername */ 197 sotpi_getsockname, /* sop_getsockname */ 198 sotpi_shutdown, /* sop_shutdown */ 199 sotpi_getsockopt, /* sop_getsockopt */ 200 sotpi_setsockopt /* sop_setsockopt */ 201 }; 202 203 /* 204 * Common create code for socket and accept. If tso is set the values 205 * from that node is used instead of issuing a T_INFO_REQ. 206 * 207 * Assumes that the caller has a VN_HOLD on accessvp. 208 * The VN_RELE will occur either when sotpi_create() fails or when 209 * the returned sonode is freed. 
210 */ 211 struct sonode * 212 sotpi_create(vnode_t *accessvp, int domain, int type, int protocol, int version, 213 struct sonode *tso, int *errorp) 214 { 215 struct sonode *so; 216 vnode_t *vp; 217 int flags, error; 218 219 ASSERT(accessvp != NULL); 220 vp = makesockvp(accessvp, domain, type, protocol); 221 ASSERT(vp != NULL); 222 so = VTOSO(vp); 223 224 flags = FREAD|FWRITE; 225 if (tso != NULL) { 226 if ((tso->so_state & (SS_TCP_FAST_ACCEPT)) != 0) { 227 flags |= SO_ACCEPTOR|SO_SOCKSTR; 228 so->so_state |= SS_TCP_FAST_ACCEPT; 229 } 230 } else { 231 if ((so->so_type == SOCK_STREAM) && 232 (so->so_family == AF_INET || so->so_family == AF_INET6)) { 233 flags |= SO_SOCKSTR; 234 so->so_state |= SS_TCP_FAST_ACCEPT; 235 } 236 } 237 238 /* 239 * Tell local transport that it is talking to sockets. 240 */ 241 if (so->so_family == AF_UNIX) { 242 flags |= SO_SOCKSTR; 243 } 244 245 if (error = socktpi_open(&vp, flags, CRED())) { 246 VN_RELE(vp); 247 *errorp = error; 248 return (NULL); 249 } 250 251 if (error = so_strinit(so, tso)) { 252 (void) VOP_CLOSE(vp, 0, 1, 0, CRED()); 253 VN_RELE(vp); 254 *errorp = error; 255 return (NULL); 256 } 257 258 if (version == SOV_DEFAULT) 259 version = so_default_version; 260 261 so->so_version = (short)version; 262 return (so); 263 } 264 265 /* 266 * Bind the socket to an unspecified address in sockfs only. 267 * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't 268 * required in all cases. 269 */ 270 static void 271 so_automatic_bind(struct sonode *so) 272 { 273 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 274 275 ASSERT(MUTEX_HELD(&so->so_lock)); 276 ASSERT(!(so->so_state & SS_ISBOUND)); 277 ASSERT(so->so_unbind_mp); 278 279 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 280 bzero(so->so_laddr_sa, so->so_laddr_len); 281 so->so_laddr_sa->sa_family = so->so_family; 282 so->so_state |= SS_ISBOUND; 283 } 284 285 286 /* 287 * bind the socket. 
288 * 289 * If the socket is already bound and none of _SOBIND_SOCKBSD or _SOBIND_XPG4_2 290 * are passed in we allow rebinding. Note that for backwards compatibility 291 * even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to sobind/bind. 292 * Thus the rebinding code is currently not executed. 293 * 294 * The constraints for rebinding are: 295 * - it is a SOCK_DGRAM, or 296 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 297 * and no listen() has been done. 298 * This rebinding code was added based on some language in the XNET book 299 * about not returning EINVAL it the protocol allows rebinding. However, 300 * this language is not present in the Posix socket draft. Thus maybe the 301 * rebinding logic should be deleted from the source. 302 * 303 * A null "name" can be used to unbind the socket if: 304 * - it is a SOCK_DGRAM, or 305 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 306 * and no listen() has been done. 307 */ /* sotpi_bindlisten: common worker behind sotpi_bind(). Flags examined here: _SOBIND_LOCK_HELD (caller already holds so_lock with SOLOCKED set; both are left held on return), _SOBIND_REBIND (reuse the saved local address; name must be NULL), _SOBIND_UNSPEC (bind to an unspecified address; name must be NULL; a no-op if already bound), _SOBIND_SOCKBSD and _SOBIND_XPG4_2 (a non-NULL name on a bound socket fails with EINVAL), _SOBIND_LISTEN (SS_ACCEPTCONN and so_backlog are set before the bind request is sent) and _SOBIND_NOXLATE (sets SS_FADDR_NOXLATE; AF_UNIX only). Returns 0 or an errno value. On failure the SS_ACCEPTCONN/so_backlog changes are rolled back and, unless unbind_on_err was cleared, a socket left bound is unbound again. */ 308 static int 309 sotpi_bindlisten(struct sonode *so, struct sockaddr *name, 310 socklen_t namelen, int backlog, int flags) 311 { 312 struct T_bind_req bind_req; 313 struct T_bind_ack *bind_ack; 314 int error = 0; 315 mblk_t *mp; 316 void *addr; 317 t_uscalar_t addrlen; 318 int unbind_on_err = 1; 319 boolean_t clear_acceptconn_on_err = B_FALSE; 320 boolean_t restore_backlog_on_err = B_FALSE; 321 int save_so_backlog; 322 t_scalar_t PRIM_type = O_T_BIND_REQ; 323 boolean_t tcp_udp_xport; 324 void *nl7c = NULL; 325 326 dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n", 327 so, name, namelen, backlog, flags, 328 pr_state(so->so_state, so->so_mode))); 329 /* based on the socket type only; the address family is tested separately wherever this is used */ 330 tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM; 331 332 if (!(flags & _SOBIND_LOCK_HELD)) { 333 mutex_enter(&so->so_lock); 334 so_lock_single(so); /* Set SOLOCKED */ 335 } else { 336 ASSERT(MUTEX_HELD(&so->so_lock)); 337 ASSERT(so->so_flag & SOLOCKED); 338 } 339 340 /* 341 * Make sure that there is a preallocated 
unbind_req message 342 * before binding. This message is allocated when the socket is 343 * created but it might have been consumed. 344 */ 345 if (so->so_unbind_mp == NULL) { 346 dprintso(so, 1, ("sobind: allocating unbind_req\n")); 347 /* NOTE: holding so_lock while sleeping */ 348 so->so_unbind_mp = 349 soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP); 350 } 351 352 if (flags & _SOBIND_REBIND) { 353 /* 354 * Called from solisten after doing an sotpi_unbind() or 355 * potentially without the unbind (latter for AF_INET{,6}). 356 */ 357 ASSERT(name == NULL && namelen == 0); 358 359 if (so->so_family == AF_UNIX) { 360 ASSERT(so->so_ux_bound_vp); 361 addr = &so->so_ux_laddr; 362 addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr); 363 dprintso(so, 1, 364 ("sobind rebind UNIX: addrlen %d, addr 0x%p, vp %p\n", 365 addrlen, 366 ((struct so_ux_addr *)addr)->soua_vp, 367 so->so_ux_bound_vp)); 368 } else { 369 addr = so->so_laddr_sa; 370 addrlen = (t_uscalar_t)so->so_laddr_len; 371 } 372 } else if (flags & _SOBIND_UNSPEC) { 373 ASSERT(name == NULL && namelen == 0); 374 375 /* 376 * The caller checked SS_ISBOUND but not necessarily 377 * under so_lock 378 */ 379 if (so->so_state & SS_ISBOUND) { 380 /* No error */ 381 goto done; 382 } 383 384 /* Set an initial local address */ 385 switch (so->so_family) { 386 case AF_UNIX: 387 /* 388 * Use an address with same size as struct sockaddr 389 * just like BSD. 390 */ 391 so->so_laddr_len = 392 (socklen_t)sizeof (struct sockaddr); 393 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 394 bzero(so->so_laddr_sa, so->so_laddr_len); 395 so->so_laddr_sa->sa_family = so->so_family; 396 397 /* 398 * Pass down an address with the implicit bind 399 * magic number and the rest all zeros. 400 * The transport will return a unique address. 
401 */ 402 so->so_ux_laddr.soua_vp = NULL; 403 so->so_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT; 404 addr = &so->so_ux_laddr; 405 addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr); 406 break; 407 408 case AF_INET: 409 case AF_INET6: 410 /* 411 * An unspecified bind in TPI has a NULL address. 412 * Set the address in sockfs to have the sa_family. 413 */ 414 so->so_laddr_len = (so->so_family == AF_INET) ? 415 (socklen_t)sizeof (sin_t) : 416 (socklen_t)sizeof (sin6_t); 417 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 418 bzero(so->so_laddr_sa, so->so_laddr_len); 419 so->so_laddr_sa->sa_family = so->so_family; 420 addr = NULL; 421 addrlen = 0; 422 break; 423 424 default: 425 /* 426 * An unspecified bind in TPI has a NULL address. 427 * Set the address in sockfs to be zero length. 428 * 429 * Can not assume there is a sa_family for all 430 * protocol families. For example, AF_X25 does not 431 * have a family field. 432 */ 433 so->so_laddr_len = 0; /* XXX correct? */ 434 bzero(so->so_laddr_sa, so->so_laddr_len); /* so_laddr_len is 0 here, so nothing is cleared */ 435 addr = NULL; 436 addrlen = 0; 437 break; 438 } 439 440 } else { 441 if (so->so_state & SS_ISBOUND) { 442 /* 443 * If it is ok to rebind the socket, first unbind 444 * with the transport. A rebind to the NULL address 445 * is interpreted as an unbind. 446 * Note that a bind to NULL in BSD does unbind the 447 * socket but it fails with EINVAL. 448 * Note that regular sockets set SOV_SOCKBSD i.e. 449 * _SOBIND_SOCKBSD gets set here hence no type of 450 * socket does currently allow rebinding. 451 * 452 * If the name is NULL just do an unbind. 
453 */ 454 if (flags & (_SOBIND_SOCKBSD|_SOBIND_XPG4_2) && 455 name != NULL) { 456 error = EINVAL; 457 unbind_on_err = 0; 458 eprintsoline(so, error); 459 goto done; 460 } 461 if ((so->so_mode & SM_CONNREQUIRED) && 462 (so->so_state & SS_CANTREBIND)) { 463 error = EINVAL; 464 unbind_on_err = 0; 465 eprintsoline(so, error); 466 goto done; 467 } 468 error = sotpi_unbind(so, 0); 469 if (error) { 470 eprintsoline(so, error); 471 goto done; 472 } 473 ASSERT(!(so->so_state & SS_ISBOUND)); 474 if (name == NULL) { 475 so->so_state &= 476 ~(SS_ISCONNECTED|SS_ISCONNECTING); 477 goto done; 478 } 479 } 480 /* X/Open requires this check */ 481 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 482 if (xnet_check_print) { 483 printf("sockfs: X/Open bind state check " 484 "caused EINVAL\n"); 485 } 486 error = EINVAL; 487 goto done; 488 } 489 490 switch (so->so_family) { 491 case AF_UNIX: 492 /* 493 * All AF_UNIX addresses are nul terminated 494 * when copied (copyin_name) in so the minimum 495 * length is 3 bytes. 496 */ 497 if (name == NULL || 498 (ssize_t)namelen <= sizeof (short) + 1) { 499 error = EISDIR; 500 eprintsoline(so, error); 501 goto done; 502 } 503 /* 504 * Verify so_family matches the bound family. 505 * BSD does not check this for AF_UNIX resulting 506 * in funny mknods. 507 */ 508 if (name->sa_family != so->so_family) { 509 error = EAFNOSUPPORT; 510 goto done; 511 } 512 break; 513 case AF_INET: 514 if (name == NULL) { 515 error = EINVAL; 516 eprintsoline(so, error); 517 goto done; 518 } 519 if ((size_t)namelen != sizeof (sin_t)) { 520 error = name->sa_family != so->so_family ? 521 EAFNOSUPPORT : EINVAL; 522 eprintsoline(so, error); 523 goto done; 524 } 525 if ((flags & _SOBIND_XPG4_2) && 526 (name->sa_family != so->so_family)) { 527 /* 528 * This check has to be made for X/Open 529 * sockets however application failures have 530 * been observed when it is applied to 531 * all sockets. 
532 */ 533 error = EAFNOSUPPORT; 534 eprintsoline(so, error); 535 goto done; 536 } 537 /* 538 * Force a zero sa_family to match so_family. 539 * 540 * Some programs like inetd(1M) don't set the 541 * family field. Other programs leave 542 * sin_family set to garbage - SunOS 4.X does 543 * not check the family field on a bind. 544 * We use the family field that 545 * was passed in to the socket() call. 546 */ 547 name->sa_family = so->so_family; 548 break; 549 550 case AF_INET6: { 551 #ifdef DEBUG 552 sin6_t *sin6 = (sin6_t *)name; 553 #endif /* DEBUG */ 554 555 if (name == NULL) { 556 error = EINVAL; 557 eprintsoline(so, error); 558 goto done; 559 } 560 if ((size_t)namelen != sizeof (sin6_t)) { 561 error = name->sa_family != so->so_family ? 562 EAFNOSUPPORT : EINVAL; 563 eprintsoline(so, error); 564 goto done; 565 } 566 if (name->sa_family != so->so_family) { 567 /* 568 * With IPv6 we require the family to match 569 * unlike in IPv4. 570 */ 571 error = EAFNOSUPPORT; 572 eprintsoline(so, error); 573 goto done; 574 } 575 #ifdef DEBUG 576 /* 577 * Verify that apps don't forget to clear 578 * sin6_scope_id etc 579 */ 580 if (sin6->sin6_scope_id != 0 && 581 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 582 cmn_err(CE_WARN, 583 "bind with uninitialized sin6_scope_id " 584 "(%d) on socket. Pid = %d\n", 585 (int)sin6->sin6_scope_id, 586 (int)curproc->p_pid); 587 } 588 if (sin6->__sin6_src_id != 0) { 589 cmn_err(CE_WARN, 590 "bind with uninitialized __sin6_src_id " 591 "(%d) on socket. Pid = %d\n", 592 (int)sin6->__sin6_src_id, 593 (int)curproc->p_pid); 594 } 595 #endif /* DEBUG */ 596 break; 597 } 598 default: 599 /* 600 * Don't do any length or sa_family check to allow 601 * non-sockaddr style addresses. 
602 */ 603 if (name == NULL) { 604 error = EINVAL; 605 eprintsoline(so, error); 606 goto done; 607 } 608 break; 609 } 610 611 if (namelen > (t_uscalar_t)so->so_laddr_maxlen) { 612 error = ENAMETOOLONG; 613 eprintsoline(so, error); 614 goto done; 615 } 616 /* 617 * Save local address. 618 */ 619 so->so_laddr_len = (socklen_t)namelen; 620 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 621 bcopy(name, so->so_laddr_sa, namelen); 622 623 addr = so->so_laddr_sa; 624 addrlen = (t_uscalar_t)so->so_laddr_len; 625 switch (so->so_family) { 626 case AF_INET6: 627 case AF_INET: 628 break; 629 case AF_UNIX: { 630 struct sockaddr_un *soun = 631 (struct sockaddr_un *)so->so_laddr_sa; 632 struct vnode *vp; 633 struct vattr vattr; 634 635 ASSERT(so->so_ux_bound_vp == NULL); 636 /* 637 * Create vnode for the specified path name. 638 * Keep vnode held with a reference in so_ux_bound_vp. 639 * Use the vnode pointer as the address used in the 640 * bind with the transport. 641 * 642 * The mode requested below is 0777 with the u.u_cmask bits 643 * cleared, so the creation mask is applied. NOTE(review): this comment used to say the umask is not observed, as in BSD - confirm which behaviour is intended. 644 */ 645 /* MAXPATHLEN + soun_family + nul termination */ 646 if (so->so_laddr_len > 647 (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) { 648 error = ENAMETOOLONG; 649 eprintsoline(so, error); 650 goto done; 651 } 652 vattr.va_type = VSOCK; 653 vattr.va_mode = 0777 & ~u.u_cmask; 654 vattr.va_mask = AT_TYPE|AT_MODE; 655 /* NOTE: holding so_lock */ 656 error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr, 657 EXCL, 0, &vp, CRMKNOD, 0, 0); 658 if (error) { 659 if (error == EEXIST) 660 error = EADDRINUSE; 661 eprintsoline(so, error); 662 goto done; 663 } 664 /* 665 * Establish pointer from the underlying filesystem 666 * vnode to the socket node. 667 * so_ux_bound_vp and v_stream->sd_vnode form the 668 * cross-linkage between the underlying filesystem 669 * node and the socket node. 
670 */ 671 ASSERT(SOTOV(so)->v_stream); 672 mutex_enter(&vp->v_lock); 673 vp->v_stream = SOTOV(so)->v_stream; 674 so->so_ux_bound_vp = vp; 675 mutex_exit(&vp->v_lock); 676 677 /* 678 * Use the vnode pointer value as a unique address 679 * (together with the magic number to avoid conflicts 680 * with implicit binds) in the transport provider. 681 */ 682 so->so_ux_laddr.soua_vp = (void *)so->so_ux_bound_vp; 683 so->so_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT; 684 addr = &so->so_ux_laddr; 685 addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr); 686 dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n", 687 addrlen, 688 ((struct so_ux_addr *)addr)->soua_vp)); 689 break; 690 } 691 } /* end switch (so->so_family) */ 692 } 693 694 /* 695 * set SS_ACCEPTCONN before sending down O_T_BIND_REQ since 696 * the transport can start passing up T_CONN_IND messages 697 * as soon as it receives the bind req and strsock_proto() 698 * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs. 699 */ 700 if (flags & _SOBIND_LISTEN) { 701 if ((so->so_state & SS_ACCEPTCONN) == 0) 702 clear_acceptconn_on_err = B_TRUE; 703 save_so_backlog = so->so_backlog; 704 restore_backlog_on_err = B_TRUE; 705 so->so_state |= SS_ACCEPTCONN; 706 so->so_backlog = backlog; 707 } 708 709 /* 710 * If NL7C addr(s) have been configured check for addr/port match, 711 * or if an implicit NL7C socket via AF_NCA mark socket as NL7C. 712 * 713 * NL7C supports the TCP transport only so check AF_INET and AF_INET6 714 * family sockets only. If match mark as such. 715 */ 716 if ((nl7c_enabled && addr != NULL && 717 (so->so_family == AF_INET || so->so_family == AF_INET6) && 718 (nl7c = nl7c_lookup_addr(addr, addrlen))) || 719 so->so_nl7c_flags == NL7C_AF_NCA) { 720 /* 721 * NL7C is not supported in non-global zones, 722 * we enforce this restriction here. 
723 */ 724 if (so->so_zoneid == GLOBAL_ZONEID) { 725 /* An NL7C socket, mark it */ 726 so->so_nl7c_flags |= NL7C_ENABLED; 727 } else 728 nl7c = NULL; 729 } 730 /* 731 * We send a T_BIND_REQ for TCP/UDP since we know it supports it, 732 * for other transports we will send in a O_T_BIND_REQ. 733 */ 734 if (tcp_udp_xport && 735 (so->so_family == AF_INET || so->so_family == AF_INET6)) 736 PRIM_type = T_BIND_REQ; 737 738 bind_req.PRIM_type = PRIM_type; 739 bind_req.ADDR_length = addrlen; 740 bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req); 741 bind_req.CONIND_number = backlog; 742 /* NOTE: holding so_lock while sleeping */ 743 mp = soallocproto2(&bind_req, sizeof (bind_req), 744 addr, addrlen, 0, _ALLOC_SLEEP); 745 so->so_state &= ~SS_LADDR_VALID; 746 /* Done using so_laddr_sa - can drop the lock */ 747 mutex_exit(&so->so_lock); 748 749 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 750 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 751 if (error) { 752 eprintsoline(so, error); 753 mutex_enter(&so->so_lock); 754 goto done; 755 } 756 757 mutex_enter(&so->so_lock); 758 error = sowaitprim(so, PRIM_type, T_BIND_ACK, 759 (t_uscalar_t)sizeof (*bind_ack), &mp, 0); 760 if (error) { 761 eprintsoline(so, error); 762 goto done; 763 } 764 ASSERT(mp); 765 /* 766 * Even if some TPI message (e.g. T_DISCON_IND) was received in 767 * strsock_proto while the lock was dropped above, the bind 768 * is allowed to complete. 769 */ 770 771 /* Mark as bound. This will be undone if we detect errors below. */ 772 if (flags & _SOBIND_NOXLATE) { 773 ASSERT(so->so_family == AF_UNIX); 774 so->so_state |= SS_FADDR_NOXLATE; 775 } 776 ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND)); 777 so->so_state |= SS_ISBOUND; 778 ASSERT(so->so_unbind_mp); 779 780 /* note that we've already set SS_ACCEPTCONN above */ 781 782 /* 783 * Recompute addrlen - an unspecified bind sent down an 784 * address of length zero but we expect the appropriate length 785 * in return. 
786 */ 787 addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ? 788 sizeof (so->so_ux_laddr) : so->so_laddr_len); 789 790 bind_ack = (struct T_bind_ack *)mp->b_rptr; 791 /* 792 * The alignment restriction is really too strict but 793 * we want enough alignment to inspect the fields of 794 * a sockaddr_in. 795 */ 796 addr = sogetoff(mp, bind_ack->ADDR_offset, 797 bind_ack->ADDR_length, 798 __TPI_ALIGN_SIZE); 799 if (addr == NULL) { 800 freemsg(mp); 801 error = EPROTO; 802 eprintsoline(so, error); 803 goto done; 804 } 805 if (!(flags & _SOBIND_UNSPEC)) { 806 /* 807 * Verify that the transport didn't return something we 808 * did not want e.g. an address other than what we asked for. 809 * 810 * NOTE: These checks would go away if/when we switch to 811 * using the new TPI (in which the transport would fail 812 * the request instead of assigning a different address). 813 * 814 * NOTE2: For protocols that we don't know (i.e. any 815 * other than AF_INET6, AF_INET and AF_UNIX), we 816 * cannot know if the transport should be expected to 817 * return the same address as that requested. 818 * 819 * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send 820 * down a T_BIND_REQ. We use O_T_BIND_REQ for others. 821 * 822 * For example, in the case of netatalk it may be 823 * inappropriate for the transport to return the 824 * requested address (as it may have allocated a local 825 * port number in behaviour similar to that of an 826 * AF_INET bind request with a port number of zero). 827 * 828 * Given the definition of O_T_BIND_REQ, where the 829 * transport may bind to an address other than the 830 * requested address, it's not possible to determine 831 * whether a returned address that differs from the 832 * requested address is a reason to fail (because the 833 * requested address was not available) or succeed 834 * (because the transport allocated an appropriate 835 * address and/or port). 
836 * 837 * sockfs currently requires that the transport return 838 * the requested address in the T_BIND_ACK, unless 839 * there is code here to allow for any discrepancy. 840 * Such code exists for AF_INET and AF_INET6. 841 * 842 * Netatalk chooses to return the requested address 843 * rather than the (correct) allocated address. This 844 * means that netatalk violates the TPI specification 845 * (and would not function correctly if used from a 846 * TLI application), but it does mean that it works 847 * with sockfs. 848 * 849 * As noted above, using the newer XTI bind primitive 850 * (T_BIND_REQ) in preference to O_T_BIND_REQ would 851 * allow sockfs to be more sure about whether or not 852 * the bind request had succeeded (as transports are 853 * not permitted to bind to a different address than 854 * that requested - they must return failure). 855 * Unfortunately, support for T_BIND_REQ may not be 856 * present in all transport implementations (netatalk, 857 * for example, doesn't have it), making the 858 * transition difficult. 859 */ 860 if (bind_ack->ADDR_length != addrlen) { 861 /* Assumes that the requested address was in use */ 862 freemsg(mp); 863 error = EADDRINUSE; 864 eprintsoline(so, error); 865 goto done; 866 } 867 868 switch (so->so_family) { 869 case AF_INET6: 870 case AF_INET: { 871 sin_t *rname, *aname; 872 873 rname = (sin_t *)addr; 874 aname = (sin_t *)so->so_laddr_sa; 875 876 /* 877 * Take advantage of the alignment 878 * of sin_port and sin6_port which fall 879 * in the same place in their data structures. 880 * Just use sin_port for either address family. 881 * 882 * This may become a problem if (heaven forbid) 883 * there's a separate ipv6port_reserved... :-P 884 * 885 * Binding to port 0 has the semantics of letting 886 * the transport bind to any port. 887 * 888 * If the transport is TCP or UDP since we had sent 889 * a T_BIND_REQ we would not get a port other than 890 * what we asked for. 
891 */ 892 if (tcp_udp_xport) { 893 /* 894 * Pick up the new port number if we bound to 895 * port 0. 896 */ 897 if (aname->sin_port == 0) 898 aname->sin_port = rname->sin_port; 899 so->so_state |= SS_LADDR_VALID; 900 break; 901 } 902 if (aname->sin_port != 0 && 903 aname->sin_port != rname->sin_port) { 904 freemsg(mp); 905 error = EADDRINUSE; 906 eprintsoline(so, error); 907 goto done; 908 } 909 /* 910 * Pick up the new port number if we bound to port 0. 911 */ 912 aname->sin_port = rname->sin_port; 913 914 /* 915 * Unfortunately, addresses aren't _quite_ the same. 916 */ 917 if (so->so_family == AF_INET) { 918 if (aname->sin_addr.s_addr != 919 rname->sin_addr.s_addr) { 920 freemsg(mp); 921 error = EADDRNOTAVAIL; 922 eprintsoline(so, error); 923 goto done; 924 } 925 } else { 926 sin6_t *rname6 = (sin6_t *)rname; 927 sin6_t *aname6 = (sin6_t *)aname; 928 929 if (!IN6_ARE_ADDR_EQUAL(&aname6->sin6_addr, 930 &rname6->sin6_addr)) { 931 freemsg(mp); 932 error = EADDRNOTAVAIL; 933 eprintsoline(so, error); 934 goto done; 935 } 936 } 937 break; 938 } 939 case AF_UNIX: 940 if (bcmp(addr, &so->so_ux_laddr, addrlen) != 0) { 941 freemsg(mp); 942 error = EADDRINUSE; 943 eprintsoline(so, error); 944 eprintso(so, 945 ("addrlen %d, addr 0x%x, vp %p\n", 946 addrlen, *((int *)addr), 947 so->so_ux_bound_vp)); 948 goto done; 949 } 950 so->so_state |= SS_LADDR_VALID; 951 break; 952 default: 953 /* 954 * NOTE: This assumes that addresses can be 955 * byte-compared for equivalence. 956 */ 957 if (bcmp(addr, so->so_laddr_sa, addrlen) != 0) { 958 freemsg(mp); 959 error = EADDRINUSE; 960 eprintsoline(so, error); 961 goto done; 962 } 963 /* 964 * Don't mark SS_LADDR_VALID, as we cannot be 965 * sure that the returned address is the real 966 * bound address when talking to an unknown 967 * transport. 968 */ 969 break; 970 } 971 } else { 972 /* 973 * Save the returned address for getsockname. 974 * Needed for unspecific bind unless transport supports 975 * the TI_GETMYNAME ioctl. 
976 * Do this for AF_INET{,6} even though they do, as 977 * caching info here is much better performance than 978 * a TPI/STREAMS trip to the transport for getsockname. 979 * Any which can't for some reason _must_ _not_ set 980 * LADDR_VALID here for the caching version of getsockname 981 * to not break; 982 */ 983 switch (so->so_family) { 984 case AF_UNIX: 985 /* 986 * Record the address bound with the transport 987 * for use by socketpair. 988 */ 989 bcopy(addr, &so->so_ux_laddr, addrlen); 990 so->so_state |= SS_LADDR_VALID; 991 break; 992 case AF_INET: 993 case AF_INET6: 994 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 995 bcopy(addr, so->so_laddr_sa, so->so_laddr_len); 996 so->so_state |= SS_LADDR_VALID; 997 break; 998 default: 999 /* 1000 * Don't mark SS_LADDR_VALID, as we cannot be 1001 * sure that the returned address is the real 1002 * bound address when talking to an unknown 1003 * transport. 1004 */ 1005 break; 1006 } 1007 } 1008 1009 if (nl7c == NULL && (so->so_nl7c_flags & NL7C_AF_NCA) && 1010 (so->so_nl7c_flags & NL7C_ENABLED)) { 1011 /* 1012 * Was an AF_NCA bind() so add it to the addr list for 1013 * reporting purposes. 
1014 */ 1015 nl7c = nl7c_add_addr(addr, addrlen); 1016 } 1017 if (nl7c != NULL) { 1018 nl7c_listener_addr(nl7c, strvp2wq(SOTOV(so))); 1019 } 1020 1021 freemsg(mp); 1022 1023 done: 1024 if (error) { 1025 /* reset state & backlog to values held on entry */ 1026 if (clear_acceptconn_on_err == B_TRUE) 1027 so->so_state &= ~SS_ACCEPTCONN; 1028 if (restore_backlog_on_err == B_TRUE) 1029 so->so_backlog = save_so_backlog; 1030 1031 if (unbind_on_err && so->so_state & SS_ISBOUND) { 1032 int err; 1033 /* NOTE(review): if this cleanup unbind fails, the line below logs error (the original failure) rather than err, and err is otherwise dropped - confirm that is intended */ 1034 err = sotpi_unbind(so, 0); 1035 /* LINTED - statement has no consequent: if */ 1036 if (err) { 1037 eprintsoline(so, error); 1038 } else { 1039 ASSERT(!(so->so_state & SS_ISBOUND)); 1040 } 1041 } 1042 } 1043 if (!(flags & _SOBIND_LOCK_HELD)) { 1044 so_unlock_single(so, SOLOCKED); 1045 mutex_exit(&so->so_lock); 1046 } else { 1047 /* If the caller held the lock don't release it here */ 1048 ASSERT(MUTEX_HELD(&so->so_lock)); 1049 ASSERT(so->so_flag & SOLOCKED); 1050 } 1051 return (error); 1052 } 1053 1054 /* bind the socket: when _SOBIND_SOCKETPAIR is set it is stripped from flags and sotpi_bindlisten() is called with a backlog of 1; otherwise the backlog passed is 0 */ 1055 int 1056 sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 1057 int flags) 1058 { 1059 if ((flags & _SOBIND_SOCKETPAIR) == 0) 1060 return (sotpi_bindlisten(so, name, namelen, 0, flags)); 1061 1062 flags &= ~_SOBIND_SOCKETPAIR; 1063 return (sotpi_bindlisten(so, name, namelen, 1, flags)); 1064 } 1065 1066 /* 1067 * Unbind a socket - used when bind() fails, when bind() specifies a NULL 1068 * address, or when listen needs to unbind and bind. 1069 * If the _SOUNBIND_REBIND flag is specified the addresses are retained 1070 * so that a sobind can pick them up. 
1071 */ 1072 static int 1073 sotpi_unbind(struct sonode *so, int flags) 1074 { 1075 struct T_unbind_req unbind_req; 1076 int error = 0; 1077 mblk_t *mp; 1078 1079 dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n", 1080 so, flags, pr_state(so->so_state, so->so_mode))); 1081 1082 ASSERT(MUTEX_HELD(&so->so_lock)); 1083 ASSERT(so->so_flag & SOLOCKED); 1084 1085 if (!(so->so_state & SS_ISBOUND)) { 1086 error = EINVAL; 1087 eprintsoline(so, error); 1088 goto done; 1089 } 1090 1091 mutex_exit(&so->so_lock); 1092 1093 /* 1094 * Flush the read and write side (except stream head read queue) 1095 * and send down T_UNBIND_REQ. 1096 */ 1097 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW); 1098 1099 unbind_req.PRIM_type = T_UNBIND_REQ; 1100 mp = soallocproto1(&unbind_req, sizeof (unbind_req), 1101 0, _ALLOC_SLEEP); 1102 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1103 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1104 mutex_enter(&so->so_lock); 1105 if (error) { 1106 eprintsoline(so, error); 1107 goto done; 1108 } 1109 1110 error = sowaitokack(so, T_UNBIND_REQ); 1111 if (error) { 1112 eprintsoline(so, error); 1113 goto done; 1114 } 1115 1116 /* 1117 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1118 * strsock_proto while the lock was dropped above, the unbind 1119 * is allowed to complete. 1120 */ 1121 if (!(flags & _SOUNBIND_REBIND)) { 1122 /* 1123 * Clear out bound address. 1124 */ 1125 vnode_t *vp; 1126 1127 if ((vp = so->so_ux_bound_vp) != NULL) { 1128 ASSERT(vp->v_stream); 1129 so->so_ux_bound_vp = NULL; 1130 vn_rele_stream(vp); 1131 } 1132 /* Clear out address */ 1133 so->so_laddr_len = 0; 1134 } 1135 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN|SS_LADDR_VALID); 1136 done: 1137 /* If the caller held the lock don't release it here */ 1138 ASSERT(MUTEX_HELD(&so->so_lock)); 1139 ASSERT(so->so_flag & SOLOCKED); 1140 1141 return (error); 1142 } 1143 1144 /* 1145 * listen on the socket. 
1146 * For TPI conforming transports this has to first unbind with the transport 1147 * and then bind again using the new backlog. 1148 */ 1149 int 1150 sotpi_listen(struct sonode *so, int backlog) 1151 { 1152 int error = 0; 1153 1154 dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n", 1155 so, backlog, pr_state(so->so_state, so->so_mode))); 1156 1157 if (so->so_serv_type == T_CLTS) 1158 return (EOPNOTSUPP); 1159 1160 /* 1161 * If the socket is ready to accept connections already, then 1162 * return without doing anything. This avoids a problem where 1163 * a second listen() call fails if a connection is pending and 1164 * leaves the socket unbound. Only when we are not unbinding 1165 * with the transport can we safely increase the backlog. 1166 */ 1167 if (so->so_state & SS_ACCEPTCONN && 1168 !((so->so_family == AF_INET || so->so_family == AF_INET6) && 1169 /*CONSTCOND*/ 1170 !solisten_tpi_tcp)) 1171 return (0); 1172 1173 if (so->so_state & SS_ISCONNECTED) 1174 return (EINVAL); 1175 1176 mutex_enter(&so->so_lock); 1177 so_lock_single(so); /* Set SOLOCKED */ 1178 1179 if (backlog < 0) 1180 backlog = 0; 1181 /* 1182 * Use the same qlimit as in BSD. BSD checks the qlimit 1183 * before queuing the next connection implying that a 1184 * listen(sock, 0) allows one connection to be queued. 1185 * BSD also uses 1.5 times the requested backlog. 1186 * 1187 * XNS Issue 4 required a strict interpretation of the backlog. 1188 * This has been waived subsequently for Issue 4 and the change 1189 * incorporated in XNS Issue 5. So we aren't required to do 1190 * anything special for XPG apps. 1191 */ 1192 if (backlog >= (INT_MAX - 1) / 3) 1193 backlog = INT_MAX; 1194 else 1195 backlog = backlog * 3 / 2 + 1; 1196 1197 /* 1198 * If the listen doesn't change the backlog we do nothing. 1199 * This avoids an EPROTO error from the transport. 
1200 */ 1201 if ((so->so_state & SS_ACCEPTCONN) && 1202 so->so_backlog == backlog) 1203 goto done; 1204 1205 if (!(so->so_state & SS_ISBOUND)) { 1206 /* 1207 * Must have been explicitly bound in the UNIX domain. 1208 */ 1209 if (so->so_family == AF_UNIX) { 1210 error = EINVAL; 1211 goto done; 1212 } 1213 error = sotpi_bindlisten(so, NULL, 0, backlog, 1214 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN); 1215 } else if (backlog > 0) { 1216 /* 1217 * AF_INET{,6} hack to avoid losing the port. 1218 * Assumes that all AF_INET{,6} transports can handle a 1219 * O_T_BIND_REQ with a non-zero CONIND_number when the TPI 1220 * has already bound thus it is possible to avoid the unbind. 1221 */ 1222 if (!((so->so_family == AF_INET || so->so_family == AF_INET6) && 1223 /*CONSTCOND*/ 1224 !solisten_tpi_tcp)) { 1225 error = sotpi_unbind(so, _SOUNBIND_REBIND); 1226 if (error) 1227 goto done; 1228 } 1229 error = sotpi_bindlisten(so, NULL, 0, backlog, 1230 _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN); 1231 } else { 1232 so->so_state |= SS_ACCEPTCONN; 1233 so->so_backlog = backlog; 1234 } 1235 if (error) 1236 goto done; 1237 ASSERT(so->so_state & SS_ACCEPTCONN); 1238 done: 1239 so_unlock_single(so, SOLOCKED); 1240 mutex_exit(&so->so_lock); 1241 return (error); 1242 } 1243 1244 /* 1245 * Disconnect either a specified seqno or all (-1). 1246 * The former is used on listening sockets only. 1247 * 1248 * When seqno == -1 sodisconnect could call sotpi_unbind. However, 1249 * the current use of sodisconnect(seqno == -1) is only for shutdown 1250 * so there is no point (and potentially incorrect) to unbind. 
1251 */ 1252 int 1253 sodisconnect(struct sonode *so, t_scalar_t seqno, int flags) 1254 { 1255 struct T_discon_req discon_req; 1256 int error = 0; 1257 mblk_t *mp; 1258 1259 dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n", 1260 so, seqno, flags, pr_state(so->so_state, so->so_mode))); 1261 1262 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1263 mutex_enter(&so->so_lock); 1264 so_lock_single(so); /* Set SOLOCKED */ 1265 } else { 1266 ASSERT(MUTEX_HELD(&so->so_lock)); 1267 ASSERT(so->so_flag & SOLOCKED); 1268 } 1269 1270 if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ACCEPTCONN))) { 1271 error = EINVAL; 1272 eprintsoline(so, error); 1273 goto done; 1274 } 1275 1276 mutex_exit(&so->so_lock); 1277 /* 1278 * Flush the write side (unless this is a listener) 1279 * and then send down a T_DISCON_REQ. 1280 * (Don't flush on listener since it could flush {O_}T_CONN_RES 1281 * and other messages.) 1282 */ 1283 if (!(so->so_state & SS_ACCEPTCONN)) 1284 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHW); 1285 1286 discon_req.PRIM_type = T_DISCON_REQ; 1287 discon_req.SEQ_number = seqno; 1288 mp = soallocproto1(&discon_req, sizeof (discon_req), 1289 0, _ALLOC_SLEEP); 1290 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1291 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1292 mutex_enter(&so->so_lock); 1293 if (error) { 1294 eprintsoline(so, error); 1295 goto done; 1296 } 1297 1298 error = sowaitokack(so, T_DISCON_REQ); 1299 if (error) { 1300 eprintsoline(so, error); 1301 goto done; 1302 } 1303 /* 1304 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1305 * strsock_proto while the lock was dropped above, the disconnect 1306 * is allowed to complete. However, it is not possible to 1307 * assert that SS_ISCONNECTED|SS_ISCONNECTING are set. 
1308 */ 1309 so->so_state &= 1310 ~(SS_ISCONNECTED|SS_ISCONNECTING|SS_LADDR_VALID|SS_FADDR_VALID); 1311 done: 1312 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1313 so_unlock_single(so, SOLOCKED); 1314 mutex_exit(&so->so_lock); 1315 } else { 1316 /* If the caller held the lock don't release it here */ 1317 ASSERT(MUTEX_HELD(&so->so_lock)); 1318 ASSERT(so->so_flag & SOLOCKED); 1319 } 1320 return (error); 1321 } 1322 1323 int 1324 sotpi_accept(struct sonode *so, int fflag, struct sonode **nsop) 1325 { 1326 struct T_conn_ind *conn_ind; 1327 struct T_conn_res *conn_res; 1328 int error = 0; 1329 mblk_t *mp; 1330 struct sonode *nso; 1331 vnode_t *nvp; 1332 void *src; 1333 t_uscalar_t srclen; 1334 void *opt; 1335 t_uscalar_t optlen; 1336 t_scalar_t PRIM_type; 1337 t_scalar_t SEQ_number; 1338 1339 dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n", 1340 so, fflag, nsop, pr_state(so->so_state, so->so_mode))); 1341 1342 /* 1343 * Defer single-threading the accepting socket until 1344 * the T_CONN_IND has been received and parsed and the 1345 * new sonode has been opened. 1346 */ 1347 1348 /* Check that we are not already connected */ 1349 if ((so->so_state & SS_ACCEPTCONN) == 0) 1350 goto conn_bad; 1351 again: 1352 if ((error = sowaitconnind(so, fflag, &mp)) != 0) 1353 goto e_bad; 1354 1355 ASSERT(mp); 1356 conn_ind = (struct T_conn_ind *)mp->b_rptr; 1357 /* 1358 * Save SEQ_number for error paths. 
1359 */ 1360 SEQ_number = conn_ind->SEQ_number; 1361 1362 srclen = conn_ind->SRC_length; 1363 src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1); 1364 if (src == NULL) { 1365 error = EPROTO; 1366 freemsg(mp); 1367 eprintsoline(so, error); 1368 goto disconnect_unlocked; 1369 } 1370 optlen = conn_ind->OPT_length; 1371 switch (so->so_family) { 1372 case AF_INET: 1373 case AF_INET6: 1374 if ((optlen == sizeof (intptr_t)) && 1375 ((so->so_state & SS_TCP_FAST_ACCEPT) != 0)) { 1376 bcopy(mp->b_rptr + conn_ind->OPT_offset, 1377 &opt, conn_ind->OPT_length); 1378 } else { 1379 /* 1380 * The transport (in this case TCP) hasn't sent up 1381 * a pointer to an instance for the accept fast-path. 1382 * Disable fast-path completely because the call to 1383 * sotpi_create() below would otherwise create an 1384 * incomplete TCP instance, which would lead to 1385 * problems when sockfs sends a normal T_CONN_RES 1386 * message down the new stream. 1387 */ 1388 so->so_state &= ~SS_TCP_FAST_ACCEPT; 1389 opt = NULL; 1390 optlen = 0; 1391 } 1392 break; 1393 case AF_UNIX: 1394 default: 1395 if (optlen != 0) { 1396 opt = sogetoff(mp, conn_ind->OPT_offset, optlen, 1397 __TPI_ALIGN_SIZE); 1398 if (opt == NULL) { 1399 error = EPROTO; 1400 freemsg(mp); 1401 eprintsoline(so, error); 1402 goto disconnect_unlocked; 1403 } 1404 } 1405 if (so->so_family == AF_UNIX) { 1406 if (!(so->so_state & SS_FADDR_NOXLATE)) { 1407 src = NULL; 1408 srclen = 0; 1409 } 1410 /* Extract src address from options */ 1411 if (optlen != 0) 1412 so_getopt_srcaddr(opt, optlen, &src, &srclen); 1413 } 1414 break; 1415 } 1416 1417 /* 1418 * Create the new socket. 1419 */ 1420 VN_HOLD(so->so_accessvp); 1421 nso = sotpi_create(so->so_accessvp, so->so_family, so->so_type, 1422 so->so_protocol, so->so_version, so, &error); 1423 if (nso == NULL) { 1424 ASSERT(error != 0); 1425 /* 1426 * Accept can not fail with ENOBUFS. sotpi_create 1427 * sleeps waiting for memory until a signal is caught 1428 * so return EINTR. 
1429 */ 1430 freemsg(mp); 1431 if (error == ENOBUFS) 1432 error = EINTR; 1433 goto e_disc_unl; 1434 } 1435 nvp = SOTOV(nso); 1436 1437 #ifdef DEBUG 1438 /* 1439 * SO_DEBUG is used to trigger the dprint* and eprint* macros thus 1440 * it's inherited early to allow debugging of the accept code itself. 1441 */ 1442 nso->so_options |= so->so_options & SO_DEBUG; 1443 #endif /* DEBUG */ 1444 1445 /* 1446 * Save the SRC address from the T_CONN_IND 1447 * for getpeername to work on AF_UNIX and on transports that do not 1448 * support TI_GETPEERNAME. 1449 * 1450 * NOTE: AF_UNIX NUL termination is ensured by the sender's 1451 * copyin_name(). 1452 */ 1453 if (srclen > (t_uscalar_t)nso->so_faddr_maxlen) { 1454 error = EINVAL; 1455 freemsg(mp); 1456 eprintsoline(so, error); 1457 goto disconnect_vp_unlocked; 1458 } 1459 nso->so_faddr_len = (socklen_t)srclen; 1460 ASSERT(so->so_faddr_len <= so->so_faddr_maxlen); 1461 bcopy(src, nso->so_faddr_sa, srclen); 1462 nso->so_state |= SS_FADDR_VALID; 1463 1464 if ((DB_REF(mp) > 1) || MBLKSIZE(mp) < 1465 (sizeof (struct T_conn_res) + sizeof (intptr_t))) { 1466 cred_t *cr; 1467 1468 if ((cr = DB_CRED(mp)) != NULL) { 1469 crhold(cr); 1470 nso->so_peercred = cr; 1471 nso->so_cpid = DB_CPID(mp); 1472 } 1473 freemsg(mp); 1474 1475 mp = soallocproto1(NULL, sizeof (struct T_conn_res) + 1476 sizeof (intptr_t), 0, _ALLOC_INTR); 1477 if (mp == NULL) { 1478 /* 1479 * Accept can not fail with ENOBUFS. 1480 * A signal was caught so return EINTR. 
1481 */ 1482 error = EINTR; 1483 eprintsoline(so, error); 1484 goto disconnect_vp_unlocked; 1485 } 1486 conn_res = (struct T_conn_res *)mp->b_rptr; 1487 } else { 1488 nso->so_peercred = DB_CRED(mp); 1489 nso->so_cpid = DB_CPID(mp); 1490 DB_CRED(mp) = NULL; 1491 1492 mp->b_rptr = DB_BASE(mp); 1493 conn_res = (struct T_conn_res *)mp->b_rptr; 1494 mp->b_wptr = mp->b_rptr + sizeof (struct T_conn_res); 1495 } 1496 1497 /* 1498 * New socket must be bound at least in sockfs and, except for AF_INET, 1499 * (or AF_INET6) it also has to be bound in the transport provider. 1500 * After accepting the connection on nso so_laddr_sa will be set to 1501 * contain the same address as the listener's local address 1502 * so the address we bind to isn't important. 1503 */ 1504 if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) && 1505 /*CONSTCOND*/ 1506 nso->so_type == SOCK_STREAM && !soaccept_tpi_tcp) { 1507 /* 1508 * Optimization for AF_INET{,6} transports 1509 * that can handle a T_CONN_RES without being bound. 1510 */ 1511 mutex_enter(&nso->so_lock); 1512 so_automatic_bind(nso); 1513 mutex_exit(&nso->so_lock); 1514 } else { 1515 /* Perform NULL bind with the transport provider. */ 1516 if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC)) != 0) { 1517 ASSERT(error != ENOBUFS); 1518 freemsg(mp); 1519 eprintsoline(nso, error); 1520 goto disconnect_vp_unlocked; 1521 } 1522 } 1523 1524 /* 1525 * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES 1526 * so that any data arriving on the new socket will cause the 1527 * appropriate signals to be delivered for the new socket. 1528 * 1529 * No other thread (except strsock_proto and strsock_misc) 1530 * can access the new socket thus we relax the locking. 
1531 */ 1532 nso->so_pgrp = so->so_pgrp; 1533 nso->so_state |= so->so_state & (SS_ASYNC|SS_FADDR_NOXLATE); 1534 1535 if (nso->so_pgrp != 0) { 1536 if ((error = so_set_events(nso, nvp, CRED())) != 0) { 1537 eprintsoline(nso, error); 1538 error = 0; 1539 nso->so_pgrp = 0; 1540 } 1541 } 1542 1543 /* 1544 * Make note of the socket level options. TCP and IP level options 1545 * are already inherited. We could do all this after accept is 1546 * successful but doing it here simplifies code and no harm done 1547 * for error case. 1548 */ 1549 nso->so_options = so->so_options & (SO_DEBUG|SO_REUSEADDR|SO_KEEPALIVE| 1550 SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK| 1551 SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER); 1552 nso->so_sndbuf = so->so_sndbuf; 1553 nso->so_rcvbuf = so->so_rcvbuf; 1554 if (nso->so_options & SO_LINGER) 1555 nso->so_linger = so->so_linger; 1556 1557 if ((so->so_state & SS_TCP_FAST_ACCEPT) != 0) { 1558 mblk_t *ack_mp; 1559 1560 ASSERT(opt != NULL); 1561 1562 conn_res->OPT_length = optlen; 1563 conn_res->OPT_offset = MBLKL(mp); 1564 bcopy(&opt, mp->b_wptr, optlen); 1565 mp->b_wptr += optlen; 1566 conn_res->PRIM_type = T_CONN_RES; 1567 conn_res->ACCEPTOR_id = 0; 1568 PRIM_type = T_CONN_RES; 1569 1570 /* Send down the T_CONN_RES on acceptor STREAM */ 1571 error = kstrputmsg(SOTOV(nso), mp, NULL, 1572 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1573 if (error) { 1574 mutex_enter(&so->so_lock); 1575 so_lock_single(so); 1576 eprintsoline(so, error); 1577 goto disconnect_vp; 1578 } 1579 mutex_enter(&nso->so_lock); 1580 error = sowaitprim(nso, T_CONN_RES, T_OK_ACK, 1581 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 1582 if (error) { 1583 mutex_exit(&nso->so_lock); 1584 mutex_enter(&so->so_lock); 1585 so_lock_single(so); 1586 eprintsoline(so, error); 1587 goto disconnect_vp; 1588 } 1589 if (nso->so_family == AF_INET) { 1590 sin_t *sin; 1591 1592 sin = (sin_t *)(ack_mp->b_rptr + 1593 sizeof (struct T_ok_ack)); 1594 bcopy(sin, nso->so_laddr_sa, sizeof (sin_t)); 1595 
nso->so_laddr_len = sizeof (sin_t); 1596 } else { 1597 sin6_t *sin6; 1598 1599 sin6 = (sin6_t *)(ack_mp->b_rptr + 1600 sizeof (struct T_ok_ack)); 1601 bcopy(sin6, nso->so_laddr_sa, sizeof (sin6_t)); 1602 nso->so_laddr_len = sizeof (sin6_t); 1603 } 1604 freemsg(ack_mp); 1605 1606 nso->so_state |= SS_ISCONNECTED | SS_LADDR_VALID; 1607 nso->so_priv = opt; 1608 1609 if (so->so_nl7c_flags & NL7C_ENABLED) { 1610 /* 1611 * An NL7C marked listen()er so the new socket 1612 * inherits the listen()er's NL7C state. 1613 * 1614 * When calling NL7C to process the new socket 1615 * pass the nonblocking i/o state of the listen 1616 * socket as this is the context we are in. 1617 */ 1618 nso->so_nl7c_flags = so->so_nl7c_flags; 1619 if (nl7c_process(nso, 1620 (nso->so_state & (SS_NONBLOCK|SS_NDELAY)), 1621 (int)((tcp_t *)nso->so_priv)->tcp_mss)) { 1622 /* 1623 * NL7C has completed processing on the 1624 * socket, close the socket and back to 1625 * the top to await the next T_CONN_IND. 1626 */ 1627 mutex_exit(&nso->so_lock); 1628 (void) VOP_CLOSE(nvp, 0, 1, (offset_t)0, 1629 CRED()); 1630 VN_RELE(nvp); 1631 goto again; 1632 } 1633 /* Pass the new socket out */ 1634 } 1635 1636 mutex_exit(&nso->so_lock); 1637 1638 /* 1639 * Pass out new socket. 1640 */ 1641 if (nsop != NULL) 1642 *nsop = nso; 1643 1644 return (0); 1645 } 1646 1647 /* 1648 * Copy local address from listener. 1649 */ 1650 nso->so_laddr_len = so->so_laddr_len; 1651 ASSERT(nso->so_laddr_len <= nso->so_laddr_maxlen); 1652 bcopy(so->so_laddr_sa, nso->so_laddr_sa, nso->so_laddr_len); 1653 nso->so_state |= SS_LADDR_VALID; 1654 1655 /* 1656 * This is the non-performance case for sockets (e.g. AF_UNIX sockets) 1657 * which don't support the FireEngine accept fast-path. It is also 1658 * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd 1659 * again. Neither sockfs nor TCP attempt to find out if some other 1660 * random module has been inserted in between (in which case we 1661 * should follow TLI accept behaviour). 
We blindly assume the worst 1662 * case and revert back to old behaviour i.e. TCP will not send us 1663 * any option (eager) and the accept should happen on the listener 1664 * queue. Any queued T_conn_ind have already got their options removed 1665 * by so_sock2_stream() when "sockmod" was I_POP'd. 1666 */ 1667 /* 1668 * Fill in the {O_}T_CONN_RES before getting SOLOCKED. 1669 */ 1670 if ((nso->so_mode & SM_ACCEPTOR_ID) == 0) { 1671 #ifdef _ILP32 1672 queue_t *q; 1673 1674 /* 1675 * Find read queue in driver 1676 * Can safely do this since we "own" nso/nvp. 1677 */ 1678 q = strvp2wq(nvp)->q_next; 1679 while (SAMESTR(q)) 1680 q = q->q_next; 1681 q = RD(q); 1682 conn_res->ACCEPTOR_id = (t_uscalar_t)q; 1683 #else 1684 conn_res->ACCEPTOR_id = (t_uscalar_t)getminor(nvp->v_rdev); 1685 #endif /* _ILP32 */ 1686 conn_res->PRIM_type = O_T_CONN_RES; 1687 PRIM_type = O_T_CONN_RES; 1688 } else { 1689 conn_res->ACCEPTOR_id = nso->so_acceptor_id; 1690 conn_res->PRIM_type = T_CONN_RES; 1691 PRIM_type = T_CONN_RES; 1692 } 1693 conn_res->SEQ_number = SEQ_number; 1694 conn_res->OPT_length = 0; 1695 conn_res->OPT_offset = 0; 1696 1697 mutex_enter(&so->so_lock); 1698 so_lock_single(so); /* Set SOLOCKED */ 1699 mutex_exit(&so->so_lock); 1700 1701 error = kstrputmsg(SOTOV(so), mp, NULL, 1702 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1703 mutex_enter(&so->so_lock); 1704 if (error) { 1705 eprintsoline(so, error); 1706 goto disconnect_vp; 1707 } 1708 error = sowaitokack(so, PRIM_type); 1709 if (error) { 1710 eprintsoline(so, error); 1711 goto disconnect_vp; 1712 } 1713 so_unlock_single(so, SOLOCKED); 1714 mutex_exit(&so->so_lock); 1715 1716 nso->so_state |= SS_ISCONNECTED; 1717 1718 /* 1719 * Pass out new socket. 
1720 */ 1721 if (nsop != NULL) 1722 *nsop = nso; 1723 1724 return (0); 1725 1726 1727 eproto_disc_unl: 1728 error = EPROTO; 1729 e_disc_unl: 1730 eprintsoline(so, error); 1731 goto disconnect_unlocked; 1732 1733 pr_disc_vp_unl: 1734 eprintsoline(so, error); 1735 disconnect_vp_unlocked: 1736 (void) VOP_CLOSE(nvp, 0, 1, 0, CRED()); 1737 VN_RELE(nvp); 1738 disconnect_unlocked: 1739 (void) sodisconnect(so, SEQ_number, 0); 1740 return (error); 1741 1742 pr_disc_vp: 1743 eprintsoline(so, error); 1744 disconnect_vp: 1745 (void) sodisconnect(so, SEQ_number, _SODISCONNECT_LOCK_HELD); 1746 so_unlock_single(so, SOLOCKED); 1747 mutex_exit(&so->so_lock); 1748 (void) VOP_CLOSE(nvp, 0, 1, 0, CRED()); 1749 VN_RELE(nvp); 1750 return (error); 1751 1752 conn_bad: /* Note: SunOS 4/BSD unconditionally returns EINVAL here */ 1753 error = (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) 1754 ? EOPNOTSUPP : EINVAL; 1755 e_bad: 1756 eprintsoline(so, error); 1757 return (error); 1758 } 1759 1760 /* 1761 * connect a socket. 1762 * 1763 * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to 1764 * unconnect (by specifying a null address). 1765 */ 1766 int 1767 sotpi_connect(struct sonode *so, 1768 const struct sockaddr *name, 1769 socklen_t namelen, 1770 int fflag, 1771 int flags) 1772 { 1773 struct T_conn_req conn_req; 1774 int error = 0; 1775 mblk_t *mp; 1776 void *src; 1777 socklen_t srclen; 1778 void *addr; 1779 socklen_t addrlen; 1780 boolean_t need_unlock; 1781 1782 dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n", 1783 so, name, namelen, fflag, flags, 1784 pr_state(so->so_state, so->so_mode))); 1785 1786 /* 1787 * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to 1788 * avoid sleeping for memory with SOLOCKED held. 1789 * We know that the T_CONN_REQ can't be larger than 2 * so_faddr_maxlen 1790 * + sizeof (struct T_opthdr). 1791 * (the AF_UNIX so_ux_addr_xlate() does not make the address 1792 * exceed so_faddr_maxlen). 
1793 */ 1794 mp = soallocproto(sizeof (struct T_conn_req) + 1795 2 * so->so_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR); 1796 if (mp == NULL) { 1797 /* 1798 * Connect can not fail with ENOBUFS. A signal was 1799 * caught so return EINTR. 1800 */ 1801 error = EINTR; 1802 eprintsoline(so, error); 1803 return (error); 1804 } 1805 1806 mutex_enter(&so->so_lock); 1807 /* 1808 * Make sure that there is a preallocated unbind_req 1809 * message before any binding. This message allocated when 1810 * the socket is created but it might be have been 1811 * consumed. 1812 */ 1813 if (so->so_unbind_mp == NULL) { 1814 dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n")); 1815 /* NOTE: holding so_lock while sleeping */ 1816 so->so_unbind_mp = 1817 soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR); 1818 if (so->so_unbind_mp == NULL) { 1819 error = EINTR; 1820 need_unlock = B_FALSE; 1821 goto done; 1822 } 1823 } 1824 1825 so_lock_single(so); /* Set SOLOCKED */ 1826 need_unlock = B_TRUE; 1827 1828 /* 1829 * Can't have done a listen before connecting. 1830 */ 1831 if (so->so_state & SS_ACCEPTCONN) { 1832 error = EOPNOTSUPP; 1833 goto done; 1834 } 1835 1836 /* 1837 * Must be bound with the transport 1838 */ 1839 if (!(so->so_state & SS_ISBOUND)) { 1840 if ((so->so_family == AF_INET || so->so_family == AF_INET6) && 1841 /*CONSTCOND*/ 1842 so->so_type == SOCK_STREAM && !soconnect_tpi_tcp) { 1843 /* 1844 * Optimization for AF_INET{,6} transports 1845 * that can handle a T_CONN_REQ without being bound. 1846 */ 1847 so_automatic_bind(so); 1848 } else { 1849 error = sotpi_bind(so, NULL, 0, 1850 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD); 1851 if (error) 1852 goto done; 1853 } 1854 ASSERT(so->so_state & SS_ISBOUND); 1855 flags |= _SOCONNECT_DID_BIND; 1856 } 1857 1858 /* 1859 * Handle a connect to a name parameter of type AF_UNSPEC like a 1860 * connect to a null address. This is the portable method to 1861 * unconnect a socket. 
1862 */ 1863 if ((namelen >= sizeof (sa_family_t)) && 1864 (name->sa_family == AF_UNSPEC)) { 1865 name = NULL; 1866 namelen = 0; 1867 } 1868 1869 /* 1870 * Check that we are not already connected. 1871 * A connection-oriented socket cannot be reconnected. 1872 * A connected connection-less socket can be 1873 * - connected to a different address by a subsequent connect 1874 * - "unconnected" by a connect to the NULL address 1875 */ 1876 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) { 1877 ASSERT(!(flags & _SOCONNECT_DID_BIND)); 1878 if (so->so_mode & SM_CONNREQUIRED) { 1879 /* Connection-oriented socket */ 1880 error = so->so_state & SS_ISCONNECTED ? 1881 EISCONN : EALREADY; 1882 goto done; 1883 } 1884 /* Connection-less socket */ 1885 if (name == NULL) { 1886 /* 1887 * Remove the connected state and clear SO_DGRAM_ERRIND 1888 * since it was set when the socket was connected. 1889 * If this is UDP also send down a T_DISCON_REQ. 1890 */ 1891 int val; 1892 1893 if ((so->so_family == AF_INET || 1894 so->so_family == AF_INET6) && 1895 (so->so_type == SOCK_DGRAM || 1896 so->so_type == SOCK_RAW) && 1897 /*CONSTCOND*/ 1898 !soconnect_tpi_udp) { 1899 /* XXX What about implicitly unbinding here? */ 1900 error = sodisconnect(so, -1, 1901 _SODISCONNECT_LOCK_HELD); 1902 } else { 1903 so->so_state &= 1904 ~(SS_ISCONNECTED | SS_ISCONNECTING | 1905 SS_FADDR_VALID); 1906 so->so_faddr_len = 0; 1907 } 1908 1909 so_unlock_single(so, SOLOCKED); 1910 mutex_exit(&so->so_lock); 1911 1912 val = 0; 1913 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, 1914 &val, (t_uscalar_t)sizeof (val)); 1915 1916 mutex_enter(&so->so_lock); 1917 so_lock_single(so); /* Set SOLOCKED */ 1918 goto done; 1919 } 1920 } 1921 ASSERT(so->so_state & SS_ISBOUND); 1922 1923 if (name == NULL || namelen == 0) { 1924 error = EINVAL; 1925 goto done; 1926 } 1927 /* 1928 * Mark the socket if so_faddr_sa represents the transport level 1929 * address. 
1930 */ 1931 if (flags & _SOCONNECT_NOXLATE) { 1932 struct sockaddr_ux *soaddr_ux; 1933 1934 ASSERT(so->so_family == AF_UNIX); 1935 if (namelen != sizeof (struct sockaddr_ux)) { 1936 error = EINVAL; 1937 goto done; 1938 } 1939 soaddr_ux = (struct sockaddr_ux *)name; 1940 name = (struct sockaddr *)&soaddr_ux->sou_addr; 1941 namelen = sizeof (soaddr_ux->sou_addr); 1942 so->so_state |= SS_FADDR_NOXLATE; 1943 } 1944 1945 /* 1946 * Length and family checks. 1947 */ 1948 error = so_addr_verify(so, name, namelen); 1949 if (error) 1950 goto bad; 1951 1952 /* 1953 * Save foreign address. Needed for AF_UNIX as well as 1954 * transport providers that do not support TI_GETPEERNAME. 1955 * Also used for cached foreign address for TCP and UDP. 1956 */ 1957 if (namelen > (t_uscalar_t)so->so_faddr_maxlen) { 1958 error = EINVAL; 1959 goto done; 1960 } 1961 so->so_faddr_len = (socklen_t)namelen; 1962 ASSERT(so->so_faddr_len <= so->so_faddr_maxlen); 1963 bcopy(name, so->so_faddr_sa, namelen); 1964 so->so_state |= SS_FADDR_VALID; 1965 1966 if (so->so_family == AF_UNIX) { 1967 if (so->so_state & SS_FADDR_NOXLATE) { 1968 /* 1969 * Already have a transport internal address. Do not 1970 * pass any (transport internal) source address. 1971 */ 1972 addr = so->so_faddr_sa; 1973 addrlen = (t_uscalar_t)so->so_faddr_len; 1974 src = NULL; 1975 srclen = 0; 1976 } else { 1977 /* 1978 * Pass the sockaddr_un source address as an option 1979 * and translate the remote address. 1980 * Holding so_lock thus so_laddr_sa can not change. 
1981 */ 1982 src = so->so_laddr_sa; 1983 srclen = (t_uscalar_t)so->so_laddr_len; 1984 dprintso(so, 1, 1985 ("sotpi_connect UNIX: srclen %d, src %p\n", 1986 srclen, src)); 1987 error = so_ux_addr_xlate(so, 1988 so->so_faddr_sa, (socklen_t)so->so_faddr_len, 1989 (flags & _SOCONNECT_XPG4_2), 1990 &addr, &addrlen); 1991 if (error) 1992 goto bad; 1993 } 1994 } else { 1995 addr = so->so_faddr_sa; 1996 addrlen = (t_uscalar_t)so->so_faddr_len; 1997 src = NULL; 1998 srclen = 0; 1999 } 2000 /* 2001 * When connecting a datagram socket we issue the SO_DGRAM_ERRIND 2002 * option which asks the transport provider to send T_UDERR_IND 2003 * messages. These T_UDERR_IND messages are used to return connected 2004 * style errors (e.g. ECONNRESET) for connected datagram sockets. 2005 * 2006 * In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets) 2007 * we send down a T_CONN_REQ. This is needed to let the 2008 * transport assign a local address that is consistent with 2009 * the remote address. Applications depend on a getsockname() 2010 * after a connect() to retrieve the "source" IP address for 2011 * the connected socket. Invalidate the cached local address 2012 * to force getsockname() to enquire of the transport. 2013 */ 2014 if (!(so->so_mode & SM_CONNREQUIRED)) { 2015 /* 2016 * Datagram socket. 2017 */ 2018 int32_t val; 2019 2020 so_unlock_single(so, SOLOCKED); 2021 mutex_exit(&so->so_lock); 2022 2023 val = 1; 2024 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, 2025 &val, (t_uscalar_t)sizeof (val)); 2026 2027 mutex_enter(&so->so_lock); 2028 so_lock_single(so); /* Set SOLOCKED */ 2029 if ((so->so_family != AF_INET && so->so_family != AF_INET6) || 2030 (so->so_type != SOCK_DGRAM && so->so_type != SOCK_RAW) || 2031 soconnect_tpi_udp) { 2032 soisconnected(so); 2033 goto done; 2034 } 2035 /* 2036 * Send down T_CONN_REQ etc. 2037 * Clear fflag to avoid returning EWOULDBLOCK. 
2038 */ 2039 fflag = 0; 2040 ASSERT(so->so_family != AF_UNIX); 2041 so->so_state &= ~SS_LADDR_VALID; 2042 } else if (so->so_laddr_len != 0) { 2043 /* 2044 * If the local address or port was "any" then it may be 2045 * changed by the transport as a result of the 2046 * connect. Invalidate the cached version if we have one. 2047 */ 2048 switch (so->so_family) { 2049 case AF_INET: 2050 ASSERT(so->so_laddr_len == (socklen_t)sizeof (sin_t)); 2051 if (((sin_t *)so->so_laddr_sa)->sin_addr.s_addr == 2052 INADDR_ANY || 2053 ((sin_t *)so->so_laddr_sa)->sin_port == 0) 2054 so->so_state &= ~SS_LADDR_VALID; 2055 break; 2056 2057 case AF_INET6: 2058 ASSERT(so->so_laddr_len == (socklen_t)sizeof (sin6_t)); 2059 if (IN6_IS_ADDR_UNSPECIFIED( 2060 &((sin6_t *)so->so_laddr_sa) ->sin6_addr) || 2061 IN6_IS_ADDR_V4MAPPED_ANY( 2062 &((sin6_t *)so->so_laddr_sa)->sin6_addr) || 2063 ((sin6_t *)so->so_laddr_sa)->sin6_port == 0) 2064 so->so_state &= ~SS_LADDR_VALID; 2065 break; 2066 2067 default: 2068 break; 2069 } 2070 } 2071 2072 /* 2073 * Check for failure of an earlier call 2074 */ 2075 if (so->so_error != 0) 2076 goto so_bad; 2077 2078 /* 2079 * Send down T_CONN_REQ. Message was allocated above. 2080 */ 2081 conn_req.PRIM_type = T_CONN_REQ; 2082 conn_req.DEST_length = addrlen; 2083 conn_req.DEST_offset = (t_scalar_t)sizeof (conn_req); 2084 if (srclen == 0) { 2085 conn_req.OPT_length = 0; 2086 conn_req.OPT_offset = 0; 2087 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2088 soappendmsg(mp, addr, addrlen); 2089 } else { 2090 /* 2091 * There is a AF_UNIX sockaddr_un to include as a source 2092 * address option. 
2093 */ 2094 struct T_opthdr toh; 2095 2096 toh.level = SOL_SOCKET; 2097 toh.name = SO_SRCADDR; 2098 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 2099 toh.status = 0; 2100 conn_req.OPT_length = 2101 (t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen)); 2102 conn_req.OPT_offset = (t_scalar_t)(sizeof (conn_req) + 2103 _TPI_ALIGN_TOPT(addrlen)); 2104 2105 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2106 soappendmsg(mp, addr, addrlen); 2107 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2108 soappendmsg(mp, &toh, sizeof (toh)); 2109 soappendmsg(mp, src, srclen); 2110 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2111 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2112 } 2113 /* 2114 * Set SS_ISCONNECTING before sending down the T_CONN_REQ 2115 * in order to have the right state when the T_CONN_CON shows up. 2116 */ 2117 soisconnecting(so); 2118 mutex_exit(&so->so_lock); 2119 2120 #ifdef C2_AUDIT 2121 if (audit_active) 2122 audit_sock(T_CONN_REQ, strvp2wq(SOTOV(so)), mp, 0); 2123 #endif /* C2_AUDIT */ 2124 2125 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2126 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2127 mp = NULL; 2128 mutex_enter(&so->so_lock); 2129 if (error != 0) 2130 goto bad; 2131 2132 if ((error = sowaitokack(so, T_CONN_REQ)) != 0) 2133 goto bad; 2134 2135 /* Allow other threads to access the socket */ 2136 so_unlock_single(so, SOLOCKED); 2137 need_unlock = B_FALSE; 2138 2139 /* 2140 * Wait until we get a T_CONN_CON or an error 2141 */ 2142 if ((error = sowaitconnected(so, fflag, 0)) != 0) { 2143 so_lock_single(so); /* Set SOLOCKED */ 2144 need_unlock = B_TRUE; 2145 } 2146 2147 done: 2148 freemsg(mp); 2149 switch (error) { 2150 case EINPROGRESS: 2151 case EALREADY: 2152 case EISCONN: 2153 case EINTR: 2154 /* Non-fatal errors */ 2155 so->so_state &= ~SS_LADDR_VALID; 2156 /* FALLTHRU */ 2157 case 0: 2158 break; 2159 2160 case EHOSTUNREACH: 2161 if (flags & _SOCONNECT_XPG4_2) { 2162 /* 2163 * X/Open specification contains a requirement that 2164 * 
ENETUNREACH be returned but does not require 2165 * EHOSTUNREACH. In order to keep the test suite 2166 * happy we mess with the errno here. 2167 */ 2168 error = ENETUNREACH; 2169 } 2170 /* FALLTHRU */ 2171 2172 default: 2173 ASSERT(need_unlock); 2174 /* 2175 * Fatal errors: clear SS_ISCONNECTING in case it was set, 2176 * and invalidate local-address cache 2177 */ 2178 so->so_state &= ~(SS_ISCONNECTING | SS_LADDR_VALID); 2179 /* A discon_ind might have already unbound us */ 2180 if ((flags & _SOCONNECT_DID_BIND) && 2181 (so->so_state & SS_ISBOUND)) { 2182 int err; 2183 2184 err = sotpi_unbind(so, 0); 2185 /* LINTED - statement has no conseq */ 2186 if (err) { 2187 eprintsoline(so, err); 2188 } 2189 } 2190 break; 2191 } 2192 if (need_unlock) 2193 so_unlock_single(so, SOLOCKED); 2194 mutex_exit(&so->so_lock); 2195 return (error); 2196 2197 so_bad: error = sogeterr(so); 2198 bad: eprintsoline(so, error); 2199 goto done; 2200 } 2201 2202 int 2203 sotpi_shutdown(struct sonode *so, int how) 2204 { 2205 struct T_ordrel_req ordrel_req; 2206 mblk_t *mp; 2207 uint_t old_state, state_change; 2208 int error = 0; 2209 2210 dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n", 2211 so, how, pr_state(so->so_state, so->so_mode))); 2212 2213 mutex_enter(&so->so_lock); 2214 so_lock_single(so); /* Set SOLOCKED */ 2215 2216 /* 2217 * SunOS 4.X has no check for datagram sockets. 2218 * 5.X checks that it is connected (ENOTCONN) 2219 * X/Open requires that we check the connected state. 2220 */ 2221 if (!(so->so_state & SS_ISCONNECTED)) { 2222 if (!xnet_skip_checks) { 2223 error = ENOTCONN; 2224 if (xnet_check_print) { 2225 printf("sockfs: X/Open shutdown check " 2226 "caused ENOTCONN\n"); 2227 } 2228 } 2229 goto done; 2230 } 2231 /* 2232 * Record the current state and then perform any state changes. 2233 * Then use the difference between the old and new states to 2234 * determine which messages need to be sent. 2235 * This prevents e.g. 
duplicate T_ORDREL_REQ when there are 2236 * duplicate calls to shutdown(). 2237 */ 2238 old_state = so->so_state; 2239 2240 switch (how) { 2241 case 0: 2242 socantrcvmore(so); 2243 break; 2244 case 1: 2245 socantsendmore(so); 2246 break; 2247 case 2: 2248 socantsendmore(so); 2249 socantrcvmore(so); 2250 break; 2251 default: 2252 error = EINVAL; 2253 goto done; 2254 } 2255 2256 /* 2257 * Assumes that the SS_CANT* flags are never cleared in the above code. 2258 */ 2259 state_change = (so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) - 2260 (old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)); 2261 ASSERT((state_change & ~(SS_CANTRCVMORE|SS_CANTSENDMORE)) == 0); 2262 2263 switch (state_change) { 2264 case 0: 2265 dprintso(so, 1, 2266 ("sotpi_shutdown: nothing to send in state 0x%x\n", 2267 so->so_state)); 2268 goto done; 2269 2270 case SS_CANTRCVMORE: 2271 mutex_exit(&so->so_lock); 2272 strseteof(SOTOV(so), 1); 2273 /* 2274 * strseteof takes care of read side wakeups, 2275 * pollwakeups, and signals. 2276 */ 2277 /* 2278 * Get the read lock before flushing data to avoid problems 2279 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 2280 */ 2281 mutex_enter(&so->so_lock); 2282 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2283 mutex_exit(&so->so_lock); 2284 2285 /* Flush read side queue */ 2286 strflushrq(SOTOV(so), FLUSHALL); 2287 2288 mutex_enter(&so->so_lock); 2289 so_unlock_read(so); /* Clear SOREADLOCKED */ 2290 break; 2291 2292 case SS_CANTSENDMORE: 2293 mutex_exit(&so->so_lock); 2294 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2295 mutex_enter(&so->so_lock); 2296 break; 2297 2298 case SS_CANTSENDMORE|SS_CANTRCVMORE: 2299 mutex_exit(&so->so_lock); 2300 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2301 strseteof(SOTOV(so), 1); 2302 /* 2303 * strseteof takes care of read side wakeups, 2304 * pollwakeups, and signals. 2305 */ 2306 /* 2307 * Get the read lock before flushing data to avoid problems 2308 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 
2309 */ 2310 mutex_enter(&so->so_lock); 2311 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2312 mutex_exit(&so->so_lock); 2313 2314 /* Flush read side queue */ 2315 strflushrq(SOTOV(so), FLUSHALL); 2316 2317 mutex_enter(&so->so_lock); 2318 so_unlock_read(so); /* Clear SOREADLOCKED */ 2319 break; 2320 } 2321 2322 ASSERT(MUTEX_HELD(&so->so_lock)); 2323 2324 /* 2325 * If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them 2326 * was set due to this call and the new state has both of them set: 2327 * Send the AF_UNIX close indication 2328 * For T_COTS send a discon_ind 2329 * 2330 * If cantsend was set due to this call: 2331 * For T_COTSORD send an ordrel_ind 2332 * 2333 * Note that for T_CLTS there is no message sent here. 2334 */ 2335 if ((so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) == 2336 (SS_CANTRCVMORE|SS_CANTSENDMORE)) { 2337 /* 2338 * For SunOS 4.X compatibility we tell the other end 2339 * that we are unable to receive at this point. 2340 */ 2341 if (so->so_family == AF_UNIX && so->so_serv_type != T_CLTS) 2342 so_unix_close(so); 2343 2344 if (so->so_serv_type == T_COTS) 2345 error = sodisconnect(so, -1, _SODISCONNECT_LOCK_HELD); 2346 } 2347 if ((state_change & SS_CANTSENDMORE) && 2348 (so->so_serv_type == T_COTS_ORD)) { 2349 /* Send an orderly release */ 2350 ordrel_req.PRIM_type = T_ORDREL_REQ; 2351 2352 mutex_exit(&so->so_lock); 2353 mp = soallocproto1(&ordrel_req, sizeof (ordrel_req), 2354 0, _ALLOC_SLEEP); 2355 /* 2356 * Send down the T_ORDREL_REQ even if there is flow control. 2357 * This prevents shutdown from blocking. 2358 * Note that there is no T_OK_ACK for ordrel_req. 
2359 */ 2360 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2361 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2362 mutex_enter(&so->so_lock); 2363 if (error) { 2364 eprintsoline(so, error); 2365 goto done; 2366 } 2367 } 2368 2369 done: 2370 so_unlock_single(so, SOLOCKED); 2371 mutex_exit(&so->so_lock); 2372 return (error); 2373 } 2374 2375 /* 2376 * For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send 2377 * a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer 2378 * that we have closed. 2379 * Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length 2380 * T_UNITDATA_REQ containing the same option. 2381 * 2382 * For SOCK_DGRAM half-connections (somebody connected to this end 2383 * but this end is not connect) we don't know where to send any 2384 * SO_UNIX_CLOSE. 2385 * 2386 * We have to ignore stream head errors just in case there has been 2387 * a shutdown(output). 2388 * Ignore any flow control to try to get the message more quickly to the peer. 2389 * While locally ignoring flow control solves the problem when there 2390 * is only the loopback transport on the stream it would not provide 2391 * the correct AF_UNIX socket semantics when one or more modules have 2392 * been pushed. 
2393 */ 2394 void 2395 so_unix_close(struct sonode *so) 2396 { 2397 int error; 2398 struct T_opthdr toh; 2399 mblk_t *mp; 2400 2401 ASSERT(MUTEX_HELD(&so->so_lock)); 2402 2403 ASSERT(so->so_family == AF_UNIX); 2404 2405 if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) != 2406 (SS_ISCONNECTED|SS_ISBOUND)) 2407 return; 2408 2409 dprintso(so, 1, ("so_unix_close(%p) %s\n", 2410 so, pr_state(so->so_state, so->so_mode))); 2411 2412 toh.level = SOL_SOCKET; 2413 toh.name = SO_UNIX_CLOSE; 2414 2415 /* zero length + header */ 2416 toh.len = (t_uscalar_t)sizeof (struct T_opthdr); 2417 toh.status = 0; 2418 2419 if (so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) { 2420 struct T_optdata_req tdr; 2421 2422 tdr.PRIM_type = T_OPTDATA_REQ; 2423 tdr.DATA_flag = 0; 2424 2425 tdr.OPT_length = (t_scalar_t)sizeof (toh); 2426 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 2427 2428 /* NOTE: holding so_lock while sleeping */ 2429 mp = soallocproto2(&tdr, sizeof (tdr), 2430 &toh, sizeof (toh), 0, _ALLOC_SLEEP); 2431 } else { 2432 struct T_unitdata_req tudr; 2433 void *addr; 2434 socklen_t addrlen; 2435 void *src; 2436 socklen_t srclen; 2437 struct T_opthdr toh2; 2438 t_scalar_t size; 2439 2440 /* Connecteded DGRAM socket */ 2441 2442 /* 2443 * For AF_UNIX the destination address is translated to 2444 * an internal name and the source address is passed as 2445 * an option. 2446 */ 2447 /* 2448 * Length and family checks. 2449 */ 2450 error = so_addr_verify(so, so->so_faddr_sa, 2451 (t_uscalar_t)so->so_faddr_len); 2452 if (error) { 2453 eprintsoline(so, error); 2454 return; 2455 } 2456 if (so->so_state & SS_FADDR_NOXLATE) { 2457 /* 2458 * Already have a transport internal address. Do not 2459 * pass any (transport internal) source address. 2460 */ 2461 addr = so->so_faddr_sa; 2462 addrlen = (t_uscalar_t)so->so_faddr_len; 2463 src = NULL; 2464 srclen = 0; 2465 } else { 2466 /* 2467 * Pass the sockaddr_un source address as an option 2468 * and translate the remote address. 
2469 * Holding so_lock thus so_laddr_sa can not change. 2470 */ 2471 src = so->so_laddr_sa; 2472 srclen = (socklen_t)so->so_laddr_len; 2473 dprintso(so, 1, 2474 ("so_ux_close: srclen %d, src %p\n", 2475 srclen, src)); 2476 error = so_ux_addr_xlate(so, 2477 so->so_faddr_sa, 2478 (socklen_t)so->so_faddr_len, 0, 2479 &addr, &addrlen); 2480 if (error) { 2481 eprintsoline(so, error); 2482 return; 2483 } 2484 } 2485 tudr.PRIM_type = T_UNITDATA_REQ; 2486 tudr.DEST_length = addrlen; 2487 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 2488 if (srclen == 0) { 2489 tudr.OPT_length = (t_scalar_t)sizeof (toh); 2490 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2491 _TPI_ALIGN_TOPT(addrlen)); 2492 2493 size = tudr.OPT_offset + tudr.OPT_length; 2494 /* NOTE: holding so_lock while sleeping */ 2495 mp = soallocproto2(&tudr, sizeof (tudr), 2496 addr, addrlen, size, _ALLOC_SLEEP); 2497 mp->b_wptr += (_TPI_ALIGN_TOPT(addrlen) - addrlen); 2498 soappendmsg(mp, &toh, sizeof (toh)); 2499 } else { 2500 /* 2501 * There is a AF_UNIX sockaddr_un to include as a 2502 * source address option. 
2503 */ 2504 tudr.OPT_length = (t_scalar_t)(2 * sizeof (toh) + 2505 _TPI_ALIGN_TOPT(srclen)); 2506 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2507 _TPI_ALIGN_TOPT(addrlen)); 2508 2509 toh2.level = SOL_SOCKET; 2510 toh2.name = SO_SRCADDR; 2511 toh2.len = (t_uscalar_t)(srclen + 2512 sizeof (struct T_opthdr)); 2513 toh2.status = 0; 2514 2515 size = tudr.OPT_offset + tudr.OPT_length; 2516 2517 /* NOTE: holding so_lock while sleeping */ 2518 mp = soallocproto2(&tudr, sizeof (tudr), 2519 addr, addrlen, size, _ALLOC_SLEEP); 2520 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2521 soappendmsg(mp, &toh, sizeof (toh)); 2522 soappendmsg(mp, &toh2, sizeof (toh2)); 2523 soappendmsg(mp, src, srclen); 2524 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2525 } 2526 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2527 } 2528 mutex_exit(&so->so_lock); 2529 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2530 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2531 mutex_enter(&so->so_lock); 2532 } 2533 2534 /* 2535 * Handle recv* calls that set MSG_OOB or MSG_OOB together with MSG_PEEK. 2536 */ 2537 int 2538 sorecvoob(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, int flags) 2539 { 2540 mblk_t *mp, *nmp; 2541 int error; 2542 2543 dprintso(so, 1, ("sorecvoob(%p, %p, 0x%x)\n", so, msg, flags)); 2544 2545 /* 2546 * There is never any oob data with addresses or control since 2547 * the T_EXDATA_IND does not carry any options. 
2548 */ 2549 msg->msg_controllen = 0; 2550 msg->msg_namelen = 0; 2551 2552 mutex_enter(&so->so_lock); 2553 ASSERT(so_verify_oobstate(so)); 2554 if ((so->so_options & SO_OOBINLINE) || 2555 (so->so_state & (SS_OOBPEND|SS_HADOOBDATA)) != SS_OOBPEND) { 2556 dprintso(so, 1, ("sorecvoob: inline or data consumed\n")); 2557 mutex_exit(&so->so_lock); 2558 return (EINVAL); 2559 } 2560 if (!(so->so_state & SS_HAVEOOBDATA)) { 2561 dprintso(so, 1, ("sorecvoob: no data yet\n")); 2562 mutex_exit(&so->so_lock); 2563 return (EWOULDBLOCK); 2564 } 2565 ASSERT(so->so_oobmsg != NULL); 2566 mp = so->so_oobmsg; 2567 if (flags & MSG_PEEK) { 2568 /* 2569 * Since recv* can not return ENOBUFS we can not use dupmsg. 2570 * Instead we revert to the consolidation private 2571 * allocb_wait plus bcopy. 2572 */ 2573 mblk_t *mp1; 2574 2575 mp1 = allocb_wait(msgdsize(mp), BPRI_MED, STR_NOSIG, NULL); 2576 ASSERT(mp1); 2577 2578 while (mp != NULL) { 2579 ssize_t size; 2580 2581 size = MBLKL(mp); 2582 bcopy(mp->b_rptr, mp1->b_wptr, size); 2583 mp1->b_wptr += size; 2584 ASSERT(mp1->b_wptr <= mp1->b_datap->db_lim); 2585 mp = mp->b_cont; 2586 } 2587 mp = mp1; 2588 } else { 2589 /* 2590 * Update the state indicating that the data has been consumed. 2591 * Keep SS_OOBPEND set until data is consumed past the mark. 
2592 */ 2593 so->so_oobmsg = NULL; 2594 so->so_state ^= SS_HAVEOOBDATA|SS_HADOOBDATA; 2595 } 2596 dprintso(so, 1, 2597 ("after recvoob(%p): counts %d/%d state %s\n", 2598 so, so->so_oobsigcnt, 2599 so->so_oobcnt, pr_state(so->so_state, so->so_mode))); 2600 ASSERT(so_verify_oobstate(so)); 2601 mutex_exit(&so->so_lock); 2602 2603 error = 0; 2604 nmp = mp; 2605 while (nmp != NULL && uiop->uio_resid > 0) { 2606 ssize_t n = MBLKL(nmp); 2607 2608 n = MIN(n, uiop->uio_resid); 2609 if (n > 0) 2610 error = uiomove(nmp->b_rptr, n, 2611 UIO_READ, uiop); 2612 if (error) 2613 break; 2614 nmp = nmp->b_cont; 2615 } 2616 freemsg(mp); 2617 return (error); 2618 } 2619 2620 /* 2621 * Called by sotpi_recvmsg when reading a non-zero amount of data. 2622 * In addition, the caller typically verifies that there is some 2623 * potential state to clear by checking 2624 * if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) 2625 * before calling this routine. 2626 * Note that such a check can be made without holding so_lock since 2627 * sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg 2628 * decrements so_oobsigcnt. 2629 * 2630 * When data is read *after* the point that all pending 2631 * oob data has been consumed the oob indication is cleared. 2632 * 2633 * This logic keeps select/poll returning POLLRDBAND and 2634 * SIOCATMARK returning true until we have read past 2635 * the mark. 
2636 */ 2637 static void 2638 sorecv_update_oobstate(struct sonode *so) 2639 { 2640 mutex_enter(&so->so_lock); 2641 ASSERT(so_verify_oobstate(so)); 2642 dprintso(so, 1, 2643 ("sorecv_update_oobstate: counts %d/%d state %s\n", 2644 so->so_oobsigcnt, 2645 so->so_oobcnt, pr_state(so->so_state, so->so_mode))); 2646 if (so->so_oobsigcnt == 0) { 2647 /* No more pending oob indications */ 2648 so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK); 2649 freemsg(so->so_oobmsg); 2650 so->so_oobmsg = NULL; 2651 } 2652 ASSERT(so_verify_oobstate(so)); 2653 mutex_exit(&so->so_lock); 2654 } 2655 2656 /* 2657 * Handle recv* calls for an so which has NL7C saved recv mblk_t(s). 2658 */ 2659 static int 2660 nl7c_sorecv(struct sonode *so, mblk_t **rmp, uio_t *uiop, rval_t *rp) 2661 { 2662 int error = 0; 2663 mblk_t *tmp = NULL; 2664 mblk_t *pmp = NULL; 2665 mblk_t *nmp = so->so_nl7c_rcv_mp; 2666 2667 ASSERT(nmp != NULL); 2668 2669 while (nmp != NULL && uiop->uio_resid > 0) { 2670 ssize_t n; 2671 2672 if (DB_TYPE(nmp) == M_DATA) { 2673 /* 2674 * We have some data, uiomove up to resid bytes. 2675 */ 2676 n = MIN(MBLKL(nmp), uiop->uio_resid); 2677 if (n > 0) 2678 error = uiomove(nmp->b_rptr, n, UIO_READ, uiop); 2679 if (error) 2680 break; 2681 nmp->b_rptr += n; 2682 if (nmp->b_rptr == nmp->b_wptr) { 2683 pmp = nmp; 2684 nmp = nmp->b_cont; 2685 } 2686 } else { 2687 /* 2688 * We only handle data, save for caller to handle. 
2689 */ 2690 if (pmp != NULL) { 2691 pmp->b_cont = nmp->b_cont; 2692 } 2693 nmp->b_cont = NULL; 2694 if (*rmp == NULL) { 2695 *rmp = nmp; 2696 } else { 2697 tmp->b_next = nmp; 2698 } 2699 nmp = nmp->b_cont; 2700 tmp = nmp; 2701 } 2702 } 2703 if (pmp != NULL) { 2704 /* Free any mblk_t(s) which we have consumed */ 2705 pmp->b_cont = NULL; 2706 freemsg(so->so_nl7c_rcv_mp); 2707 } 2708 if ((so->so_nl7c_rcv_mp = nmp) == NULL) { 2709 /* Last mblk_t so return the saved rval from kstrgetmsg() */ 2710 rp->r_vals = so->so_nl7c_rcv_rval; 2711 so->so_nl7c_rcv_rval = 0; 2712 } else { 2713 /* More mblk_t(s) to process so no rval to return */ 2714 rp->r_vals = 0; 2715 } 2716 return (error); 2717 } 2718 2719 /* 2720 * Receive the next message on the queue. 2721 * If msg_controllen is non-zero when called the caller is interested in 2722 * any received control info (options). 2723 * If msg_namelen is non-zero when called the caller is interested in 2724 * any received source address. 2725 * The routine returns with msg_control and msg_name pointing to 2726 * kmem_alloc'ed memory which the caller has to free. 2727 */ 2728 int 2729 sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop) 2730 { 2731 union T_primitives *tpr; 2732 mblk_t *mp; 2733 uchar_t pri; 2734 int pflag, opflag; 2735 void *control; 2736 t_uscalar_t controllen; 2737 t_uscalar_t namelen; 2738 int so_state = so->so_state; /* Snapshot */ 2739 ssize_t saved_resid; 2740 int error; 2741 rval_t rval; 2742 int flags; 2743 clock_t timout; 2744 int first; 2745 2746 flags = msg->msg_flags; 2747 msg->msg_flags = 0; 2748 2749 dprintso(so, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n", 2750 so, msg, flags, 2751 pr_state(so->so_state, so->so_mode), so->so_error)); 2752 2753 /* 2754 * If we are not connected because we have never been connected 2755 * we return ENOTCONN. If we have been connected (but are no longer 2756 * connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return 2757 * the EOF. 
2758 * 2759 * An alternative would be to post an ENOTCONN error in stream head 2760 * (read+write) and clear it when we're connected. However, that error 2761 * would cause incorrect poll/select behavior! 2762 */ 2763 if ((so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 && 2764 (so->so_mode & SM_CONNREQUIRED)) { 2765 return (ENOTCONN); 2766 } 2767 2768 /* 2769 * Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but 2770 * after checking that the read queue is empty) and returns zero. 2771 * This implementation will sleep (in kstrgetmsg) even if uio_resid 2772 * is zero. 2773 */ 2774 2775 if (flags & MSG_OOB) { 2776 /* Check that the transport supports OOB */ 2777 if (!(so->so_mode & SM_EXDATA)) 2778 return (EOPNOTSUPP); 2779 return (sorecvoob(so, msg, uiop, flags)); 2780 } 2781 2782 /* 2783 * Set msg_controllen and msg_namelen to zero here to make it 2784 * simpler in the cases that no control or name is returned. 2785 */ 2786 controllen = msg->msg_controllen; 2787 namelen = msg->msg_namelen; 2788 msg->msg_controllen = 0; 2789 msg->msg_namelen = 0; 2790 2791 dprintso(so, 1, ("sotpi_recvmsg: namelen %d controllen %d\n", 2792 namelen, controllen)); 2793 2794 /* 2795 * If an NL7C enabled socket and not waiting for write data. 2796 */ 2797 mutex_enter(&so->so_lock); 2798 if ((so->so_nl7c_flags & (NL7C_ENABLED|NL7C_WAITWRITE)) == 2799 NL7C_ENABLED) { 2800 if (so->so_nl7c_uri) { 2801 /* 2802 * Close uri processing for a previous request. 2803 */ 2804 nl7c_close(so); 2805 } 2806 if (nl7c_process(so, 2807 (so->so_state & (SS_NONBLOCK|SS_NDELAY)), 2808 (int)((tcp_t *)so->so_priv)->tcp_mss)) { 2809 /* 2810 * NL7C has completed processing on the socket, 2811 * clear the enabled bit as no further NL7C 2812 * processing will be needed. 2813 */ 2814 so->so_nl7c_flags = 0; 2815 } 2816 } 2817 2818 /* 2819 * Only one reader is allowed at any given time. This is needed 2820 * for T_EXDATA handling and, in the future, MSG_WAITALL. 
2821 * 2822 * This is slightly different that BSD behavior in that it fails with 2823 * EWOULDBLOCK when using nonblocking io. In BSD the read queue access 2824 * is single-threaded using sblock(), which is dropped while waiting 2825 * for data to appear. The difference shows up e.g. if one 2826 * file descriptor does not have O_NONBLOCK but a dup'ed file descriptor 2827 * does use nonblocking io and different threads are reading each 2828 * file descriptor. In BSD there would never be an EWOULDBLOCK error 2829 * in this case as long as the read queue doesn't get empty. 2830 * In this implementation the thread using nonblocking io can 2831 * get an EWOULDBLOCK error due to the blocking thread executing 2832 * e.g. in the uiomove in kstrgetmsg. 2833 * This difference is not believed to be significant. 2834 */ 2835 error = so_lock_read_intr(so, uiop->uio_fmode); /* Set SOREADLOCKED */ 2836 mutex_exit(&so->so_lock); 2837 if (error) 2838 return (error); 2839 2840 /* 2841 * Tell kstrgetmsg to not inspect the stream head errors until all 2842 * queued data has been consumed. 2843 * Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set. 2844 * Also, If uio_fmode indicates nonblocking kstrgetmsg will not block. 2845 * 2846 * MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and 2847 * to T_OPTDATA_IND that do not contain any user-visible control msg. 2848 * Note that MSG_WAITALL set with MSG_PEEK is a noop. 2849 */ 2850 pflag = MSG_ANY | MSG_DELAYERROR; 2851 if (flags & MSG_PEEK) { 2852 pflag |= MSG_IPEEK; 2853 flags &= ~MSG_WAITALL; 2854 } 2855 if (so->so_mode & SM_ATOMIC) 2856 pflag |= MSG_DISCARDTAIL; 2857 2858 if (flags & MSG_DONTWAIT) 2859 timout = 0; 2860 else 2861 timout = -1; 2862 opflag = pflag; 2863 first = 1; 2864 2865 /* 2866 * If so saved NL7C rcv mblk_t(s) uiomove them first 2867 * else get'm from the streamhead. 
2868 */ 2869 retry: 2870 saved_resid = uiop->uio_resid; 2871 pri = 0; 2872 mp = NULL; 2873 if (so->so_nl7c_rcv_mp != NULL) { 2874 error = nl7c_sorecv(so, &mp, uiop, &rval); 2875 } else { 2876 error = kstrgetmsg(SOTOV(so), &mp, uiop, &pri, &pflag, 2877 timout, &rval); 2878 } 2879 if (error) { 2880 switch (error) { 2881 case EINTR: 2882 case EWOULDBLOCK: 2883 if (!first) 2884 error = 0; 2885 break; 2886 case ETIME: 2887 /* Returned from kstrgetmsg when timeout expires */ 2888 if (!first) 2889 error = 0; 2890 else 2891 error = EWOULDBLOCK; 2892 break; 2893 default: 2894 eprintsoline(so, error); 2895 break; 2896 } 2897 mutex_enter(&so->so_lock); 2898 so_unlock_read(so); /* Clear SOREADLOCKED */ 2899 mutex_exit(&so->so_lock); 2900 return (error); 2901 } 2902 /* 2903 * For datagrams the MOREDATA flag is used to set MSG_TRUNC. 2904 * For non-datagrams MOREDATA is used to set MSG_EOR. 2905 */ 2906 ASSERT(!(rval.r_val1 & MORECTL)); 2907 if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC)) 2908 msg->msg_flags |= MSG_TRUNC; 2909 2910 if (mp == NULL) { 2911 dprintso(so, 1, ("sotpi_recvmsg: got M_DATA\n")); 2912 /* 2913 * 4.3BSD and 4.4BSD clears the mark when peeking across it. 2914 * The draft Posix socket spec states that the mark should 2915 * not be cleared when peeking. We follow the latter. 2916 */ 2917 if ((so->so_state & 2918 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 2919 (uiop->uio_resid != saved_resid) && 2920 !(flags & MSG_PEEK)) { 2921 sorecv_update_oobstate(so); 2922 } 2923 2924 mutex_enter(&so->so_lock); 2925 /* Set MSG_EOR based on MOREDATA */ 2926 if (!(rval.r_val1 & MOREDATA)) { 2927 if (so->so_state & SS_SAVEDEOR) { 2928 msg->msg_flags |= MSG_EOR; 2929 so->so_state &= ~SS_SAVEDEOR; 2930 } 2931 } 2932 /* 2933 * If some data was received (i.e. not EOF) and the 2934 * read/recv* has not been satisfied wait for some more. 
2935 */ 2936 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 2937 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 2938 mutex_exit(&so->so_lock); 2939 first = 0; 2940 pflag = opflag | MSG_NOMARK; 2941 goto retry; 2942 } 2943 so_unlock_read(so); /* Clear SOREADLOCKED */ 2944 mutex_exit(&so->so_lock); 2945 return (0); 2946 } 2947 2948 /* strsock_proto has already verified length and alignment */ 2949 tpr = (union T_primitives *)mp->b_rptr; 2950 dprintso(so, 1, ("sotpi_recvmsg: type %d\n", tpr->type)); 2951 2952 switch (tpr->type) { 2953 case T_DATA_IND: { 2954 if ((so->so_state & 2955 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 2956 (uiop->uio_resid != saved_resid) && 2957 !(flags & MSG_PEEK)) { 2958 sorecv_update_oobstate(so); 2959 } 2960 2961 /* 2962 * Set msg_flags to MSG_EOR based on 2963 * MORE_flag and MOREDATA. 2964 */ 2965 mutex_enter(&so->so_lock); 2966 so->so_state &= ~SS_SAVEDEOR; 2967 if (!(tpr->data_ind.MORE_flag & 1)) { 2968 if (!(rval.r_val1 & MOREDATA)) 2969 msg->msg_flags |= MSG_EOR; 2970 else 2971 so->so_state |= SS_SAVEDEOR; 2972 } 2973 freemsg(mp); 2974 /* 2975 * If some data was received (i.e. not EOF) and the 2976 * read/recv* has not been satisfied wait for some more. 
2977 */ 2978 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 2979 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 2980 mutex_exit(&so->so_lock); 2981 first = 0; 2982 pflag = opflag | MSG_NOMARK; 2983 goto retry; 2984 } 2985 so_unlock_read(so); /* Clear SOREADLOCKED */ 2986 mutex_exit(&so->so_lock); 2987 return (0); 2988 } 2989 case T_UNITDATA_IND: { 2990 void *addr; 2991 t_uscalar_t addrlen; 2992 void *abuf; 2993 t_uscalar_t optlen; 2994 void *opt; 2995 2996 if ((so->so_state & 2997 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 2998 (uiop->uio_resid != saved_resid) && 2999 !(flags & MSG_PEEK)) { 3000 sorecv_update_oobstate(so); 3001 } 3002 3003 if (namelen != 0) { 3004 /* Caller wants source address */ 3005 addrlen = tpr->unitdata_ind.SRC_length; 3006 addr = sogetoff(mp, 3007 tpr->unitdata_ind.SRC_offset, 3008 addrlen, 1); 3009 if (addr == NULL) { 3010 freemsg(mp); 3011 error = EPROTO; 3012 eprintsoline(so, error); 3013 goto err; 3014 } 3015 if (so->so_family == AF_UNIX) { 3016 /* 3017 * Can not use the transport level address. 3018 * If there is a SO_SRCADDR option carrying 3019 * the socket level address it will be 3020 * extracted below. 3021 */ 3022 addr = NULL; 3023 addrlen = 0; 3024 } 3025 } 3026 optlen = tpr->unitdata_ind.OPT_length; 3027 if (optlen != 0) { 3028 t_uscalar_t ncontrollen; 3029 3030 /* 3031 * Extract any source address option. 3032 * Determine how large cmsg buffer is needed. 
3033 */ 3034 opt = sogetoff(mp, 3035 tpr->unitdata_ind.OPT_offset, 3036 optlen, __TPI_ALIGN_SIZE); 3037 3038 if (opt == NULL) { 3039 freemsg(mp); 3040 error = EPROTO; 3041 eprintsoline(so, error); 3042 goto err; 3043 } 3044 if (so->so_family == AF_UNIX) 3045 so_getopt_srcaddr(opt, optlen, &addr, &addrlen); 3046 ncontrollen = so_cmsglen(mp, opt, optlen, 3047 !(flags & MSG_XPG4_2)); 3048 if (controllen != 0) 3049 controllen = ncontrollen; 3050 else if (ncontrollen != 0) 3051 msg->msg_flags |= MSG_CTRUNC; 3052 } else { 3053 controllen = 0; 3054 } 3055 3056 if (namelen != 0) { 3057 /* 3058 * Return address to caller. 3059 * Caller handles truncation if length 3060 * exceeds msg_namelen. 3061 * NOTE: AF_UNIX NUL termination is ensured by 3062 * the sender's copyin_name(). 3063 */ 3064 abuf = kmem_alloc(addrlen, KM_SLEEP); 3065 3066 bcopy(addr, abuf, addrlen); 3067 msg->msg_name = abuf; 3068 msg->msg_namelen = addrlen; 3069 } 3070 3071 if (controllen != 0) { 3072 /* 3073 * Return control msg to caller. 3074 * Caller handles truncation if length 3075 * exceeds msg_controllen. 
3076 */ 3077 control = kmem_alloc(controllen, KM_SLEEP); 3078 3079 error = so_opt2cmsg(mp, opt, optlen, 3080 !(flags & MSG_XPG4_2), 3081 control, controllen); 3082 if (error) { 3083 freemsg(mp); 3084 if (msg->msg_namelen != 0) 3085 kmem_free(msg->msg_name, 3086 msg->msg_namelen); 3087 kmem_free(control, controllen); 3088 eprintsoline(so, error); 3089 goto err; 3090 } 3091 msg->msg_control = control; 3092 msg->msg_controllen = controllen; 3093 } 3094 3095 freemsg(mp); 3096 mutex_enter(&so->so_lock); 3097 so_unlock_read(so); /* Clear SOREADLOCKED */ 3098 mutex_exit(&so->so_lock); 3099 return (0); 3100 } 3101 case T_OPTDATA_IND: { 3102 struct T_optdata_req *tdr; 3103 void *opt; 3104 t_uscalar_t optlen; 3105 3106 if ((so->so_state & 3107 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3108 (uiop->uio_resid != saved_resid) && 3109 !(flags & MSG_PEEK)) { 3110 sorecv_update_oobstate(so); 3111 } 3112 3113 tdr = (struct T_optdata_req *)mp->b_rptr; 3114 optlen = tdr->OPT_length; 3115 if (optlen != 0) { 3116 t_uscalar_t ncontrollen; 3117 /* 3118 * Determine how large cmsg buffer is needed. 3119 */ 3120 opt = sogetoff(mp, 3121 tpr->optdata_ind.OPT_offset, 3122 optlen, __TPI_ALIGN_SIZE); 3123 3124 if (opt == NULL) { 3125 freemsg(mp); 3126 error = EPROTO; 3127 eprintsoline(so, error); 3128 goto err; 3129 } 3130 3131 ncontrollen = so_cmsglen(mp, opt, optlen, 3132 !(flags & MSG_XPG4_2)); 3133 if (controllen != 0) 3134 controllen = ncontrollen; 3135 else if (ncontrollen != 0) 3136 msg->msg_flags |= MSG_CTRUNC; 3137 } else { 3138 controllen = 0; 3139 } 3140 3141 if (controllen != 0) { 3142 /* 3143 * Return control msg to caller. 3144 * Caller handles truncation if length 3145 * exceeds msg_controllen. 
3146 */ 3147 control = kmem_alloc(controllen, KM_SLEEP); 3148 3149 error = so_opt2cmsg(mp, opt, optlen, 3150 !(flags & MSG_XPG4_2), 3151 control, controllen); 3152 if (error) { 3153 freemsg(mp); 3154 kmem_free(control, controllen); 3155 eprintsoline(so, error); 3156 goto err; 3157 } 3158 msg->msg_control = control; 3159 msg->msg_controllen = controllen; 3160 } 3161 3162 /* 3163 * Set msg_flags to MSG_EOR based on 3164 * DATA_flag and MOREDATA. 3165 */ 3166 mutex_enter(&so->so_lock); 3167 so->so_state &= ~SS_SAVEDEOR; 3168 if (!(tpr->data_ind.MORE_flag & 1)) { 3169 if (!(rval.r_val1 & MOREDATA)) 3170 msg->msg_flags |= MSG_EOR; 3171 else 3172 so->so_state |= SS_SAVEDEOR; 3173 } 3174 freemsg(mp); 3175 /* 3176 * If some data was received (i.e. not EOF) and the 3177 * read/recv* has not been satisfied wait for some more. 3178 * Not possible to wait if control info was received. 3179 */ 3180 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3181 controllen == 0 && 3182 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3183 mutex_exit(&so->so_lock); 3184 first = 0; 3185 pflag = opflag | MSG_NOMARK; 3186 goto retry; 3187 } 3188 so_unlock_read(so); /* Clear SOREADLOCKED */ 3189 mutex_exit(&so->so_lock); 3190 return (0); 3191 } 3192 case T_EXDATA_IND: { 3193 dprintso(so, 1, 3194 ("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld " 3195 "state %s\n", 3196 so->so_oobsigcnt, so->so_oobcnt, 3197 saved_resid - uiop->uio_resid, 3198 pr_state(so->so_state, so->so_mode))); 3199 /* 3200 * kstrgetmsg handles MSGMARK so there is nothing to 3201 * inspect in the T_EXDATA_IND. 3202 * strsock_proto makes the stream head queue the T_EXDATA_IND 3203 * as a separate message with no M_DATA component. Furthermore, 3204 * the stream head does not consolidate M_DATA messages onto 3205 * an MSGMARK'ed message ensuring that the T_EXDATA_IND 3206 * remains a message by itself. 
This is needed since MSGMARK 3207 * marks both the whole message as well as the last byte 3208 * of the message. 3209 */ 3210 freemsg(mp); 3211 ASSERT(uiop->uio_resid == saved_resid); /* No data */ 3212 if (flags & MSG_PEEK) { 3213 /* 3214 * Even though we are peeking we consume the 3215 * T_EXDATA_IND thereby moving the mark information 3216 * to SS_RCVATMARK. Then the oob code below will 3217 * retry the peeking kstrgetmsg. 3218 * Note that the stream head read queue is 3219 * never flushed without holding SOREADLOCKED 3220 * thus the T_EXDATA_IND can not disappear 3221 * underneath us. 3222 */ 3223 dprintso(so, 1, 3224 ("sotpi_recvmsg: consume EXDATA_IND " 3225 "counts %d/%d state %s\n", 3226 so->so_oobsigcnt, 3227 so->so_oobcnt, 3228 pr_state(so->so_state, so->so_mode))); 3229 3230 pflag = MSG_ANY | MSG_DELAYERROR; 3231 if (so->so_mode & SM_ATOMIC) 3232 pflag |= MSG_DISCARDTAIL; 3233 3234 pri = 0; 3235 mp = NULL; 3236 3237 error = kstrgetmsg(SOTOV(so), &mp, uiop, 3238 &pri, &pflag, (clock_t)-1, &rval); 3239 ASSERT(uiop->uio_resid == saved_resid); 3240 3241 if (error) { 3242 #ifdef SOCK_DEBUG 3243 if (error != EWOULDBLOCK && error != EINTR) { 3244 eprintsoline(so, error); 3245 } 3246 #endif /* SOCK_DEBUG */ 3247 mutex_enter(&so->so_lock); 3248 so_unlock_read(so); /* Clear SOREADLOCKED */ 3249 mutex_exit(&so->so_lock); 3250 return (error); 3251 } 3252 ASSERT(mp); 3253 tpr = (union T_primitives *)mp->b_rptr; 3254 ASSERT(tpr->type == T_EXDATA_IND); 3255 freemsg(mp); 3256 } /* end "if (flags & MSG_PEEK)" */ 3257 3258 /* 3259 * Decrement the number of queued and pending oob. 3260 * 3261 * SS_RCVATMARK is cleared when we read past a mark. 3262 * SS_HAVEOOBDATA is cleared when we've read past the 3263 * last mark. 3264 * SS_OOBPEND is cleared if we've read past the last 3265 * mark and no (new) SIGURG has been posted. 
3266 */ 3267 mutex_enter(&so->so_lock); 3268 ASSERT(so_verify_oobstate(so)); 3269 ASSERT(so->so_oobsigcnt >= so->so_oobcnt); 3270 ASSERT(so->so_oobsigcnt > 0); 3271 so->so_oobsigcnt--; 3272 ASSERT(so->so_oobcnt > 0); 3273 so->so_oobcnt--; 3274 /* 3275 * Since the T_EXDATA_IND has been removed from the stream 3276 * head, but we have not read data past the mark, 3277 * sockfs needs to track that the socket is still at the mark. 3278 * 3279 * Since no data was received call kstrgetmsg again to wait 3280 * for data. 3281 */ 3282 so->so_state |= SS_RCVATMARK; 3283 mutex_exit(&so->so_lock); 3284 dprintso(so, 1, 3285 ("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n", 3286 so->so_oobsigcnt, so->so_oobcnt, 3287 pr_state(so->so_state, so->so_mode))); 3288 pflag = opflag; 3289 goto retry; 3290 } 3291 default: 3292 ASSERT(0); 3293 freemsg(mp); 3294 error = EPROTO; 3295 eprintsoline(so, error); 3296 goto err; 3297 } 3298 /* NOTREACHED */ 3299 err: 3300 mutex_enter(&so->so_lock); 3301 so_unlock_read(so); /* Clear SOREADLOCKED */ 3302 mutex_exit(&so->so_lock); 3303 return (error); 3304 } 3305 3306 /* 3307 * Sending data with options on a datagram socket. 3308 * Assumes caller has verified that SS_ISBOUND etc. are set. 
3309 */ 3310 static int 3311 sosend_dgramcmsg(struct sonode *so, 3312 struct sockaddr *name, 3313 t_uscalar_t namelen, 3314 struct uio *uiop, 3315 void *control, 3316 t_uscalar_t controllen, 3317 int flags) 3318 { 3319 struct T_unitdata_req tudr; 3320 mblk_t *mp; 3321 int error; 3322 void *addr; 3323 socklen_t addrlen; 3324 void *src; 3325 socklen_t srclen; 3326 ssize_t len; 3327 int size; 3328 struct T_opthdr toh; 3329 struct fdbuf *fdbuf; 3330 t_uscalar_t optlen; 3331 void *fds; 3332 int fdlen; 3333 3334 ASSERT(name && namelen); 3335 ASSERT(control && controllen); 3336 3337 len = uiop->uio_resid; 3338 if (len > (ssize_t)so->so_tidu_size) { 3339 return (EMSGSIZE); 3340 } 3341 3342 /* 3343 * For AF_UNIX the destination address is translated to an internal 3344 * name and the source address is passed as an option. 3345 * Also, file descriptors are passed as file pointers in an 3346 * option. 3347 */ 3348 3349 /* 3350 * Length and family checks. 3351 */ 3352 error = so_addr_verify(so, name, namelen); 3353 if (error) { 3354 eprintsoline(so, error); 3355 return (error); 3356 } 3357 if (so->so_family == AF_UNIX) { 3358 if (so->so_state & SS_FADDR_NOXLATE) { 3359 /* 3360 * Already have a transport internal address. Do not 3361 * pass any (transport internal) source address. 3362 */ 3363 addr = name; 3364 addrlen = namelen; 3365 src = NULL; 3366 srclen = 0; 3367 } else { 3368 /* 3369 * Pass the sockaddr_un source address as an option 3370 * and translate the remote address. 3371 * 3372 * Note that this code does not prevent so_laddr_sa 3373 * from changing while it is being used. Thus 3374 * if an unbind+bind occurs concurrently with this 3375 * send the peer might see a partially new and a 3376 * partially old "from" address. 
3377 */ 3378 src = so->so_laddr_sa; 3379 srclen = (t_uscalar_t)so->so_laddr_len; 3380 dprintso(so, 1, 3381 ("sosend_dgramcmsg UNIX: srclen %d, src %p\n", 3382 srclen, src)); 3383 error = so_ux_addr_xlate(so, name, namelen, 3384 (flags & MSG_XPG4_2), 3385 &addr, &addrlen); 3386 if (error) { 3387 eprintsoline(so, error); 3388 return (error); 3389 } 3390 } 3391 } else { 3392 addr = name; 3393 addrlen = namelen; 3394 src = NULL; 3395 srclen = 0; 3396 } 3397 optlen = so_optlen(control, controllen, 3398 !(flags & MSG_XPG4_2)); 3399 tudr.PRIM_type = T_UNITDATA_REQ; 3400 tudr.DEST_length = addrlen; 3401 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3402 if (srclen != 0) 3403 tudr.OPT_length = (t_scalar_t)(optlen + sizeof (toh) + 3404 _TPI_ALIGN_TOPT(srclen)); 3405 else 3406 tudr.OPT_length = optlen; 3407 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3408 _TPI_ALIGN_TOPT(addrlen)); 3409 3410 size = tudr.OPT_offset + tudr.OPT_length; 3411 3412 /* 3413 * File descriptors only when SM_FDPASSING set. 3414 */ 3415 error = so_getfdopt(control, controllen, 3416 !(flags & MSG_XPG4_2), &fds, &fdlen); 3417 if (error) 3418 return (error); 3419 if (fdlen != -1) { 3420 if (!(so->so_mode & SM_FDPASSING)) 3421 return (EOPNOTSUPP); 3422 3423 error = fdbuf_create(fds, fdlen, &fdbuf); 3424 if (error) 3425 return (error); 3426 mp = fdbuf_allocmsg(size, fdbuf); 3427 if (mp == NULL) 3428 fdbuf_free(fdbuf); 3429 } else { 3430 mp = soallocproto(size, _ALLOC_INTR); 3431 } 3432 if (mp == NULL) { 3433 /* 3434 * Caught a signal waiting for memory. 3435 * Let send* return EINTR. 
3436 */ 3437 return (EINTR); 3438 } 3439 soappendmsg(mp, &tudr, sizeof (tudr)); 3440 soappendmsg(mp, addr, addrlen); 3441 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3442 3443 if (fdlen != -1) { 3444 ASSERT(fdbuf != NULL); 3445 toh.level = SOL_SOCKET; 3446 toh.name = SO_FILEP; 3447 toh.len = fdbuf->fd_size + 3448 (t_uscalar_t)sizeof (struct T_opthdr); 3449 toh.status = 0; 3450 soappendmsg(mp, &toh, sizeof (toh)); 3451 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3452 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3453 } 3454 if (srclen != 0) { 3455 /* 3456 * There is a AF_UNIX sockaddr_un to include as a source 3457 * address option. 3458 */ 3459 toh.level = SOL_SOCKET; 3460 toh.name = SO_SRCADDR; 3461 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3462 toh.status = 0; 3463 soappendmsg(mp, &toh, sizeof (toh)); 3464 soappendmsg(mp, src, srclen); 3465 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3466 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3467 } 3468 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3469 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3470 /* At most 3 bytes left in the message */ 3471 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3472 ASSERT(MBLKL(mp) <= (ssize_t)size); 3473 3474 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3475 #ifdef C2_AUDIT 3476 if (audit_active) 3477 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3478 #endif /* C2_AUDIT */ 3479 3480 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3481 #ifdef SOCK_DEBUG 3482 if (error) { 3483 eprintsoline(so, error); 3484 } 3485 #endif /* SOCK_DEBUG */ 3486 return (error); 3487 } 3488 3489 /* 3490 * Sending data with options on a connected stream socket. 3491 * Assumes caller has verified that SS_ISCONNECTED is set. 
3492 */ 3493 static int 3494 sosend_svccmsg(struct sonode *so, 3495 struct uio *uiop, 3496 int more, 3497 void *control, 3498 t_uscalar_t controllen, 3499 int flags) 3500 { 3501 struct T_optdata_req tdr; 3502 mblk_t *mp; 3503 int error; 3504 ssize_t iosize; 3505 int first = 1; 3506 int size; 3507 struct fdbuf *fdbuf; 3508 t_uscalar_t optlen; 3509 void *fds; 3510 int fdlen; 3511 struct T_opthdr toh; 3512 3513 dprintso(so, 1, 3514 ("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid)); 3515 3516 /* 3517 * Has to be bound and connected. However, since no locks are 3518 * held the state could have changed after sotpi_sendmsg checked it 3519 * thus it is not possible to ASSERT on the state. 3520 */ 3521 3522 /* Options on connection-oriented only when SM_OPTDATA set. */ 3523 if (!(so->so_mode & SM_OPTDATA)) 3524 return (EOPNOTSUPP); 3525 3526 do { 3527 /* 3528 * Set the MORE flag if uio_resid does not fit in this 3529 * message or if the caller passed in "more". 3530 * Error for transports with zero tidu_size. 3531 */ 3532 tdr.PRIM_type = T_OPTDATA_REQ; 3533 iosize = so->so_tidu_size; 3534 if (iosize <= 0) 3535 return (EMSGSIZE); 3536 if (uiop->uio_resid > iosize) { 3537 tdr.DATA_flag = 1; 3538 } else { 3539 if (more) 3540 tdr.DATA_flag = 1; 3541 else 3542 tdr.DATA_flag = 0; 3543 iosize = uiop->uio_resid; 3544 } 3545 dprintso(so, 1, ("sosend_svccmsg: sending %d, %ld bytes\n", 3546 tdr.DATA_flag, iosize)); 3547 3548 optlen = so_optlen(control, controllen, !(flags & MSG_XPG4_2)); 3549 tdr.OPT_length = optlen; 3550 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 3551 3552 size = (int)sizeof (tdr) + optlen; 3553 /* 3554 * File descriptors only when SM_FDPASSING set. 
3555 */ 3556 error = so_getfdopt(control, controllen, 3557 !(flags & MSG_XPG4_2), &fds, &fdlen); 3558 if (error) 3559 return (error); 3560 if (fdlen != -1) { 3561 if (!(so->so_mode & SM_FDPASSING)) 3562 return (EOPNOTSUPP); 3563 3564 error = fdbuf_create(fds, fdlen, &fdbuf); 3565 if (error) 3566 return (error); 3567 mp = fdbuf_allocmsg(size, fdbuf); 3568 if (mp == NULL) 3569 fdbuf_free(fdbuf); 3570 } else { 3571 mp = soallocproto(size, _ALLOC_INTR); 3572 } 3573 3574 if (mp == NULL) { 3575 /* 3576 * Caught a signal waiting for memory. 3577 * Let send* return EINTR. 3578 */ 3579 if (first) 3580 return (EINTR); 3581 else 3582 return (0); 3583 } 3584 soappendmsg(mp, &tdr, sizeof (tdr)); 3585 3586 if (fdlen != -1) { 3587 ASSERT(fdbuf != NULL); 3588 toh.level = SOL_SOCKET; 3589 toh.name = SO_FILEP; 3590 toh.len = fdbuf->fd_size + 3591 (t_uscalar_t)sizeof (struct T_opthdr); 3592 toh.status = 0; 3593 soappendmsg(mp, &toh, sizeof (toh)); 3594 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3595 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3596 } 3597 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3598 /* At most 3 bytes left in the message */ 3599 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3600 ASSERT(MBLKL(mp) <= (ssize_t)size); 3601 3602 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3603 3604 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 3605 0, MSG_BAND, 0); 3606 if (error) { 3607 if (!first && error == EWOULDBLOCK) 3608 return (0); 3609 eprintsoline(so, error); 3610 return (error); 3611 } 3612 control = NULL; 3613 first = 0; 3614 if (uiop->uio_resid > 0) { 3615 /* 3616 * Recheck for fatal errors. Fail write even though 3617 * some data have been written. This is consistent 3618 * with strwrite semantics and BSD sockets semantics. 
3619 */ 3620 if (so->so_state & SS_CANTSENDMORE) { 3621 tsignal(curthread, SIGPIPE); 3622 eprintsoline(so, error); 3623 return (EPIPE); 3624 } 3625 if (so->so_error != 0) { 3626 mutex_enter(&so->so_lock); 3627 error = sogeterr(so); 3628 mutex_exit(&so->so_lock); 3629 if (error != 0) { 3630 eprintsoline(so, error); 3631 return (error); 3632 } 3633 } 3634 } 3635 } while (uiop->uio_resid > 0); 3636 return (0); 3637 } 3638 3639 /* 3640 * Sending data on a datagram socket. 3641 * Assumes caller has verified that SS_ISBOUND etc. are set. 3642 * 3643 * For AF_UNIX the destination address is translated to an internal 3644 * name and the source address is passed as an option. 3645 */ 3646 int 3647 sosend_dgram(struct sonode *so, 3648 struct sockaddr *name, 3649 socklen_t namelen, 3650 struct uio *uiop, 3651 int flags) 3652 { 3653 struct T_unitdata_req tudr; 3654 mblk_t *mp; 3655 int error; 3656 void *addr; 3657 socklen_t addrlen; 3658 void *src; 3659 socklen_t srclen; 3660 ssize_t len; 3661 3662 ASSERT(name && namelen); 3663 3664 len = uiop->uio_resid; 3665 if (len > so->so_tidu_size) { 3666 error = EMSGSIZE; 3667 goto done; 3668 } 3669 3670 /* 3671 * Length and family checks. 3672 */ 3673 error = so_addr_verify(so, name, namelen); 3674 if (error) { 3675 eprintsoline(so, error); 3676 goto done; 3677 } 3678 if (so->so_family == AF_UNIX) { 3679 if (so->so_state & SS_FADDR_NOXLATE) { 3680 /* 3681 * Already have a transport internal address. Do not 3682 * pass any (transport internal) source address. 3683 */ 3684 addr = name; 3685 addrlen = namelen; 3686 src = NULL; 3687 srclen = 0; 3688 } else { 3689 /* 3690 * Pass the sockaddr_un source address as an option 3691 * and translate the remote address. 3692 * 3693 * Note that this code does not prevent so_laddr_sa 3694 * from changing while it is being used. Thus 3695 * if an unbind+bind occurs concurrently with this 3696 * send the peer might see a partially new and a 3697 * partially old "from" address. 
3698 */ 3699 src = so->so_laddr_sa; 3700 srclen = (socklen_t)so->so_laddr_len; 3701 dprintso(so, 1, 3702 ("sosend_dgram UNIX: srclen %d, src %p\n", 3703 srclen, src)); 3704 error = so_ux_addr_xlate(so, name, namelen, 3705 (flags & MSG_XPG4_2), 3706 &addr, &addrlen); 3707 if (error) { 3708 eprintsoline(so, error); 3709 goto done; 3710 } 3711 } 3712 } else { 3713 addr = name; 3714 addrlen = namelen; 3715 src = NULL; 3716 srclen = 0; 3717 } 3718 tudr.PRIM_type = T_UNITDATA_REQ; 3719 tudr.DEST_length = addrlen; 3720 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3721 if (srclen == 0) { 3722 tudr.OPT_length = 0; 3723 tudr.OPT_offset = 0; 3724 3725 mp = soallocproto2(&tudr, sizeof (tudr), 3726 addr, addrlen, 0, _ALLOC_INTR); 3727 if (mp == NULL) { 3728 /* 3729 * Caught a signal waiting for memory. 3730 * Let send* return EINTR. 3731 */ 3732 error = EINTR; 3733 goto done; 3734 } 3735 } else { 3736 /* 3737 * There is a AF_UNIX sockaddr_un to include as a source 3738 * address option. 3739 */ 3740 struct T_opthdr toh; 3741 ssize_t size; 3742 3743 tudr.OPT_length = (t_scalar_t)(sizeof (toh) + 3744 _TPI_ALIGN_TOPT(srclen)); 3745 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3746 _TPI_ALIGN_TOPT(addrlen)); 3747 3748 toh.level = SOL_SOCKET; 3749 toh.name = SO_SRCADDR; 3750 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3751 toh.status = 0; 3752 3753 size = tudr.OPT_offset + tudr.OPT_length; 3754 mp = soallocproto2(&tudr, sizeof (tudr), 3755 addr, addrlen, size, _ALLOC_INTR); 3756 if (mp == NULL) { 3757 /* 3758 * Caught a signal waiting for memory. 3759 * Let send* return EINTR. 
3760 */ 3761 error = EINTR; 3762 goto done; 3763 } 3764 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3765 soappendmsg(mp, &toh, sizeof (toh)); 3766 soappendmsg(mp, src, srclen); 3767 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3768 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3769 } 3770 3771 #ifdef C2_AUDIT 3772 if (audit_active) 3773 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3774 #endif /* C2_AUDIT */ 3775 3776 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3777 done: 3778 #ifdef SOCK_DEBUG 3779 if (error) { 3780 eprintsoline(so, error); 3781 } 3782 #endif /* SOCK_DEBUG */ 3783 return (error); 3784 } 3785 3786 /* 3787 * Sending data on a connected stream socket. 3788 * Assumes caller has verified that SS_ISCONNECTED is set. 3789 */ 3790 int 3791 sosend_svc(struct sonode *so, 3792 struct uio *uiop, 3793 t_scalar_t prim, 3794 int more, 3795 int sflag) 3796 { 3797 struct T_data_req tdr; 3798 mblk_t *mp; 3799 int error; 3800 ssize_t iosize; 3801 int first = 1; 3802 3803 dprintso(so, 1, 3804 ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n", 3805 so, uiop->uio_resid, prim, sflag)); 3806 3807 /* 3808 * Has to be bound and connected. However, since no locks are 3809 * held the state could have changed after sotpi_sendmsg checked it 3810 * thus it is not possible to ASSERT on the state. 3811 */ 3812 3813 do { 3814 /* 3815 * Set the MORE flag if uio_resid does not fit in this 3816 * message or if the caller passed in "more". 3817 * Error for transports with zero tidu_size. 
3818 */ 3819 tdr.PRIM_type = prim; 3820 iosize = so->so_tidu_size; 3821 if (iosize <= 0) 3822 return (EMSGSIZE); 3823 if (uiop->uio_resid > iosize) { 3824 tdr.MORE_flag = 1; 3825 } else { 3826 if (more) 3827 tdr.MORE_flag = 1; 3828 else 3829 tdr.MORE_flag = 0; 3830 iosize = uiop->uio_resid; 3831 } 3832 dprintso(so, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n", 3833 prim, tdr.MORE_flag, iosize)); 3834 mp = soallocproto1(&tdr, sizeof (tdr), 0, _ALLOC_INTR); 3835 if (mp == NULL) { 3836 /* 3837 * Caught a signal waiting for memory. 3838 * Let send* return EINTR. 3839 */ 3840 if (first) 3841 return (EINTR); 3842 else 3843 return (0); 3844 } 3845 3846 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 3847 0, sflag | MSG_BAND, 0); 3848 if (error) { 3849 if (!first && error == EWOULDBLOCK) 3850 return (0); 3851 eprintsoline(so, error); 3852 return (error); 3853 } 3854 first = 0; 3855 if (uiop->uio_resid > 0) { 3856 /* 3857 * Recheck for fatal errors. Fail write even though 3858 * some data have been written. This is consistent 3859 * with strwrite semantics and BSD sockets semantics. 3860 */ 3861 if (so->so_state & SS_CANTSENDMORE) { 3862 tsignal(curthread, SIGPIPE); 3863 eprintsoline(so, error); 3864 return (EPIPE); 3865 } 3866 if (so->so_error != 0) { 3867 mutex_enter(&so->so_lock); 3868 error = sogeterr(so); 3869 mutex_exit(&so->so_lock); 3870 if (error != 0) { 3871 eprintsoline(so, error); 3872 return (error); 3873 } 3874 } 3875 } 3876 } while (uiop->uio_resid > 0); 3877 return (0); 3878 } 3879 3880 /* 3881 * Check the state for errors and call the appropriate send function. 3882 * 3883 * If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set) 3884 * this function issues a setsockopt to toggle SO_DONTROUTE before and 3885 * after sending the message. 
 *
 * Errors returned before anything is sent:
 *	EPIPE		SS_CANTSENDMORE is set (SIGPIPE is posted as well)
 *	ENOTCONN	SM_CONNREQUIRED socket that is not connected
 *	EDESTADDRREQ	no msg_name and the socket is not connected
 *	EISCONN		msg_name given on a connected socket that does not
 *			have SM_CONNREQUIRED set
 *	EOPNOTSUPP	MSG_OOB without SM_EXDATA, or MSG_OOB together with
 *			control data on a SM_CONNREQUIRED socket
 * A pending so_error, a failed implicit bind, a matching delayed
 * datagram error, or a failure to set SO_DONTROUTE is returned as well.
 * Otherwise the result of the selected send routine is returned.
 */
static int
sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
{
	int		so_state;
	int		so_mode;
	int		error;
	struct sockaddr	*name;
	t_uscalar_t	namelen;
	int		dontroute;
	int		flags;

	dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n",
		so, msg, msg->msg_flags,
		pr_state(so->so_state, so->so_mode), so->so_error));

	/* Snapshot the state under so_lock; the checks below use the copy. */
	mutex_enter(&so->so_lock);
	so_state = so->so_state;

	if (so_state & SS_CANTSENDMORE) {
		mutex_exit(&so->so_lock);
		tsignal(curthread, SIGPIPE);
		return (EPIPE);
	}

	/* Report a pending socket error before attempting to send. */
	if (so->so_error != 0) {
		error = sogeterr(so);
		if (error != 0) {
			mutex_exit(&so->so_lock);
			return (error);
		}
	}

	name = (struct sockaddr *)msg->msg_name;
	namelen = msg->msg_namelen;

	so_mode = so->so_mode;

	if (name == NULL) {
		/* No destination supplied: the socket must be connected. */
		if (!(so_state & SS_ISCONNECTED)) {
			mutex_exit(&so->so_lock);
			if (so_mode & SM_CONNREQUIRED)
				return (ENOTCONN);
			else
				return (EDESTADDRREQ);
		}
		if (so_mode & SM_CONNREQUIRED) {
			name = NULL;
			namelen = 0;
		} else {
			/*
			 * Note that this code does not prevent so_faddr_sa
			 * from changing while it is being used. Thus
			 * if an "unconnect"+connect occurs concurrently with
			 * this send the datagram might be delivered to a
			 * garbled address.
			 */
			ASSERT(so->so_faddr_sa);
			name = so->so_faddr_sa;
			namelen = (t_uscalar_t)so->so_faddr_len;
		}
	} else {
		if (!(so_state & SS_ISCONNECTED) &&
		    (so_mode & SM_CONNREQUIRED)) {
			/* Required but not connected */
			mutex_exit(&so->so_lock);
			return (ENOTCONN);
		}
		/*
		 * Ignore the address on connection-oriented sockets.
		 * Just like BSD this code does not generate an error for
		 * TCP (a CONNREQUIRED socket) when sending to an address
		 * passed in with sendto/sendmsg. Instead the data is
		 * delivered on the connection as if no address had been
		 * supplied.
		 *
		 * A connected socket without SM_CONNREQUIRED, on the other
		 * hand, fails with EISCONN when an address is supplied.
		 */
		if ((so_state & SS_ISCONNECTED) &&
		    !(so_mode & SM_CONNREQUIRED)) {
			mutex_exit(&so->so_lock);
			return (EISCONN);
		}
		/* Bind an unbound socket (unspecified address) first. */
		if (!(so_state & SS_ISBOUND)) {
			so_lock_single(so);	/* Set SOLOCKED */
			error = sotpi_bind(so, NULL, 0,
			    _SOBIND_UNSPEC|_SOBIND_LOCK_HELD);
			so_unlock_single(so, SOLOCKED);
			if (error) {
				mutex_exit(&so->so_lock);
				eprintsoline(so, error);
				return (error);
			}
		}
		/*
		 * Handle delayed datagram errors. These are only queued
		 * when the application sets SO_DGRAM_ERRIND.
		 * Return the error if we are sending to the address
		 * that was returned in the last T_UDERROR_IND.
		 * If sending to some other address discard the delayed
		 * error indication.
		 */
		if (so->so_delayed_error) {
			struct T_uderror_ind	*tudi;
			void			*addr;
			t_uscalar_t		addrlen;
			boolean_t		match = B_FALSE;

			ASSERT(so->so_eaddr_mp);
			/* The delayed error is consumed whether it matches or not. */
			error = so->so_delayed_error;
			so->so_delayed_error = 0;
			tudi = (struct T_uderror_ind *)so->so_eaddr_mp->b_rptr;
			addrlen = tudi->DEST_length;
			addr = sogetoff(so->so_eaddr_mp,
					tudi->DEST_offset,
					addrlen, 1);
			ASSERT(addr);	/* Checked by strsock_proto */
			switch (so->so_family) {
			case AF_INET: {
				/* Compare just IP address and port */
				sin_t *sin1 = (sin_t *)name;
				sin_t *sin2 = (sin_t *)addr;

				if (addrlen == sizeof (sin_t) &&
				    namelen == addrlen &&
				    sin1->sin_port == sin2->sin_port &&
				    sin1->sin_addr.s_addr ==
				    sin2->sin_addr.s_addr)
					match = B_TRUE;
				break;
			}
			case AF_INET6: {
				/* Compare just IP address and port. Not flow */
				sin6_t *sin1 = (sin6_t *)name;
				sin6_t *sin2 = (sin6_t *)addr;

				if (addrlen == sizeof (sin6_t) &&
				    namelen == addrlen &&
				    sin1->sin6_port == sin2->sin6_port &&
				    IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
					&sin2->sin6_addr))
					match = B_TRUE;
				break;
			}
			case AF_UNIX:
			default:
				/* Other families: byte-wise address compare. */
				if (namelen == addrlen &&
				    bcmp(name, addr, namelen) == 0)
					match = B_TRUE;
			}
			if (match) {
				freemsg(so->so_eaddr_mp);
				so->so_eaddr_mp = NULL;
				mutex_exit(&so->so_lock);
#ifdef DEBUG
				dprintso(so, 0,
				    ("sockfs delayed error %d for %s\n",
				    error,
				    pr_addr(so->so_family, name, namelen)));
#endif /* DEBUG */
				return (error);
			}
			freemsg(so->so_eaddr_mp);
			so->so_eaddr_mp = NULL;
		}
	}
	mutex_exit(&so->so_lock);

	/*
	 * From here on so_lock is not held.  When SO_DONTROUTE is turned on
	 * below, "dontroute" records that it must be turned off again at
	 * "done"; every exit after that point therefore goes through "done".
	 */
	flags = msg->msg_flags;
	dontroute = 0;
	if ((flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE)) {
		uint32_t	val;

		val = 1;
		error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE,
					&val, (t_uscalar_t)sizeof (val));
		if (error)
			return (error);
		dontroute = 1;
	}

	if ((flags & MSG_OOB) && !(so_mode & SM_EXDATA)) {
		error = EOPNOTSUPP;
		goto done;
	}
	/* Control data present: use the option-carrying send routines. */
	if (msg->msg_controllen != 0) {
		if (!(so_mode & SM_CONNREQUIRED)) {
			error = sosend_dgramcmsg(so, name, namelen, uiop,
			    msg->msg_control, msg->msg_controllen,
			    flags);
		} else {
			if (flags & MSG_OOB) {
				/* Can't generate T_EXDATA_REQ with options */
				error = EOPNOTSUPP;
				goto done;
			}
			error = sosend_svccmsg(so, uiop,
				!(flags & MSG_EOR),
				msg->msg_control, msg->msg_controllen,
				flags);
		}
		goto done;
	}

	if (!(so_mode & SM_CONNREQUIRED)) {
		/*
		 * If there is no SO_DONTROUTE to turn off return immediately
		 * from sosend_dgram. This can allow tail-call optimizations.
		 */
		if (!dontroute) {
			return (sosend_dgram(so, name, namelen, uiop, flags));
		}
		error = sosend_dgram(so, name, namelen, uiop, flags);
	} else {
		t_scalar_t prim;
		int sflag;

		/* Ignore msg_name in the connected state */
		if (flags & MSG_OOB) {
			prim = T_EXDATA_REQ;
			/*
			 * Send down T_EXDATA_REQ even if there is flow
			 * control for data.
			 */
			sflag = MSG_IGNFLOW;
		} else {
			if (so_mode & SM_BYTESTREAM) {
				/* Byte stream transport - use write */

				dprintso(so, 1, ("sotpi_sendmsg: write\n"));
				/*
				 * If there is no SO_DONTROUTE to turn off
				 * return immediately from strwrite. This can
				 * allow tail-call optimizations.
				 */
				if (!dontroute)
					return (strwrite(SOTOV(so), uiop,
							CRED()));
				error = strwrite(SOTOV(so), uiop, CRED());
				goto done;
			}
			prim = T_DATA_REQ;
			sflag = 0;
		}
		/*
		 * If there is no SO_DONTROUTE to turn off return immediately
		 * from sosend_svc. This can allow tail-call optimizations.
		 */
		if (!dontroute)
			return (sosend_svc(so, uiop, prim,
				!(flags & MSG_EOR), sflag));
		error = sosend_svc(so, uiop, prim,
				!(flags & MSG_EOR), sflag);
	}
	/* Falling through to here only happens on the dontroute paths. */
	ASSERT(dontroute);
done:
	if (dontroute) {
		uint32_t	val;

		/* Best effort: the result of turning it off is ignored. */
		val = 0;
		(void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE,
				&val, (t_uscalar_t)sizeof (val));
	}
	return (error);
}

/*
 * Update so_faddr by asking the transport (unless AF_UNIX).
4152 */ 4153 int 4154 sotpi_getpeername(struct sonode *so) 4155 { 4156 struct strbuf strbuf; 4157 int error = 0, res; 4158 void *addr; 4159 t_uscalar_t addrlen; 4160 k_sigset_t smask; 4161 4162 dprintso(so, 1, ("sotpi_getpeername(%p) %s\n", 4163 so, pr_state(so->so_state, so->so_mode))); 4164 4165 mutex_enter(&so->so_lock); 4166 so_lock_single(so); /* Set SOLOCKED */ 4167 if (!(so->so_state & SS_ISCONNECTED)) { 4168 error = ENOTCONN; 4169 goto done; 4170 } 4171 /* Added this check for X/Open */ 4172 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 4173 error = EINVAL; 4174 if (xnet_check_print) { 4175 printf("sockfs: X/Open getpeername check => EINVAL\n"); 4176 } 4177 goto done; 4178 } 4179 #ifdef DEBUG 4180 dprintso(so, 1, ("sotpi_getpeername (local): %s\n", 4181 pr_addr(so->so_family, so->so_faddr_sa, 4182 (t_uscalar_t)so->so_faddr_len))); 4183 #endif /* DEBUG */ 4184 4185 if (so->so_family == AF_UNIX || so->so_family == AF_NCA) { 4186 /* Transport has different name space - return local info */ 4187 error = 0; 4188 goto done; 4189 } 4190 4191 ASSERT(so->so_faddr_sa); 4192 /* Allocate local buffer to use with ioctl */ 4193 addrlen = (t_uscalar_t)so->so_faddr_maxlen; 4194 mutex_exit(&so->so_lock); 4195 addr = kmem_alloc(addrlen, KM_SLEEP); 4196 4197 /* 4198 * Issue TI_GETPEERNAME with signals masked. 4199 * Put the result in so_faddr_sa so that getpeername works after 4200 * a shutdown(output). 4201 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 4202 * back to the socket. 4203 */ 4204 strbuf.buf = addr; 4205 strbuf.maxlen = addrlen; 4206 strbuf.len = 0; 4207 4208 sigintr(&smask, 0); 4209 res = 0; 4210 ASSERT(CRED()); 4211 error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf, 4212 0, K_TO_K, CRED(), &res); 4213 sigunintr(&smask); 4214 4215 mutex_enter(&so->so_lock); 4216 /* 4217 * If there is an error record the error in so_error put don't fail 4218 * the getpeername. 
Instead fallback on the recorded 4219 * so->so_faddr_sa. 4220 */ 4221 if (error) { 4222 /* 4223 * Various stream head errors can be returned to the ioctl. 4224 * However, it is impossible to determine which ones of 4225 * these are really socket level errors that were incorrectly 4226 * consumed by the ioctl. Thus this code silently ignores the 4227 * error - to code explicitly does not reinstate the error 4228 * using soseterror(). 4229 * Experiments have shows that at least this set of 4230 * errors are reported and should not be reinstated on the 4231 * socket: 4232 * EINVAL E.g. if an I_LINK was in effect when 4233 * getpeername was called. 4234 * EPIPE The ioctl error semantics prefer the write 4235 * side error over the read side error. 4236 * ENOTCONN The transport just got disconnected but 4237 * sockfs had not yet seen the T_DISCON_IND 4238 * when issuing the ioctl. 4239 */ 4240 error = 0; 4241 } else if (res == 0 && strbuf.len > 0 && 4242 (so->so_state & SS_ISCONNECTED)) { 4243 ASSERT(strbuf.len <= (int)so->so_faddr_maxlen); 4244 so->so_faddr_len = (socklen_t)strbuf.len; 4245 bcopy(addr, so->so_faddr_sa, so->so_faddr_len); 4246 so->so_state |= SS_FADDR_VALID; 4247 } 4248 kmem_free(addr, addrlen); 4249 #ifdef DEBUG 4250 dprintso(so, 1, ("sotpi_getpeername (tp): %s\n", 4251 pr_addr(so->so_family, so->so_faddr_sa, 4252 (t_uscalar_t)so->so_faddr_len))); 4253 #endif /* DEBUG */ 4254 done: 4255 so_unlock_single(so, SOLOCKED); 4256 mutex_exit(&so->so_lock); 4257 return (error); 4258 } 4259 4260 /* 4261 * Update so_laddr by asking the transport (unless AF_UNIX). 
4262 */ 4263 int 4264 sotpi_getsockname(struct sonode *so) 4265 { 4266 struct strbuf strbuf; 4267 int error = 0, res; 4268 void *addr; 4269 t_uscalar_t addrlen; 4270 k_sigset_t smask; 4271 4272 dprintso(so, 1, ("sotpi_getsockname(%p) %s\n", 4273 so, pr_state(so->so_state, so->so_mode))); 4274 4275 mutex_enter(&so->so_lock); 4276 so_lock_single(so); /* Set SOLOCKED */ 4277 if (!(so->so_state & SS_ISBOUND) && so->so_family != AF_UNIX) { 4278 /* Return an all zero address except for the family */ 4279 if (so->so_family == AF_INET) 4280 so->so_laddr_len = (socklen_t)sizeof (sin_t); 4281 else if (so->so_family == AF_INET6) 4282 so->so_laddr_len = (socklen_t)sizeof (sin6_t); 4283 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 4284 bzero(so->so_laddr_sa, so->so_laddr_len); 4285 /* 4286 * Can not assume there is a sa_family for all 4287 * protocol families. 4288 */ 4289 if (so->so_family == AF_INET || so->so_family == AF_INET6) 4290 so->so_laddr_sa->sa_family = so->so_family; 4291 } 4292 #ifdef DEBUG 4293 dprintso(so, 1, ("sotpi_getsockname (local): %s\n", 4294 pr_addr(so->so_family, so->so_laddr_sa, 4295 (t_uscalar_t)so->so_laddr_len))); 4296 #endif /* DEBUG */ 4297 if (so->so_family == AF_UNIX) { 4298 /* Transport has different name space - return local info */ 4299 error = 0; 4300 goto done; 4301 } 4302 /* Allocate local buffer to use with ioctl */ 4303 addrlen = (t_uscalar_t)so->so_laddr_maxlen; 4304 mutex_exit(&so->so_lock); 4305 addr = kmem_alloc(addrlen, KM_SLEEP); 4306 4307 /* 4308 * Issue TI_GETMYNAME with signals masked. 4309 * Put the result in so_laddr_sa so that getsockname works after 4310 * a shutdown(output). 4311 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 4312 * back to the socket. 
4313 */ 4314 strbuf.buf = addr; 4315 strbuf.maxlen = addrlen; 4316 strbuf.len = 0; 4317 4318 sigintr(&smask, 0); 4319 res = 0; 4320 ASSERT(CRED()); 4321 error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf, 4322 0, K_TO_K, CRED(), &res); 4323 sigunintr(&smask); 4324 4325 mutex_enter(&so->so_lock); 4326 /* 4327 * If there is an error record the error in so_error put don't fail 4328 * the getsockname. Instead fallback on the recorded 4329 * so->so_laddr_sa. 4330 */ 4331 if (error) { 4332 /* 4333 * Various stream head errors can be returned to the ioctl. 4334 * However, it is impossible to determine which ones of 4335 * these are really socket level errors that were incorrectly 4336 * consumed by the ioctl. Thus this code silently ignores the 4337 * error - to code explicitly does not reinstate the error 4338 * using soseterror(). 4339 * Experiments have shows that at least this set of 4340 * errors are reported and should not be reinstated on the 4341 * socket: 4342 * EINVAL E.g. if an I_LINK was in effect when 4343 * getsockname was called. 4344 * EPIPE The ioctl error semantics prefer the write 4345 * side error over the read side error. 4346 */ 4347 error = 0; 4348 } else if (res == 0 && strbuf.len > 0 && 4349 (so->so_state & SS_ISBOUND)) { 4350 ASSERT(strbuf.len <= (int)so->so_laddr_maxlen); 4351 so->so_laddr_len = (socklen_t)strbuf.len; 4352 bcopy(addr, so->so_laddr_sa, so->so_laddr_len); 4353 so->so_state |= SS_LADDR_VALID; 4354 } 4355 kmem_free(addr, addrlen); 4356 #ifdef DEBUG 4357 dprintso(so, 1, ("sotpi_getsockname (tp): %s\n", 4358 pr_addr(so->so_family, so->so_laddr_sa, 4359 (t_uscalar_t)so->so_laddr_len))); 4360 #endif /* DEBUG */ 4361 done: 4362 so_unlock_single(so, SOLOCKED); 4363 mutex_exit(&so->so_lock); 4364 return (error); 4365 } 4366 4367 /* 4368 * Get socket options. For SOL_SOCKET options some options are handled 4369 * by the sockfs while others use the value recorded in the sonode as a 4370 * fallback should the T_SVR4_OPTMGMT_REQ fail. 
4371 * 4372 * On the return most *optlenp bytes are copied to optval. 4373 */ 4374 int 4375 sotpi_getsockopt(struct sonode *so, int level, int option_name, 4376 void *optval, socklen_t *optlenp, int flags) 4377 { 4378 struct T_optmgmt_req optmgmt_req; 4379 struct T_optmgmt_ack *optmgmt_ack; 4380 struct opthdr oh; 4381 struct opthdr *opt_res; 4382 mblk_t *mp = NULL; 4383 int error = 0; 4384 void *option = NULL; /* Set if fallback value */ 4385 t_uscalar_t maxlen = *optlenp; 4386 t_uscalar_t len; 4387 uint32_t value; 4388 4389 dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n", 4390 so, level, option_name, optval, optlenp, 4391 pr_state(so->so_state, so->so_mode))); 4392 4393 mutex_enter(&so->so_lock); 4394 so_lock_single(so); /* Set SOLOCKED */ 4395 4396 /* 4397 * Check for SOL_SOCKET options. 4398 * Certain SOL_SOCKET options are returned directly whereas 4399 * others only provide a default (fallback) value should 4400 * the T_SVR4_OPTMGMT_REQ fail. 4401 */ 4402 if (level == SOL_SOCKET) { 4403 /* Check parameters */ 4404 switch (option_name) { 4405 case SO_TYPE: 4406 case SO_ERROR: 4407 case SO_DEBUG: 4408 case SO_ACCEPTCONN: 4409 case SO_REUSEADDR: 4410 case SO_KEEPALIVE: 4411 case SO_DONTROUTE: 4412 case SO_BROADCAST: 4413 case SO_USELOOPBACK: 4414 case SO_OOBINLINE: 4415 case SO_SNDBUF: 4416 case SO_RCVBUF: 4417 #ifdef notyet 4418 case SO_SNDLOWAT: 4419 case SO_RCVLOWAT: 4420 case SO_SNDTIMEO: 4421 case SO_RCVTIMEO: 4422 #endif /* notyet */ 4423 case SO_DGRAM_ERRIND: 4424 if (maxlen < (t_uscalar_t)sizeof (int32_t)) { 4425 error = EINVAL; 4426 eprintsoline(so, error); 4427 goto done2; 4428 } 4429 break; 4430 case SO_LINGER: 4431 if (maxlen < (t_uscalar_t)sizeof (struct linger)) { 4432 error = EINVAL; 4433 eprintsoline(so, error); 4434 goto done2; 4435 } 4436 break; 4437 } 4438 4439 len = (t_uscalar_t)sizeof (uint32_t); /* Default */ 4440 4441 switch (option_name) { 4442 case SO_TYPE: 4443 value = so->so_type; 4444 option = &value; 4445 goto 
copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4446 4447 case SO_ERROR: 4448 value = sogeterr(so); 4449 option = &value; 4450 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4451 4452 case SO_ACCEPTCONN: 4453 if (so->so_state & SS_ACCEPTCONN) 4454 value = SO_ACCEPTCONN; 4455 else 4456 value = 0; 4457 #ifdef DEBUG 4458 if (value) { 4459 dprintso(so, 1, 4460 ("sotpi_getsockopt: 0x%x is set\n", 4461 option_name)); 4462 } else { 4463 dprintso(so, 1, 4464 ("sotpi_getsockopt: 0x%x not set\n", 4465 option_name)); 4466 } 4467 #endif /* DEBUG */ 4468 option = &value; 4469 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4470 4471 case SO_DEBUG: 4472 case SO_REUSEADDR: 4473 case SO_KEEPALIVE: 4474 case SO_DONTROUTE: 4475 case SO_BROADCAST: 4476 case SO_USELOOPBACK: 4477 case SO_OOBINLINE: 4478 case SO_DGRAM_ERRIND: 4479 value = (so->so_options & option_name); 4480 #ifdef DEBUG 4481 if (value) { 4482 dprintso(so, 1, 4483 ("sotpi_getsockopt: 0x%x is set\n", 4484 option_name)); 4485 } else { 4486 dprintso(so, 1, 4487 ("sotpi_getsockopt: 0x%x not set\n", 4488 option_name)); 4489 } 4490 #endif /* DEBUG */ 4491 option = &value; 4492 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4493 4494 /* 4495 * The following options are only returned by sockfs when the 4496 * T_SVR4_OPTMGMT_REQ fails. 4497 */ 4498 case SO_LINGER: 4499 option = &so->so_linger; 4500 len = (t_uscalar_t)sizeof (struct linger); 4501 break; 4502 case SO_SNDBUF: { 4503 ssize_t lvalue; 4504 4505 /* 4506 * If the option has not been set then get a default 4507 * value from the read queue. This value is 4508 * returned if the transport fails 4509 * the T_SVR4_OPTMGMT_REQ. 
4510 */ 4511 lvalue = so->so_sndbuf; 4512 if (lvalue == 0) { 4513 mutex_exit(&so->so_lock); 4514 (void) strqget(strvp2wq(SOTOV(so))->q_next, 4515 QHIWAT, 0, &lvalue); 4516 mutex_enter(&so->so_lock); 4517 dprintso(so, 1, 4518 ("got SO_SNDBUF %ld from q\n", lvalue)); 4519 } 4520 value = (int)lvalue; 4521 option = &value; 4522 len = (t_uscalar_t)sizeof (so->so_sndbuf); 4523 break; 4524 } 4525 case SO_RCVBUF: { 4526 ssize_t lvalue; 4527 4528 /* 4529 * If the option has not been set then get a default 4530 * value from the read queue. This value is 4531 * returned if the transport fails 4532 * the T_SVR4_OPTMGMT_REQ. 4533 * 4534 * XXX If SO_RCVBUF has been set and this is an 4535 * XPG 4.2 application then do not ask the transport 4536 * since the transport might adjust the value and not 4537 * return exactly what was set by the application. 4538 * For non-XPG 4.2 application we return the value 4539 * that the transport is actually using. 4540 */ 4541 lvalue = so->so_rcvbuf; 4542 if (lvalue == 0) { 4543 mutex_exit(&so->so_lock); 4544 (void) strqget(RD(strvp2wq(SOTOV(so))), 4545 QHIWAT, 0, &lvalue); 4546 mutex_enter(&so->so_lock); 4547 dprintso(so, 1, 4548 ("got SO_RCVBUF %ld from q\n", lvalue)); 4549 } else if (flags & _SOGETSOCKOPT_XPG4_2) { 4550 value = (int)lvalue; 4551 option = &value; 4552 goto copyout; /* skip asking transport */ 4553 } 4554 value = (int)lvalue; 4555 option = &value; 4556 len = (t_uscalar_t)sizeof (so->so_rcvbuf); 4557 break; 4558 } 4559 #ifdef notyet 4560 /* 4561 * We do not implement the semantics of these options 4562 * thus we shouldn't implement the options either. 
4563 */ 4564 case SO_SNDLOWAT: 4565 value = so->so_sndlowat; 4566 option = &value; 4567 break; 4568 case SO_RCVLOWAT: 4569 value = so->so_rcvlowat; 4570 option = &value; 4571 break; 4572 case SO_SNDTIMEO: 4573 value = so->so_sndtimeo; 4574 option = &value; 4575 break; 4576 case SO_RCVTIMEO: 4577 value = so->so_rcvtimeo; 4578 option = &value; 4579 break; 4580 #endif /* notyet */ 4581 } 4582 } 4583 4584 if (so->so_family == AF_NCA) { 4585 goto done2; 4586 } 4587 4588 mutex_exit(&so->so_lock); 4589 4590 /* Send request */ 4591 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 4592 optmgmt_req.MGMT_flags = T_CHECK; 4593 optmgmt_req.OPT_length = (t_scalar_t)(sizeof (oh) + maxlen); 4594 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 4595 4596 oh.level = level; 4597 oh.name = option_name; 4598 oh.len = maxlen; 4599 4600 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 4601 &oh, sizeof (oh), NULL, maxlen, 0, _ALLOC_SLEEP); 4602 /* Let option management work in the presence of data flow control */ 4603 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 4604 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 4605 mp = NULL; 4606 mutex_enter(&so->so_lock); 4607 if (error) { 4608 eprintsoline(so, error); 4609 goto done2; 4610 } 4611 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 4612 (t_uscalar_t)(sizeof (*optmgmt_ack) + sizeof (*opt_res)), &mp, 0); 4613 if (error) { 4614 if (option != NULL) { 4615 /* We have a fallback value */ 4616 error = 0; 4617 goto copyout; 4618 } 4619 eprintsoline(so, error); 4620 goto done2; 4621 } 4622 ASSERT(mp); 4623 optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr; 4624 opt_res = (struct opthdr *)sogetoff(mp, optmgmt_ack->OPT_offset, 4625 optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE); 4626 if (opt_res == NULL) { 4627 if (option != NULL) { 4628 /* We have a fallback value */ 4629 error = 0; 4630 goto copyout; 4631 } 4632 error = EPROTO; 4633 eprintsoline(so, error); 4634 goto done; 4635 } 4636 option = &opt_res[1]; 4637 4638 /* check 
to ensure that the option is within bounds */ 4639 if (((uintptr_t)option + opt_res->len < (uintptr_t)option) || 4640 (uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) { 4641 if (option != NULL) { 4642 /* We have a fallback value */ 4643 error = 0; 4644 goto copyout; 4645 } 4646 error = EPROTO; 4647 eprintsoline(so, error); 4648 goto done; 4649 } 4650 4651 len = opt_res->len; 4652 4653 copyout: { 4654 t_uscalar_t size = MIN(len, maxlen); 4655 bcopy(option, optval, size); 4656 bcopy(&size, optlenp, sizeof (size)); 4657 } 4658 done: 4659 freemsg(mp); 4660 done2: 4661 so_unlock_single(so, SOLOCKED); 4662 mutex_exit(&so->so_lock); 4663 return (error); 4664 } 4665 4666 /* 4667 * Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ. 4668 * SOL_SOCKET options are also recorded in the sonode. A setsockopt for 4669 * SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails - 4670 * setsockopt has to work even if the transport does not support the option. 
4671 */ 4672 int 4673 sotpi_setsockopt(struct sonode *so, int level, int option_name, 4674 const void *optval, t_uscalar_t optlen) 4675 { 4676 struct T_optmgmt_req optmgmt_req; 4677 struct opthdr oh; 4678 mblk_t *mp; 4679 int error = 0; 4680 boolean_t handled = B_FALSE; 4681 4682 dprintso(so, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n", 4683 so, level, option_name, optval, optlen, 4684 pr_state(so->so_state, so->so_mode))); 4685 4686 4687 /* X/Open requires this check */ 4688 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 4689 if (xnet_check_print) 4690 printf("sockfs: X/Open setsockopt check => EINVAL\n"); 4691 return (EINVAL); 4692 } 4693 4694 /* Caller allocates aligned optval, or passes null */ 4695 ASSERT(((uintptr_t)optval & (sizeof (t_scalar_t) - 1)) == 0); 4696 /* If optval is null optlen is 0, and vice-versa */ 4697 ASSERT(optval != NULL || optlen == 0); 4698 ASSERT(optlen != 0 || optval == NULL); 4699 4700 mutex_enter(&so->so_lock); 4701 so_lock_single(so); /* Set SOLOCKED */ 4702 mutex_exit(&so->so_lock); 4703 4704 if (so->so_family == AF_NCA) { 4705 /* Ignore any flow control problems with the transport. */ 4706 mutex_enter(&so->so_lock); 4707 goto done; 4708 } 4709 4710 /* 4711 * For SOCKET or TCP level options, try to set it here itself 4712 * provided socket has not been popped and we know the tcp 4713 * structure (stored in so_priv). 
4714 */ 4715 if ((level == SOL_SOCKET || level == IPPROTO_TCP) && 4716 (so->so_family == AF_INET || so->so_family == AF_INET6) && 4717 (so->so_version == SOV_SOCKSTREAM) && (so->so_priv != NULL)) { 4718 tcp_t *tcp = so->so_priv; 4719 boolean_t onoff; 4720 4721 #define intvalue (*(int32_t *)optval) 4722 4723 switch (level) { 4724 case SOL_SOCKET: 4725 switch (option_name) { /* Check length param */ 4726 case SO_DEBUG: 4727 case SO_REUSEADDR: 4728 case SO_DONTROUTE: 4729 case SO_BROADCAST: 4730 case SO_USELOOPBACK: 4731 case SO_OOBINLINE: 4732 case SO_DGRAM_ERRIND: 4733 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 4734 error = EINVAL; 4735 eprintsoline(so, error); 4736 mutex_enter(&so->so_lock); 4737 goto done2; 4738 } 4739 ASSERT(optval); 4740 onoff = intvalue != 0; 4741 handled = B_TRUE; 4742 break; 4743 case SO_LINGER: 4744 if (optlen != 4745 (t_uscalar_t)sizeof (struct linger)) { 4746 error = EINVAL; 4747 eprintsoline(so, error); 4748 mutex_enter(&so->so_lock); 4749 goto done2; 4750 } 4751 ASSERT(optval); 4752 handled = B_TRUE; 4753 break; 4754 } 4755 4756 switch (option_name) { /* Do actions */ 4757 case SO_LINGER: { 4758 struct linger *lgr = (struct linger *)optval; 4759 4760 if (lgr->l_onoff) { 4761 tcp->tcp_linger = 1; 4762 tcp->tcp_lingertime = lgr->l_linger; 4763 so->so_linger.l_onoff = SO_LINGER; 4764 so->so_options |= SO_LINGER; 4765 } else { 4766 tcp->tcp_linger = 0; 4767 tcp->tcp_lingertime = 0; 4768 so->so_linger.l_onoff = 0; 4769 so->so_options &= ~SO_LINGER; 4770 } 4771 so->so_linger.l_linger = lgr->l_linger; 4772 handled = B_TRUE; 4773 break; 4774 } 4775 case SO_DEBUG: 4776 tcp->tcp_debug = onoff; 4777 #ifdef SOCK_TEST 4778 if (intvalue & 2) 4779 sock_test_timelimit = 10 * hz; 4780 else 4781 sock_test_timelimit = 0; 4782 4783 if (intvalue & 4) 4784 do_useracc = 0; 4785 else 4786 do_useracc = 1; 4787 #endif /* SOCK_TEST */ 4788 break; 4789 case SO_DONTROUTE: 4790 /* 4791 * SO_DONTROUTE, SO_USELOOPBACK and 4792 * SO_BROADCAST are only of interest to 
IP. 4793 * We track them here only so 4794 * that we can report their current value. 4795 */ 4796 tcp->tcp_dontroute = onoff; 4797 if (onoff) 4798 so->so_options |= option_name; 4799 else 4800 so->so_options &= ~option_name; 4801 break; 4802 case SO_USELOOPBACK: 4803 tcp->tcp_useloopback = onoff; 4804 if (onoff) 4805 so->so_options |= option_name; 4806 else 4807 so->so_options &= ~option_name; 4808 break; 4809 case SO_BROADCAST: 4810 tcp->tcp_broadcast = onoff; 4811 if (onoff) 4812 so->so_options |= option_name; 4813 else 4814 so->so_options &= ~option_name; 4815 break; 4816 case SO_REUSEADDR: 4817 tcp->tcp_reuseaddr = onoff; 4818 if (onoff) 4819 so->so_options |= option_name; 4820 else 4821 so->so_options &= ~option_name; 4822 break; 4823 case SO_OOBINLINE: 4824 tcp->tcp_oobinline = onoff; 4825 if (onoff) 4826 so->so_options |= option_name; 4827 else 4828 so->so_options &= ~option_name; 4829 break; 4830 case SO_DGRAM_ERRIND: 4831 tcp->tcp_dgram_errind = onoff; 4832 if (onoff) 4833 so->so_options |= option_name; 4834 else 4835 so->so_options &= ~option_name; 4836 break; 4837 } 4838 break; 4839 case IPPROTO_TCP: 4840 switch (option_name) { 4841 case TCP_NODELAY: 4842 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 4843 error = EINVAL; 4844 eprintsoline(so, error); 4845 mutex_enter(&so->so_lock); 4846 goto done2; 4847 } 4848 ASSERT(optval); 4849 tcp->tcp_naglim = intvalue ? 
1 : tcp->tcp_mss; 4850 handled = B_TRUE; 4851 break; 4852 } 4853 break; 4854 default: 4855 handled = B_FALSE; 4856 break; 4857 } 4858 } 4859 4860 if (handled) { 4861 mutex_enter(&so->so_lock); 4862 goto done2; 4863 } 4864 4865 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 4866 optmgmt_req.MGMT_flags = T_NEGOTIATE; 4867 optmgmt_req.OPT_length = (t_scalar_t)sizeof (oh) + optlen; 4868 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 4869 4870 oh.level = level; 4871 oh.name = option_name; 4872 oh.len = optlen; 4873 4874 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 4875 &oh, sizeof (oh), optval, optlen, 0, _ALLOC_SLEEP); 4876 /* Let option management work in the presence of data flow control */ 4877 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 4878 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 4879 mp = NULL; 4880 mutex_enter(&so->so_lock); 4881 if (error) { 4882 eprintsoline(so, error); 4883 goto done; 4884 } 4885 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 4886 (t_uscalar_t)sizeof (struct T_optmgmt_ack), &mp, 0); 4887 if (error) { 4888 eprintsoline(so, error); 4889 goto done; 4890 } 4891 ASSERT(mp); 4892 /* No need to verify T_optmgmt_ack */ 4893 freemsg(mp); 4894 done: 4895 /* 4896 * Check for SOL_SOCKET options and record their values. 4897 * If we know about a SOL_SOCKET parameter and the transport 4898 * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or 4899 * EPROTO) we let the setsockopt succeed. 
4900 */ 4901 if (level == SOL_SOCKET) { 4902 /* Check parameters */ 4903 switch (option_name) { 4904 case SO_DEBUG: 4905 case SO_REUSEADDR: 4906 case SO_KEEPALIVE: 4907 case SO_DONTROUTE: 4908 case SO_BROADCAST: 4909 case SO_USELOOPBACK: 4910 case SO_OOBINLINE: 4911 case SO_SNDBUF: 4912 case SO_RCVBUF: 4913 #ifdef notyet 4914 case SO_SNDLOWAT: 4915 case SO_RCVLOWAT: 4916 case SO_SNDTIMEO: 4917 case SO_RCVTIMEO: 4918 #endif /* notyet */ 4919 case SO_DGRAM_ERRIND: 4920 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 4921 error = EINVAL; 4922 eprintsoline(so, error); 4923 goto done2; 4924 } 4925 ASSERT(optval); 4926 handled = B_TRUE; 4927 break; 4928 case SO_LINGER: 4929 if (optlen != (t_uscalar_t)sizeof (struct linger)) { 4930 error = EINVAL; 4931 eprintsoline(so, error); 4932 goto done2; 4933 } 4934 ASSERT(optval); 4935 handled = B_TRUE; 4936 break; 4937 } 4938 4939 #define intvalue (*(int32_t *)optval) 4940 4941 switch (option_name) { 4942 case SO_TYPE: 4943 case SO_ERROR: 4944 case SO_ACCEPTCONN: 4945 /* Can't be set */ 4946 error = ENOPROTOOPT; 4947 goto done2; 4948 case SO_LINGER: { 4949 struct linger *l = (struct linger *)optval; 4950 4951 so->so_linger.l_linger = l->l_linger; 4952 if (l->l_onoff) { 4953 so->so_linger.l_onoff = SO_LINGER; 4954 so->so_options |= SO_LINGER; 4955 } else { 4956 so->so_linger.l_onoff = 0; 4957 so->so_options &= ~SO_LINGER; 4958 } 4959 break; 4960 } 4961 4962 case SO_DEBUG: 4963 #ifdef SOCK_TEST 4964 if (intvalue & 2) 4965 sock_test_timelimit = 10 * hz; 4966 else 4967 sock_test_timelimit = 0; 4968 4969 if (intvalue & 4) 4970 do_useracc = 0; 4971 else 4972 do_useracc = 1; 4973 #endif /* SOCK_TEST */ 4974 /* FALLTHRU */ 4975 case SO_REUSEADDR: 4976 case SO_KEEPALIVE: 4977 case SO_DONTROUTE: 4978 case SO_BROADCAST: 4979 case SO_USELOOPBACK: 4980 case SO_OOBINLINE: 4981 case SO_DGRAM_ERRIND: 4982 if (intvalue != 0) { 4983 dprintso(so, 1, 4984 ("sotpi_setsockopt: setting 0x%x\n", 4985 option_name)); 4986 so->so_options |= option_name; 4987 
} else { 4988 dprintso(so, 1, 4989 ("sotpi_setsockopt: clearing 0x%x\n", 4990 option_name)); 4991 so->so_options &= ~option_name; 4992 } 4993 break; 4994 /* 4995 * The following options are only returned by us when the 4996 * T_SVR4_OPTMGMT_REQ fails. 4997 * XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs 4998 * since the transport might adjust the value and not 4999 * return exactly what was set by the application. 5000 */ 5001 case SO_SNDBUF: 5002 so->so_sndbuf = intvalue; 5003 break; 5004 case SO_RCVBUF: 5005 so->so_rcvbuf = intvalue; 5006 break; 5007 #ifdef notyet 5008 /* 5009 * We do not implement the semantics of these options 5010 * thus we shouldn't implement the options either. 5011 */ 5012 case SO_SNDLOWAT: 5013 so->so_sndlowat = intvalue; 5014 break; 5015 case SO_RCVLOWAT: 5016 so->so_rcvlowat = intvalue; 5017 break; 5018 case SO_SNDTIMEO: 5019 so->so_sndtimeo = intvalue; 5020 break; 5021 case SO_RCVTIMEO: 5022 so->so_rcvtimeo = intvalue; 5023 break; 5024 #endif /* notyet */ 5025 } 5026 #undef intvalue 5027 5028 if (error) { 5029 if ((error == ENOPROTOOPT || error == EPROTO || 5030 error == EINVAL) && handled) { 5031 dprintso(so, 1, 5032 ("setsockopt: ignoring error %d for 0x%x\n", 5033 error, option_name)); 5034 error = 0; 5035 } 5036 } 5037 } 5038 done2: 5039 ret: 5040 so_unlock_single(so, SOLOCKED); 5041 mutex_exit(&so->so_lock); 5042 return (error); 5043 } 5044