1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/t_lock.h> 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/buf.h> 34 #include <sys/conf.h> 35 #include <sys/cred.h> 36 #include <sys/kmem.h> 37 #include <sys/sysmacros.h> 38 #include <sys/vfs.h> 39 #include <sys/vnode.h> 40 #include <sys/debug.h> 41 #include <sys/errno.h> 42 #include <sys/time.h> 43 #include <sys/file.h> 44 #include <sys/open.h> 45 #include <sys/user.h> 46 #include <sys/termios.h> 47 #include <sys/stream.h> 48 #include <sys/strsubr.h> 49 #include <sys/strsun.h> 50 #include <sys/ddi.h> 51 #include <sys/esunddi.h> 52 #include <sys/flock.h> 53 #include <sys/modctl.h> 54 #include <sys/vtrace.h> 55 #include <sys/cmn_err.h> 56 #include <sys/pathname.h> 57 58 #include <sys/socket.h> 59 #include <sys/socketvar.h> 60 #include <netinet/in.h> 61 #include <sys/un.h> 62 #include <sys/strsun.h> 63 64 #include <sys/tiuser.h> 65 #define _SUN_TPI_VERSION 2 66 #include <sys/tihdr.h> 67 
#include <sys/timod.h>		/* TI_GETMYNAME, TI_GETPEERNAME */

#include <c2/audit.h>

#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/tcp.h>

#include <fs/sockfs/nl7c.h>
#include <sys/zone.h>

/*
 * Possible failures when memory can't be allocated. The documented behavior:
 *
 *		5.5:			4.X:		XNET:
 * accept:	ENOMEM/ENOSR/EINTR	- (EINTR)	ENOMEM/ENOBUFS/ENOSR/
 *							EINTR
 *	(4.X does not document EINTR but returns it)
 * bind:	ENOSR			-		ENOBUFS/ENOSR
 * connect:	EINTR			EINTR		ENOBUFS/ENOSR/EINTR
 * getpeername:	ENOMEM/ENOSR		ENOBUFS (-)	ENOBUFS/ENOSR
 * getsockname:	ENOMEM/ENOSR		ENOBUFS (-)	ENOBUFS/ENOSR
 *	(4.X getpeername and getsockname do not fail in practice)
 * getsockopt:	ENOMEM/ENOSR		-		ENOBUFS/ENOSR
 * listen:	-			-		ENOBUFS
 * recv:	ENOMEM/ENOSR/EINTR	EINTR		ENOBUFS/ENOMEM/ENOSR/
 *							EINTR
 * send:	ENOMEM/ENOSR/EINTR	ENOBUFS/EINTR	ENOBUFS/ENOMEM/ENOSR/
 *							EINTR
 * setsockopt:	ENOMEM/ENOSR		-		ENOBUFS/ENOMEM/ENOSR
 * shutdown:	ENOMEM/ENOSR		-		ENOBUFS/ENOSR
 * socket:	ENOMEM/ENOSR		ENOBUFS		ENOBUFS/ENOMEM/ENOSR
 * socketpair:	ENOMEM/ENOSR		-		ENOBUFS/ENOMEM/ENOSR
 *
 * Resolution. When allocation fails:
 *	recv: return EINTR
 *	send: return EINTR
 *	connect, accept: EINTR
 *	bind, listen, shutdown (unbind, unix_close, disconnect): sleep
 *	socket, socketpair: ENOBUFS
 *	getpeername, getsockname: sleep
 *	getsockopt, setsockopt: sleep
 */

#ifdef SOCK_TEST
/*
 * Variables that make sockfs do something other than the standard TPI
 * for the AF_INET transports.
 *
 * solisten_tpi_tcp:
 *	TCP can handle a O_T_BIND_REQ with an increased backlog even though
 *	the transport is already bound. This is needed to avoid losing the
 *	port number should listen() do a T_UNBIND_REQ followed by a
 *	O_T_BIND_REQ.
 *
 * soconnect_tpi_udp:
 *	UDP and ICMP can handle a T_CONN_REQ.
 *	This is needed to make the sequence of connect(), getsockname()
 *	return the local IP address used to send packets to the connected to
 *	destination.
 *
 * soconnect_tpi_tcp:
 *	TCP can handle a T_CONN_REQ without seeing a O_T_BIND_REQ.
 *	Set this to non-zero to send TPI conformant messages to TCP in this
 *	respect. This is a performance optimization.
 *
 * soaccept_tpi_tcp:
 *	TCP can handle a T_CONN_REQ without the acceptor being bound.
 *	This is a performance optimization that has been picked up in XTI.
 *
 * soaccept_tpi_multioptions:
 *	When inheriting SOL_SOCKET options from the listener to the accepting
 *	socket send them as a single message for AF_INET{,6}.
 */
int solisten_tpi_tcp = 0;
int soconnect_tpi_udp = 0;
int soconnect_tpi_tcp = 0;
int soaccept_tpi_tcp = 0;
int soaccept_tpi_multioptions = 1;
#else /* SOCK_TEST */
/*
 * Without SOCK_TEST the tunables above are compile-time constants, so
 * tests on them can be folded away by the compiler.
 */
#define	soconnect_tpi_tcp	0
#define	soconnect_tpi_udp	0
#define	solisten_tpi_tcp	0
#define	soaccept_tpi_tcp	0
#define	soaccept_tpi_multioptions	1
#endif /* SOCK_TEST */

#ifdef SOCK_TEST
extern int do_useracc;
extern clock_t sock_test_timelimit;
#endif /* SOCK_TEST */

/*
 * Some X/Open added checks might have to be backed out to keep SunOS 4.X
 * applications working. Turn on this flag to disable these checks.
 */
int xnet_skip_checks = 0;
int xnet_check_print = 0;
int xnet_truncate_print = 0;

extern void sigintr(k_sigset_t *, int);
extern void sigunintr(k_sigset_t *);

/* NL7C address-list hooks used when binding AF_INET{,6} sockets. */
extern void *nl7c_lookup_addr(void *, t_uscalar_t);
extern void *nl7c_add_addr(void *, t_uscalar_t);
extern void nl7c_listener_addr(void *, queue_t *);

static int	sotpi_unbind(struct sonode *, int);

/* TPI sockfs sonode operations */
static int	sotpi_accept(struct sonode *, int, struct sonode **);
static int	sotpi_bind(struct sonode *, struct sockaddr *, socklen_t,
		    int);
static int	sotpi_connect(struct sonode *, const struct sockaddr *,
		    socklen_t, int, int);
static int	sotpi_listen(struct sonode *, int);
static int	sotpi_sendmsg(struct sonode *, struct nmsghdr *,
		    struct uio *);
static int	sotpi_shutdown(struct sonode *, int);
static int	sotpi_getsockname(struct sonode *);

/*
 * Operations vector for TPI based sockets. The initializers are positional,
 * so their order must match the member order noted in the trailing comments.
 */
sonodeops_t sotpi_sonodeops = {
	sotpi_accept,		/* sop_accept		*/
	sotpi_bind,		/* sop_bind		*/
	sotpi_listen,		/* sop_listen		*/
	sotpi_connect,		/* sop_connect		*/
	sotpi_recvmsg,		/* sop_recvmsg		*/
	sotpi_sendmsg,		/* sop_sendmsg		*/
	sotpi_getpeername,	/* sop_getpeername	*/
	sotpi_getsockname,	/* sop_getsockname	*/
	sotpi_shutdown,		/* sop_shutdown		*/
	sotpi_getsockopt,	/* sop_getsockopt	*/
	sotpi_setsockopt	/* sop_setsockopt	*/
};

/*
 * Common create code for socket and accept. If tso is set the values
 * from that node are used instead of issuing a T_INFO_REQ.
 *
 * Assumes that the caller has a VN_HOLD on accessvp.
 * The VN_RELE will occur either when sotpi_create() fails or when
 * the returned sonode is freed.
210 */ 211 struct sonode * 212 sotpi_create(vnode_t *accessvp, int domain, int type, int protocol, int version, 213 struct sonode *tso, int *errorp) 214 { 215 struct sonode *so; 216 vnode_t *vp; 217 int flags, error; 218 219 ASSERT(accessvp != NULL); 220 vp = makesockvp(accessvp, domain, type, protocol); 221 ASSERT(vp != NULL); 222 so = VTOSO(vp); 223 224 flags = FREAD|FWRITE; 225 if (tso != NULL) { 226 if ((tso->so_state & (SS_TCP_FAST_ACCEPT)) != 0) { 227 flags |= SO_ACCEPTOR|SO_SOCKSTR; 228 so->so_state |= SS_TCP_FAST_ACCEPT; 229 } 230 } else { 231 if ((so->so_type == SOCK_STREAM) && 232 (so->so_family == AF_INET || so->so_family == AF_INET6)) { 233 flags |= SO_SOCKSTR; 234 so->so_state |= SS_TCP_FAST_ACCEPT; 235 } 236 } 237 238 /* 239 * Tell local transport that it is talking to sockets. 240 */ 241 if (so->so_family == AF_UNIX) { 242 flags |= SO_SOCKSTR; 243 } 244 245 if (error = socktpi_open(&vp, flags, CRED())) { 246 VN_RELE(vp); 247 *errorp = error; 248 return (NULL); 249 } 250 251 if (error = so_strinit(so, tso)) { 252 (void) VOP_CLOSE(vp, 0, 1, 0, CRED()); 253 VN_RELE(vp); 254 *errorp = error; 255 return (NULL); 256 } 257 258 if (version == SOV_DEFAULT) 259 version = so_default_version; 260 261 so->so_version = (short)version; 262 return (so); 263 } 264 265 /* 266 * Bind the socket to an unspecified address in sockfs only. 267 * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't 268 * required in all cases. 269 */ 270 static void 271 so_automatic_bind(struct sonode *so) 272 { 273 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 274 275 ASSERT(MUTEX_HELD(&so->so_lock)); 276 ASSERT(!(so->so_state & SS_ISBOUND)); 277 ASSERT(so->so_unbind_mp); 278 279 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 280 bzero(so->so_laddr_sa, so->so_laddr_len); 281 so->so_laddr_sa->sa_family = so->so_family; 282 so->so_state |= SS_ISBOUND; 283 } 284 285 286 /* 287 * bind the socket. 
288 * 289 * If the socket is already bound and none of _SOBIND_SOCKBSD or _SOBIND_XPG4_2 290 * are passed in we allow rebinding. Note that for backwards compatibility 291 * even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to sobind/bind. 292 * Thus the rebinding code is currently not executed. 293 * 294 * The constraints for rebinding are: 295 * - it is a SOCK_DGRAM, or 296 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 297 * and no listen() has been done. 298 * This rebinding code was added based on some language in the XNET book 299 * about not returning EINVAL it the protocol allows rebinding. However, 300 * this language is not present in the Posix socket draft. Thus maybe the 301 * rebinding logic should be deleted from the source. 302 * 303 * A null "name" can be used to unbind the socket if: 304 * - it is a SOCK_DGRAM, or 305 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 306 * and no listen() has been done. 307 */ 308 static int 309 sotpi_bindlisten(struct sonode *so, struct sockaddr *name, 310 socklen_t namelen, int backlog, int flags) 311 { 312 struct T_bind_req bind_req; 313 struct T_bind_ack *bind_ack; 314 int error = 0; 315 mblk_t *mp; 316 void *addr; 317 t_uscalar_t addrlen; 318 int unbind_on_err = 1; 319 boolean_t clear_acceptconn_on_err = B_FALSE; 320 boolean_t restore_backlog_on_err = B_FALSE; 321 int save_so_backlog; 322 t_scalar_t PRIM_type = O_T_BIND_REQ; 323 boolean_t tcp_udp_xport; 324 void *nl7c = NULL; 325 326 dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n", 327 so, name, namelen, backlog, flags, 328 pr_state(so->so_state, so->so_mode))); 329 330 tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM; 331 332 if (!(flags & _SOBIND_LOCK_HELD)) { 333 mutex_enter(&so->so_lock); 334 so_lock_single(so); /* Set SOLOCKED */ 335 } else { 336 ASSERT(MUTEX_HELD(&so->so_lock)); 337 ASSERT(so->so_flag & SOLOCKED); 338 } 339 340 /* 341 * Make sure that there is a preallocated 
unbind_req message 342 * before binding. This message allocated when the socket is 343 * created but it might be have been consumed. 344 */ 345 if (so->so_unbind_mp == NULL) { 346 dprintso(so, 1, ("sobind: allocating unbind_req\n")); 347 /* NOTE: holding so_lock while sleeping */ 348 so->so_unbind_mp = 349 soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP); 350 } 351 352 if (flags & _SOBIND_REBIND) { 353 /* 354 * Called from solisten after doing an sotpi_unbind() or 355 * potentially without the unbind (latter for AF_INET{,6}). 356 */ 357 ASSERT(name == NULL && namelen == 0); 358 359 if (so->so_family == AF_UNIX) { 360 ASSERT(so->so_ux_bound_vp); 361 addr = &so->so_ux_laddr; 362 addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr); 363 dprintso(so, 1, 364 ("sobind rebind UNIX: addrlen %d, addr 0x%p, vp %p\n", 365 addrlen, 366 ((struct so_ux_addr *)addr)->soua_vp, 367 so->so_ux_bound_vp)); 368 } else { 369 addr = so->so_laddr_sa; 370 addrlen = (t_uscalar_t)so->so_laddr_len; 371 } 372 } else if (flags & _SOBIND_UNSPEC) { 373 ASSERT(name == NULL && namelen == 0); 374 375 /* 376 * The caller checked SS_ISBOUND but not necessarily 377 * under so_lock 378 */ 379 if (so->so_state & SS_ISBOUND) { 380 /* No error */ 381 goto done; 382 } 383 384 /* Set an initial local address */ 385 switch (so->so_family) { 386 case AF_UNIX: 387 /* 388 * Use an address with same size as struct sockaddr 389 * just like BSD. 390 */ 391 so->so_laddr_len = 392 (socklen_t)sizeof (struct sockaddr); 393 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 394 bzero(so->so_laddr_sa, so->so_laddr_len); 395 so->so_laddr_sa->sa_family = so->so_family; 396 397 /* 398 * Pass down an address with the implicit bind 399 * magic number and the rest all zeros. 400 * The transport will return a unique address. 
401 */ 402 so->so_ux_laddr.soua_vp = NULL; 403 so->so_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT; 404 addr = &so->so_ux_laddr; 405 addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr); 406 break; 407 408 case AF_INET: 409 case AF_INET6: 410 /* 411 * An unspecified bind in TPI has a NULL address. 412 * Set the address in sockfs to have the sa_family. 413 */ 414 so->so_laddr_len = (so->so_family == AF_INET) ? 415 (socklen_t)sizeof (sin_t) : 416 (socklen_t)sizeof (sin6_t); 417 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 418 bzero(so->so_laddr_sa, so->so_laddr_len); 419 so->so_laddr_sa->sa_family = so->so_family; 420 addr = NULL; 421 addrlen = 0; 422 break; 423 424 default: 425 /* 426 * An unspecified bind in TPI has a NULL address. 427 * Set the address in sockfs to be zero length. 428 * 429 * Can not assume there is a sa_family for all 430 * protocol families. For example, AF_X25 does not 431 * have a family field. 432 */ 433 so->so_laddr_len = 0; /* XXX correct? */ 434 bzero(so->so_laddr_sa, so->so_laddr_len); 435 addr = NULL; 436 addrlen = 0; 437 break; 438 } 439 440 } else { 441 if (so->so_state & SS_ISBOUND) { 442 /* 443 * If it is ok to rebind the socket, first unbind 444 * with the transport. A rebind to the NULL address 445 * is interpreted as an unbind. 446 * Note that a bind to NULL in BSD does unbind the 447 * socket but it fails with EINVAL. 448 * Note that regular sockets set SOV_SOCKBSD i.e. 449 * _SOBIND_SOCKBSD gets set here hence no type of 450 * socket does currently allow rebinding. 451 * 452 * If the name is NULL just do an unbind. 
453 */ 454 if (flags & (_SOBIND_SOCKBSD|_SOBIND_XPG4_2) && 455 name != NULL) { 456 error = EINVAL; 457 unbind_on_err = 0; 458 eprintsoline(so, error); 459 goto done; 460 } 461 if ((so->so_mode & SM_CONNREQUIRED) && 462 (so->so_state & SS_CANTREBIND)) { 463 error = EINVAL; 464 unbind_on_err = 0; 465 eprintsoline(so, error); 466 goto done; 467 } 468 error = sotpi_unbind(so, 0); 469 if (error) { 470 eprintsoline(so, error); 471 goto done; 472 } 473 ASSERT(!(so->so_state & SS_ISBOUND)); 474 if (name == NULL) { 475 so->so_state &= 476 ~(SS_ISCONNECTED|SS_ISCONNECTING); 477 goto done; 478 } 479 } 480 /* X/Open requires this check */ 481 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 482 if (xnet_check_print) { 483 printf("sockfs: X/Open bind state check " 484 "caused EINVAL\n"); 485 } 486 error = EINVAL; 487 goto done; 488 } 489 490 switch (so->so_family) { 491 case AF_UNIX: 492 /* 493 * All AF_UNIX addresses are nul terminated 494 * when copied (copyin_name) in so the minimum 495 * length is 3 bytes. 496 */ 497 if (name == NULL || 498 (ssize_t)namelen <= sizeof (short) + 1) { 499 error = EISDIR; 500 eprintsoline(so, error); 501 goto done; 502 } 503 /* 504 * Verify so_family matches the bound family. 505 * BSD does not check this for AF_UNIX resulting 506 * in funny mknods. 507 */ 508 if (name->sa_family != so->so_family) { 509 error = EAFNOSUPPORT; 510 goto done; 511 } 512 break; 513 case AF_INET: 514 if (name == NULL) { 515 error = EINVAL; 516 eprintsoline(so, error); 517 goto done; 518 } 519 if ((size_t)namelen != sizeof (sin_t)) { 520 error = name->sa_family != so->so_family ? 521 EAFNOSUPPORT : EINVAL; 522 eprintsoline(so, error); 523 goto done; 524 } 525 if ((flags & _SOBIND_XPG4_2) && 526 (name->sa_family != so->so_family)) { 527 /* 528 * This check has to be made for X/Open 529 * sockets however application failures have 530 * been observed when it is applied to 531 * all sockets. 
532 */ 533 error = EAFNOSUPPORT; 534 eprintsoline(so, error); 535 goto done; 536 } 537 /* 538 * Force a zero sa_family to match so_family. 539 * 540 * Some programs like inetd(1M) don't set the 541 * family field. Other programs leave 542 * sin_family set to garbage - SunOS 4.X does 543 * not check the family field on a bind. 544 * We use the family field that 545 * was passed in to the socket() call. 546 */ 547 name->sa_family = so->so_family; 548 break; 549 550 case AF_INET6: { 551 #ifdef DEBUG 552 sin6_t *sin6 = (sin6_t *)name; 553 #endif /* DEBUG */ 554 555 if (name == NULL) { 556 error = EINVAL; 557 eprintsoline(so, error); 558 goto done; 559 } 560 if ((size_t)namelen != sizeof (sin6_t)) { 561 error = name->sa_family != so->so_family ? 562 EAFNOSUPPORT : EINVAL; 563 eprintsoline(so, error); 564 goto done; 565 } 566 if (name->sa_family != so->so_family) { 567 /* 568 * With IPv6 we require the family to match 569 * unlike in IPv4. 570 */ 571 error = EAFNOSUPPORT; 572 eprintsoline(so, error); 573 goto done; 574 } 575 #ifdef DEBUG 576 /* 577 * Verify that apps don't forget to clear 578 * sin6_scope_id etc 579 */ 580 if (sin6->sin6_scope_id != 0 && 581 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 582 cmn_err(CE_WARN, 583 "bind with uninitialized sin6_scope_id " 584 "(%d) on socket. Pid = %d\n", 585 (int)sin6->sin6_scope_id, 586 (int)curproc->p_pid); 587 } 588 if (sin6->__sin6_src_id != 0) { 589 cmn_err(CE_WARN, 590 "bind with uninitialized __sin6_src_id " 591 "(%d) on socket. Pid = %d\n", 592 (int)sin6->__sin6_src_id, 593 (int)curproc->p_pid); 594 } 595 #endif /* DEBUG */ 596 break; 597 } 598 default: 599 /* 600 * Don't do any length or sa_family check to allow 601 * non-sockaddr style addresses. 
602 */ 603 if (name == NULL) { 604 error = EINVAL; 605 eprintsoline(so, error); 606 goto done; 607 } 608 break; 609 } 610 611 if (namelen > (t_uscalar_t)so->so_laddr_maxlen) { 612 error = ENAMETOOLONG; 613 eprintsoline(so, error); 614 goto done; 615 } 616 /* 617 * Save local address. 618 */ 619 so->so_laddr_len = (socklen_t)namelen; 620 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 621 bcopy(name, so->so_laddr_sa, namelen); 622 623 addr = so->so_laddr_sa; 624 addrlen = (t_uscalar_t)so->so_laddr_len; 625 switch (so->so_family) { 626 case AF_INET6: 627 case AF_INET: 628 break; 629 case AF_UNIX: { 630 struct sockaddr_un *soun = 631 (struct sockaddr_un *)so->so_laddr_sa; 632 struct vnode *vp; 633 struct vattr vattr; 634 635 ASSERT(so->so_ux_bound_vp == NULL); 636 /* 637 * Create vnode for the specified path name. 638 * Keep vnode held with a reference in so_ux_bound_vp. 639 * Use the vnode pointer as the address used in the 640 * bind with the transport. 641 * 642 * Use the same mode as in BSD. In particular this does 643 * not observe the umask. 644 */ 645 /* MAXPATHLEN + soun_family + nul termination */ 646 if (so->so_laddr_len > 647 (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) { 648 error = ENAMETOOLONG; 649 eprintsoline(so, error); 650 goto done; 651 } 652 vattr.va_type = VSOCK; 653 vattr.va_mode = 0777 & ~u.u_cmask; 654 vattr.va_mask = AT_TYPE|AT_MODE; 655 /* NOTE: holding so_lock */ 656 error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr, 657 EXCL, 0, &vp, CRMKNOD, 0, 0); 658 if (error) { 659 if (error == EEXIST) 660 error = EADDRINUSE; 661 eprintsoline(so, error); 662 goto done; 663 } 664 /* 665 * Establish pointer from the underlying filesystem 666 * vnode to the socket node. 667 * so_ux_bound_vp and v_stream->sd_vnode form the 668 * cross-linkage between the underlying filesystem 669 * node and the socket node. 
670 */ 671 ASSERT(SOTOV(so)->v_stream); 672 mutex_enter(&vp->v_lock); 673 vp->v_stream = SOTOV(so)->v_stream; 674 so->so_ux_bound_vp = vp; 675 mutex_exit(&vp->v_lock); 676 677 /* 678 * Use the vnode pointer value as a unique address 679 * (together with the magic number to avoid conflicts 680 * with implicit binds) in the transport provider. 681 */ 682 so->so_ux_laddr.soua_vp = (void *)so->so_ux_bound_vp; 683 so->so_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT; 684 addr = &so->so_ux_laddr; 685 addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr); 686 dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n", 687 addrlen, 688 ((struct so_ux_addr *)addr)->soua_vp)); 689 break; 690 } 691 } /* end switch (so->so_family) */ 692 } 693 694 /* 695 * set SS_ACCEPTCONN before sending down O_T_BIND_REQ since 696 * the transport can start passing up T_CONN_IND messages 697 * as soon as it receives the bind req and strsock_proto() 698 * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs. 699 */ 700 if (flags & _SOBIND_LISTEN) { 701 if ((so->so_state & SS_ACCEPTCONN) == 0) 702 clear_acceptconn_on_err = B_TRUE; 703 save_so_backlog = so->so_backlog; 704 restore_backlog_on_err = B_TRUE; 705 so->so_state |= SS_ACCEPTCONN; 706 so->so_backlog = backlog; 707 } 708 709 /* 710 * If NL7C addr(s) have been configured check for addr/port match, 711 * or if an implicit NL7C socket via AF_NCA mark socket as NL7C. 712 * 713 * NL7C supports the TCP transport only so check AF_INET and AF_INET6 714 * family sockets only. If match mark as such. 715 */ 716 if ((nl7c_enabled && addr != NULL && 717 (so->so_family == AF_INET || so->so_family == AF_INET6) && 718 (nl7c = nl7c_lookup_addr(addr, addrlen))) || 719 so->so_nl7c_flags == NL7C_AF_NCA) { 720 /* 721 * NL7C is not supported in non-global zones, 722 * we enforce this restriction here. 
723 */ 724 if (so->so_zoneid == GLOBAL_ZONEID) { 725 /* An NL7C socket, mark it */ 726 so->so_nl7c_flags |= NL7C_ENABLED; 727 } else 728 nl7c = NULL; 729 } 730 /* 731 * We send a T_BIND_REQ for TCP/UDP since we know it supports it, 732 * for other transports we will send in a O_T_BIND_REQ. 733 */ 734 if (tcp_udp_xport && 735 (so->so_family == AF_INET || so->so_family == AF_INET6)) 736 PRIM_type = T_BIND_REQ; 737 738 bind_req.PRIM_type = PRIM_type; 739 bind_req.ADDR_length = addrlen; 740 bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req); 741 bind_req.CONIND_number = backlog; 742 /* NOTE: holding so_lock while sleeping */ 743 mp = soallocproto2(&bind_req, sizeof (bind_req), 744 addr, addrlen, 0, _ALLOC_SLEEP); 745 so->so_state &= ~SS_LADDR_VALID; 746 /* Done using so_laddr_sa - can drop the lock */ 747 mutex_exit(&so->so_lock); 748 749 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 750 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 751 if (error) { 752 eprintsoline(so, error); 753 mutex_enter(&so->so_lock); 754 goto done; 755 } 756 757 mutex_enter(&so->so_lock); 758 error = sowaitprim(so, PRIM_type, T_BIND_ACK, 759 (t_uscalar_t)sizeof (*bind_ack), &mp, 0); 760 if (error) { 761 eprintsoline(so, error); 762 goto done; 763 } 764 ASSERT(mp); 765 /* 766 * Even if some TPI message (e.g. T_DISCON_IND) was received in 767 * strsock_proto while the lock was dropped above, the bind 768 * is allowed to complete. 769 */ 770 771 /* Mark as bound. This will be undone if we detect errors below. */ 772 if (flags & _SOBIND_NOXLATE) { 773 ASSERT(so->so_family == AF_UNIX); 774 so->so_state |= SS_FADDR_NOXLATE; 775 } 776 ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND)); 777 so->so_state |= SS_ISBOUND; 778 ASSERT(so->so_unbind_mp); 779 780 /* note that we've already set SS_ACCEPTCONN above */ 781 782 /* 783 * Recompute addrlen - an unspecied bind sent down an 784 * address of length zero but we expect the appropriate length 785 * in return. 
786 */ 787 addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ? 788 sizeof (so->so_ux_laddr) : so->so_laddr_len); 789 790 bind_ack = (struct T_bind_ack *)mp->b_rptr; 791 /* 792 * The alignment restriction is really too strict but 793 * we want enough alignment to inspect the fields of 794 * a sockaddr_in. 795 */ 796 addr = sogetoff(mp, bind_ack->ADDR_offset, 797 bind_ack->ADDR_length, 798 __TPI_ALIGN_SIZE); 799 if (addr == NULL) { 800 freemsg(mp); 801 error = EPROTO; 802 eprintsoline(so, error); 803 goto done; 804 } 805 if (!(flags & _SOBIND_UNSPEC)) { 806 /* 807 * Verify that the transport didn't return something we 808 * did not want e.g. an address other than what we asked for. 809 * 810 * NOTE: These checks would go away if/when we switch to 811 * using the new TPI (in which the transport would fail 812 * the request instead of assigning a different address). 813 * 814 * NOTE2: For protocols that we don't know (i.e. any 815 * other than AF_INET6, AF_INET and AF_UNIX), we 816 * cannot know if the transport should be expected to 817 * return the same address as that requested. 818 * 819 * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send 820 * down a T_BIND_REQ. We use O_T_BIND_REQ for others. 821 * 822 * For example, in the case of netatalk it may be 823 * inappropriate for the transport to return the 824 * requested address (as it may have allocated a local 825 * port number in behaviour similar to that of an 826 * AF_INET bind request with a port number of zero). 827 * 828 * Given the definition of O_T_BIND_REQ, where the 829 * transport may bind to an address other than the 830 * requested address, it's not possible to determine 831 * whether a returned address that differs from the 832 * requested address is a reason to fail (because the 833 * requested address was not available) or succeed 834 * (because the transport allocated an appropriate 835 * address and/or port). 
836 * 837 * sockfs currently requires that the transport return 838 * the requested address in the T_BIND_ACK, unless 839 * there is code here to allow for any discrepancy. 840 * Such code exists for AF_INET and AF_INET6. 841 * 842 * Netatalk chooses to return the requested address 843 * rather than the (correct) allocated address. This 844 * means that netatalk violates the TPI specification 845 * (and would not function correctly if used from a 846 * TLI application), but it does mean that it works 847 * with sockfs. 848 * 849 * As noted above, using the newer XTI bind primitive 850 * (T_BIND_REQ) in preference to O_T_BIND_REQ would 851 * allow sockfs to be more sure about whether or not 852 * the bind request had succeeded (as transports are 853 * not permitted to bind to a different address than 854 * that requested - they must return failure). 855 * Unfortunately, support for T_BIND_REQ may not be 856 * present in all transport implementations (netatalk, 857 * for example, doesn't have it), making the 858 * transition difficult. 859 */ 860 if (bind_ack->ADDR_length != addrlen) { 861 /* Assumes that the requested address was in use */ 862 freemsg(mp); 863 error = EADDRINUSE; 864 eprintsoline(so, error); 865 goto done; 866 } 867 868 switch (so->so_family) { 869 case AF_INET6: 870 case AF_INET: { 871 sin_t *rname, *aname; 872 873 rname = (sin_t *)addr; 874 aname = (sin_t *)so->so_laddr_sa; 875 876 /* 877 * Take advantage of the alignment 878 * of sin_port and sin6_port which fall 879 * in the same place in their data structures. 880 * Just use sin_port for either address family. 881 * 882 * This may become a problem if (heaven forbid) 883 * there's a separate ipv6port_reserved... :-P 884 * 885 * Binding to port 0 has the semantics of letting 886 * the transport bind to any port. 887 * 888 * If the transport is TCP or UDP since we had sent 889 * a T_BIND_REQ we would not get a port other than 890 * what we asked for. 
891 */ 892 if (tcp_udp_xport) { 893 /* 894 * Pick up the new port number if we bound to 895 * port 0. 896 */ 897 if (aname->sin_port == 0) 898 aname->sin_port = rname->sin_port; 899 so->so_state |= SS_LADDR_VALID; 900 break; 901 } 902 if (aname->sin_port != 0 && 903 aname->sin_port != rname->sin_port) { 904 freemsg(mp); 905 error = EADDRINUSE; 906 eprintsoline(so, error); 907 goto done; 908 } 909 /* 910 * Pick up the new port number if we bound to port 0. 911 */ 912 aname->sin_port = rname->sin_port; 913 914 /* 915 * Unfortunately, addresses aren't _quite_ the same. 916 */ 917 if (so->so_family == AF_INET) { 918 if (aname->sin_addr.s_addr != 919 rname->sin_addr.s_addr) { 920 freemsg(mp); 921 error = EADDRNOTAVAIL; 922 eprintsoline(so, error); 923 goto done; 924 } 925 } else { 926 sin6_t *rname6 = (sin6_t *)rname; 927 sin6_t *aname6 = (sin6_t *)aname; 928 929 if (!IN6_ARE_ADDR_EQUAL(&aname6->sin6_addr, 930 &rname6->sin6_addr)) { 931 freemsg(mp); 932 error = EADDRNOTAVAIL; 933 eprintsoline(so, error); 934 goto done; 935 } 936 } 937 break; 938 } 939 case AF_UNIX: 940 if (bcmp(addr, &so->so_ux_laddr, addrlen) != 0) { 941 freemsg(mp); 942 error = EADDRINUSE; 943 eprintsoline(so, error); 944 eprintso(so, 945 ("addrlen %d, addr 0x%x, vp %p\n", 946 addrlen, *((int *)addr), 947 so->so_ux_bound_vp)); 948 goto done; 949 } 950 so->so_state |= SS_LADDR_VALID; 951 break; 952 default: 953 /* 954 * NOTE: This assumes that addresses can be 955 * byte-compared for equivalence. 956 */ 957 if (bcmp(addr, so->so_laddr_sa, addrlen) != 0) { 958 freemsg(mp); 959 error = EADDRINUSE; 960 eprintsoline(so, error); 961 goto done; 962 } 963 /* 964 * Don't mark SS_LADDR_VALID, as we cannot be 965 * sure that the returned address is the real 966 * bound address when talking to an unknown 967 * transport. 968 */ 969 break; 970 } 971 } else { 972 /* 973 * Save for returned address for getsockname. 974 * Needed for unspecific bind unless transport supports 975 * the TI_GETMYNAME ioctl. 
976 * Do this for AF_INET{,6} even though they do, as 977 * caching info here is much better performance than 978 * a TPI/STREAMS trip to the transport for getsockname. 979 * Any which can't for some reason _must_ _not_ set 980 * LADDR_VALID here for the caching version of getsockname 981 * to not break; 982 */ 983 switch (so->so_family) { 984 case AF_UNIX: 985 /* 986 * Record the address bound with the transport 987 * for use by socketpair. 988 */ 989 bcopy(addr, &so->so_ux_laddr, addrlen); 990 so->so_state |= SS_LADDR_VALID; 991 break; 992 case AF_INET: 993 case AF_INET6: 994 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 995 bcopy(addr, so->so_laddr_sa, so->so_laddr_len); 996 so->so_state |= SS_LADDR_VALID; 997 break; 998 default: 999 /* 1000 * Don't mark SS_LADDR_VALID, as we cannot be 1001 * sure that the returned address is the real 1002 * bound address when talking to an unknown 1003 * transport. 1004 */ 1005 break; 1006 } 1007 } 1008 1009 if (nl7c == NULL && (so->so_nl7c_flags & NL7C_AF_NCA) && 1010 (so->so_nl7c_flags & NL7C_ENABLED)) { 1011 /* 1012 * Was an AF_NCA bind() so add it to the addr list for 1013 * reporting purposes. 
1014 */ 1015 nl7c = nl7c_add_addr(addr, addrlen); 1016 } 1017 if (nl7c != NULL) { 1018 nl7c_listener_addr(nl7c, strvp2wq(SOTOV(so))); 1019 } 1020 1021 freemsg(mp); 1022 1023 done: 1024 if (error) { 1025 /* reset state & backlog to values held on entry */ 1026 if (clear_acceptconn_on_err == B_TRUE) 1027 so->so_state &= ~SS_ACCEPTCONN; 1028 if (restore_backlog_on_err == B_TRUE) 1029 so->so_backlog = save_so_backlog; 1030 1031 if (unbind_on_err && so->so_state & SS_ISBOUND) { 1032 int err; 1033 1034 err = sotpi_unbind(so, 0); 1035 /* LINTED - statement has no consequent: if */ 1036 if (err) { 1037 eprintsoline(so, error); 1038 } else { 1039 ASSERT(!(so->so_state & SS_ISBOUND)); 1040 } 1041 } 1042 } 1043 if (!(flags & _SOBIND_LOCK_HELD)) { 1044 so_unlock_single(so, SOLOCKED); 1045 mutex_exit(&so->so_lock); 1046 } else { 1047 /* If the caller held the lock don't release it here */ 1048 ASSERT(MUTEX_HELD(&so->so_lock)); 1049 ASSERT(so->so_flag & SOLOCKED); 1050 } 1051 return (error); 1052 } 1053 1054 /* bind the socket */ 1055 int 1056 sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 1057 int flags) 1058 { 1059 if ((flags & _SOBIND_SOCKETPAIR) == 0) 1060 return (sotpi_bindlisten(so, name, namelen, 0, flags)); 1061 1062 flags &= ~_SOBIND_SOCKETPAIR; 1063 return (sotpi_bindlisten(so, name, namelen, 1, flags)); 1064 } 1065 1066 /* 1067 * Unbind a socket - used when bind() fails, when bind() specifies a NULL 1068 * address, or when listen needs to unbind and bind. 1069 * If the _SOUNBIND_REBIND flag is specified the addresses are retained 1070 * so that a sobind can pick them up. 
1071 */ 1072 static int 1073 sotpi_unbind(struct sonode *so, int flags) 1074 { 1075 struct T_unbind_req unbind_req; 1076 int error = 0; 1077 mblk_t *mp; 1078 1079 dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n", 1080 so, flags, pr_state(so->so_state, so->so_mode))); 1081 1082 ASSERT(MUTEX_HELD(&so->so_lock)); 1083 ASSERT(so->so_flag & SOLOCKED); 1084 1085 if (!(so->so_state & SS_ISBOUND)) { 1086 error = EINVAL; 1087 eprintsoline(so, error); 1088 goto done; 1089 } 1090 1091 mutex_exit(&so->so_lock); 1092 1093 /* 1094 * Flush the read and write side (except stream head read queue) 1095 * and send down T_UNBIND_REQ. 1096 */ 1097 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW); 1098 1099 unbind_req.PRIM_type = T_UNBIND_REQ; 1100 mp = soallocproto1(&unbind_req, sizeof (unbind_req), 1101 0, _ALLOC_SLEEP); 1102 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1103 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1104 mutex_enter(&so->so_lock); 1105 if (error) { 1106 eprintsoline(so, error); 1107 goto done; 1108 } 1109 1110 error = sowaitokack(so, T_UNBIND_REQ); 1111 if (error) { 1112 eprintsoline(so, error); 1113 goto done; 1114 } 1115 1116 /* 1117 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1118 * strsock_proto while the lock was dropped above, the unbind 1119 * is allowed to complete. 1120 */ 1121 if (!(flags & _SOUNBIND_REBIND)) { 1122 /* 1123 * Clear out bound address. 1124 */ 1125 vnode_t *vp; 1126 1127 if ((vp = so->so_ux_bound_vp) != NULL) { 1128 ASSERT(vp->v_stream); 1129 so->so_ux_bound_vp = NULL; 1130 vn_rele_stream(vp); 1131 } 1132 /* Clear out address */ 1133 so->so_laddr_len = 0; 1134 } 1135 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN|SS_LADDR_VALID); 1136 done: 1137 /* If the caller held the lock don't release it here */ 1138 ASSERT(MUTEX_HELD(&so->so_lock)); 1139 ASSERT(so->so_flag & SOLOCKED); 1140 1141 return (error); 1142 } 1143 1144 /* 1145 * listen on the socket. 
* For TPI conforming transports this has to first unbind with the transport
 * and then bind again using the new backlog.
 *
 * Returns 0 on success, EOPNOTSUPP for connectionless (T_CLTS) service
 * types, EINVAL if the socket is connected or is an unbound AF_UNIX
 * socket, or the error from sotpi_unbind()/sotpi_bindlisten().
 * so_lock and SOLOCKED are acquired and released inside this function.
 */
int
sotpi_listen(struct sonode *so, int backlog)
{
	int		error = 0;

	dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n",
		so, backlog, pr_state(so->so_state, so->so_mode)));

	/* listen() is meaningless on a connectionless transport */
	if (so->so_serv_type == T_CLTS)
		return (EOPNOTSUPP);

	/*
	 * If the socket is ready to accept connections already, then
	 * return without doing anything.  This avoids a problem where
	 * a second listen() call fails if a connection is pending and
	 * leaves the socket unbound. Only when we are not unbinding
	 * with the transport can we safely increase the backlog.
	 *
	 * Note that this check (and the SS_ISCONNECTED check below) reads
	 * so_state before so_lock is taken.
	 */
	if (so->so_state & SS_ACCEPTCONN &&
	    !((so->so_family == AF_INET || so->so_family == AF_INET6) &&
		/*CONSTCOND*/
		!solisten_tpi_tcp))
		return (0);

	if (so->so_state & SS_ISCONNECTED)
		return (EINVAL);

	mutex_enter(&so->so_lock);
	so_lock_single(so);	/* Set SOLOCKED */

	/* A negative backlog is treated as zero */
	if (backlog < 0)
		backlog = 0;
	/*
	 * Use the same qlimit as in BSD. BSD checks the qlimit
	 * before queuing the next connection implying that a
	 * listen(sock, 0) allows one connection to be queued.
	 * BSD also uses 1.5 times the requested backlog.
	 *
	 * XNS Issue 4 required a strict interpretation of the backlog.
	 * This has been waived subsequently for Issue 4 and the change
	 * incorporated in XNS Issue 5. So we aren't required to do
	 * anything special for XPG apps.
	 *
	 * The comparison against (INT_MAX - 1) / 3 keeps the
	 * "backlog * 3" multiplication below from overflowing an int.
	 */
	if (backlog >= (INT_MAX - 1) / 3)
		backlog = INT_MAX;
	else
		backlog = backlog * 3 / 2 + 1;

	/*
	 * If the listen doesn't change the backlog we do nothing.
	 * This avoids an EPROTO error from the transport.
	 */
	if ((so->so_state & SS_ACCEPTCONN) &&
	    so->so_backlog == backlog)
		goto done;

	if (!(so->so_state & SS_ISBOUND)) {
		/*
		 * Must have been explicitly bound in the UNIX domain.
		 */
		if (so->so_family == AF_UNIX) {
			error = EINVAL;
			goto done;
		}
		/* Not yet bound: bind and listen in a single step */
		error = sotpi_bindlisten(so, NULL, 0, backlog,
			    _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN);
	} else if (backlog > 0) {
		/*
		 * AF_INET{,6} hack to avoid losing the port.
		 * Assumes that all AF_INET{,6} transports can handle a
		 * O_T_BIND_REQ with a non-zero CONIND_number when the TPI
		 * has already bound thus it is possible to avoid the unbind.
		 */
		if (!((so->so_family == AF_INET ||
		    so->so_family == AF_INET6) &&
		    /*CONSTCOND*/
		    !solisten_tpi_tcp)) {
			/* Keep the addresses so the rebind can reuse them */
			error = sotpi_unbind(so, _SOUNBIND_REBIND);
			if (error)
				goto done;
		}
		error = sotpi_bindlisten(so, NULL, 0, backlog,
			    _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN);
	} else {
		/*
		 * NOTE(review): after the scaling above backlog is always
		 * >= 1, so this branch looks unreachable - confirm before
		 * relying on it.
		 */
		so->so_state |= SS_ACCEPTCONN;
		so->so_backlog = backlog;
	}
	if (error)
		goto done;
	ASSERT(so->so_state & SS_ACCEPTCONN);
done:
	so_unlock_single(so, SOLOCKED);
	mutex_exit(&so->so_lock);
	return (error);
}

/*
 * Disconnect either a specified seqno or all (-1).
 * The former is used on listening sockets only.
 *
 * When seqno == -1 sodisconnect could call sotpi_unbind. However,
 * the current use of sodisconnect(seqno == -1) is only for shutdown
 * so there is no point (and potentially incorrect) to unbind.
 * NOTE(review): sotpi_connect() also passes seqno == -1 when it
 * unconnects an AF_INET{,6} datagram/raw socket - confirm the above.
1251 */ 1252 int 1253 sodisconnect(struct sonode *so, t_scalar_t seqno, int flags) 1254 { 1255 struct T_discon_req discon_req; 1256 int error = 0; 1257 mblk_t *mp; 1258 1259 dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n", 1260 so, seqno, flags, pr_state(so->so_state, so->so_mode))); 1261 1262 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1263 mutex_enter(&so->so_lock); 1264 so_lock_single(so); /* Set SOLOCKED */ 1265 } else { 1266 ASSERT(MUTEX_HELD(&so->so_lock)); 1267 ASSERT(so->so_flag & SOLOCKED); 1268 } 1269 1270 if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ACCEPTCONN))) { 1271 error = EINVAL; 1272 eprintsoline(so, error); 1273 goto done; 1274 } 1275 1276 mutex_exit(&so->so_lock); 1277 /* 1278 * Flush the write side (unless this is a listener) 1279 * and then send down a T_DISCON_REQ. 1280 * (Don't flush on listener since it could flush {O_}T_CONN_RES 1281 * and other messages.) 1282 */ 1283 if (!(so->so_state & SS_ACCEPTCONN)) 1284 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHW); 1285 1286 discon_req.PRIM_type = T_DISCON_REQ; 1287 discon_req.SEQ_number = seqno; 1288 mp = soallocproto1(&discon_req, sizeof (discon_req), 1289 0, _ALLOC_SLEEP); 1290 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1291 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1292 mutex_enter(&so->so_lock); 1293 if (error) { 1294 eprintsoline(so, error); 1295 goto done; 1296 } 1297 1298 error = sowaitokack(so, T_DISCON_REQ); 1299 if (error) { 1300 eprintsoline(so, error); 1301 goto done; 1302 } 1303 /* 1304 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1305 * strsock_proto while the lock was dropped above, the disconnect 1306 * is allowed to complete. However, it is not possible to 1307 * assert that SS_ISCONNECTED|SS_ISCONNECTING are set. 
1308 */ 1309 so->so_state &= 1310 ~(SS_ISCONNECTED|SS_ISCONNECTING|SS_LADDR_VALID|SS_FADDR_VALID); 1311 done: 1312 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1313 so_unlock_single(so, SOLOCKED); 1314 mutex_exit(&so->so_lock); 1315 } else { 1316 /* If the caller held the lock don't release it here */ 1317 ASSERT(MUTEX_HELD(&so->so_lock)); 1318 ASSERT(so->so_flag & SOLOCKED); 1319 } 1320 return (error); 1321 } 1322 1323 int 1324 sotpi_accept(struct sonode *so, int fflag, struct sonode **nsop) 1325 { 1326 struct T_conn_ind *conn_ind; 1327 struct T_conn_res *conn_res; 1328 int error = 0; 1329 mblk_t *mp; 1330 struct sonode *nso; 1331 vnode_t *nvp; 1332 void *src; 1333 t_uscalar_t srclen; 1334 void *opt; 1335 t_uscalar_t optlen; 1336 t_scalar_t PRIM_type; 1337 t_scalar_t SEQ_number; 1338 1339 dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n", 1340 so, fflag, nsop, pr_state(so->so_state, so->so_mode))); 1341 1342 /* 1343 * Defer single-threading the accepting socket until 1344 * the T_CONN_IND has been received and parsed and the 1345 * new sonode has been opened. 1346 */ 1347 1348 /* Check that we are not already connected */ 1349 if ((so->so_state & SS_ACCEPTCONN) == 0) 1350 goto conn_bad; 1351 again: 1352 if ((error = sowaitconnind(so, fflag, &mp)) != 0) 1353 goto e_bad; 1354 1355 ASSERT(mp); 1356 conn_ind = (struct T_conn_ind *)mp->b_rptr; 1357 /* 1358 * Save SEQ_number for error paths. 
1359 */ 1360 SEQ_number = conn_ind->SEQ_number; 1361 1362 srclen = conn_ind->SRC_length; 1363 src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1); 1364 if (src == NULL) { 1365 error = EPROTO; 1366 freemsg(mp); 1367 eprintsoline(so, error); 1368 goto disconnect_unlocked; 1369 } 1370 optlen = conn_ind->OPT_length; 1371 switch (so->so_family) { 1372 case AF_INET: 1373 case AF_INET6: 1374 if ((optlen == sizeof (intptr_t)) && 1375 ((so->so_state & SS_TCP_FAST_ACCEPT) != 0)) { 1376 bcopy(mp->b_rptr + conn_ind->OPT_offset, 1377 &opt, conn_ind->OPT_length); 1378 } else { 1379 /* 1380 * The transport (in this case TCP) hasn't sent up 1381 * a pointer to an instance for the accept fast-path. 1382 * Disable fast-path completely because the call to 1383 * sotpi_create() below would otherwise create an 1384 * incomplete TCP instance, which would lead to 1385 * problems when sockfs sends a normal T_CONN_RES 1386 * message down the new stream. 1387 */ 1388 so->so_state &= ~SS_TCP_FAST_ACCEPT; 1389 opt = NULL; 1390 optlen = 0; 1391 } 1392 break; 1393 case AF_UNIX: 1394 default: 1395 if (optlen != 0) { 1396 opt = sogetoff(mp, conn_ind->OPT_offset, optlen, 1397 __TPI_ALIGN_SIZE); 1398 if (opt == NULL) { 1399 error = EPROTO; 1400 freemsg(mp); 1401 eprintsoline(so, error); 1402 goto disconnect_unlocked; 1403 } 1404 } 1405 if (so->so_family == AF_UNIX) { 1406 if (!(so->so_state & SS_FADDR_NOXLATE)) { 1407 src = NULL; 1408 srclen = 0; 1409 } 1410 /* Extract src address from options */ 1411 if (optlen != 0) 1412 so_getopt_srcaddr(opt, optlen, &src, &srclen); 1413 } 1414 break; 1415 } 1416 1417 /* 1418 * Create the new socket. 1419 */ 1420 VN_HOLD(so->so_accessvp); 1421 nso = sotpi_create(so->so_accessvp, so->so_family, so->so_type, 1422 so->so_protocol, so->so_version, so, &error); 1423 if (nso == NULL) { 1424 ASSERT(error != 0); 1425 /* 1426 * Accept can not fail with ENOBUFS. sotpi_create 1427 * sleeps waiting for memory until a signal is caught 1428 * so return EINTR. 
1429 */ 1430 freemsg(mp); 1431 if (error == ENOBUFS) 1432 error = EINTR; 1433 goto e_disc_unl; 1434 } 1435 nvp = SOTOV(nso); 1436 1437 #ifdef DEBUG 1438 /* 1439 * SO_DEBUG is used to trigger the dprint* and eprint* macros thus 1440 * it's inherited early to allow debugging of the accept code itself. 1441 */ 1442 nso->so_options |= so->so_options & SO_DEBUG; 1443 #endif /* DEBUG */ 1444 1445 /* 1446 * Save the SRC address from the T_CONN_IND 1447 * for getpeername to work on AF_UNIX and on transports that do not 1448 * support TI_GETPEERNAME. 1449 * 1450 * NOTE: AF_UNIX NUL termination is ensured by the sender's 1451 * copyin_name(). 1452 */ 1453 if (srclen > (t_uscalar_t)nso->so_faddr_maxlen) { 1454 error = EINVAL; 1455 freemsg(mp); 1456 eprintsoline(so, error); 1457 goto disconnect_vp_unlocked; 1458 } 1459 nso->so_faddr_len = (socklen_t)srclen; 1460 ASSERT(so->so_faddr_len <= so->so_faddr_maxlen); 1461 bcopy(src, nso->so_faddr_sa, srclen); 1462 nso->so_state |= SS_FADDR_VALID; 1463 1464 if ((DB_REF(mp) > 1) || MBLKSIZE(mp) < 1465 (sizeof (struct T_conn_res) + sizeof (intptr_t))) { 1466 cred_t *cr; 1467 1468 if ((cr = DB_CRED(mp)) != NULL) { 1469 crhold(cr); 1470 nso->so_peercred = cr; 1471 nso->so_cpid = DB_CPID(mp); 1472 } 1473 freemsg(mp); 1474 1475 mp = soallocproto1(NULL, sizeof (struct T_conn_res) + 1476 sizeof (intptr_t), 0, _ALLOC_INTR); 1477 if (mp == NULL) { 1478 /* 1479 * Accept can not fail with ENOBUFS. 1480 * A signal was caught so return EINTR. 
1481 */ 1482 error = EINTR; 1483 eprintsoline(so, error); 1484 goto disconnect_vp_unlocked; 1485 } 1486 conn_res = (struct T_conn_res *)mp->b_rptr; 1487 } else { 1488 nso->so_peercred = DB_CRED(mp); 1489 nso->so_cpid = DB_CPID(mp); 1490 DB_CRED(mp) = NULL; 1491 1492 mp->b_rptr = DB_BASE(mp); 1493 conn_res = (struct T_conn_res *)mp->b_rptr; 1494 mp->b_wptr = mp->b_rptr + sizeof (struct T_conn_res); 1495 } 1496 1497 /* 1498 * New socket must be bound at least in sockfs and, except for AF_INET, 1499 * (or AF_INET6) it also has to be bound in the transport provider. 1500 * After accepting the connection on nso so_laddr_sa will be set to 1501 * contain the same address as the listener's local address 1502 * so the address we bind to isn't important. 1503 */ 1504 if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) && 1505 /*CONSTCOND*/ 1506 nso->so_type == SOCK_STREAM && !soaccept_tpi_tcp) { 1507 /* 1508 * Optimization for AF_INET{,6} transports 1509 * that can handle a T_CONN_RES without being bound. 1510 */ 1511 mutex_enter(&nso->so_lock); 1512 so_automatic_bind(nso); 1513 mutex_exit(&nso->so_lock); 1514 } else { 1515 /* Perform NULL bind with the transport provider. */ 1516 if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC)) != 0) { 1517 ASSERT(error != ENOBUFS); 1518 freemsg(mp); 1519 eprintsoline(nso, error); 1520 goto disconnect_vp_unlocked; 1521 } 1522 } 1523 1524 /* 1525 * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES 1526 * so that any data arriving on the new socket will cause the 1527 * appropriate signals to be delivered for the new socket. 1528 * 1529 * No other thread (except strsock_proto and strsock_misc) 1530 * can access the new socket thus we relax the locking. 
1531 */ 1532 nso->so_pgrp = so->so_pgrp; 1533 nso->so_state |= so->so_state & (SS_ASYNC|SS_FADDR_NOXLATE); 1534 1535 if (nso->so_pgrp != 0) { 1536 if ((error = so_set_events(nso, nvp, CRED())) != 0) { 1537 eprintsoline(nso, error); 1538 error = 0; 1539 nso->so_pgrp = 0; 1540 } 1541 } 1542 1543 /* 1544 * Make note of the socket level options. TCP and IP level options 1545 * are already inherited. We could do all this after accept is 1546 * successful but doing it here simplifies code and no harm done 1547 * for error case. 1548 */ 1549 nso->so_options = so->so_options & (SO_DEBUG|SO_REUSEADDR|SO_KEEPALIVE| 1550 SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK| 1551 SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER); 1552 nso->so_sndbuf = so->so_sndbuf; 1553 nso->so_rcvbuf = so->so_rcvbuf; 1554 if (nso->so_options & SO_LINGER) 1555 nso->so_linger = so->so_linger; 1556 1557 if ((so->so_state & SS_TCP_FAST_ACCEPT) != 0) { 1558 mblk_t *ack_mp; 1559 1560 ASSERT(opt != NULL); 1561 1562 conn_res->OPT_length = optlen; 1563 conn_res->OPT_offset = MBLKL(mp); 1564 bcopy(&opt, mp->b_wptr, optlen); 1565 mp->b_wptr += optlen; 1566 conn_res->PRIM_type = T_CONN_RES; 1567 conn_res->ACCEPTOR_id = 0; 1568 PRIM_type = T_CONN_RES; 1569 1570 /* Send down the T_CONN_RES on acceptor STREAM */ 1571 error = kstrputmsg(SOTOV(nso), mp, NULL, 1572 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1573 if (error) { 1574 mutex_enter(&so->so_lock); 1575 so_lock_single(so); 1576 eprintsoline(so, error); 1577 goto disconnect_vp; 1578 } 1579 mutex_enter(&nso->so_lock); 1580 error = sowaitprim(nso, T_CONN_RES, T_OK_ACK, 1581 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 1582 if (error) { 1583 mutex_exit(&nso->so_lock); 1584 mutex_enter(&so->so_lock); 1585 so_lock_single(so); 1586 eprintsoline(so, error); 1587 goto disconnect_vp; 1588 } 1589 if (nso->so_family == AF_INET) { 1590 sin_t *sin; 1591 1592 sin = (sin_t *)(ack_mp->b_rptr + 1593 sizeof (struct T_ok_ack)); 1594 bcopy(sin, nso->so_laddr_sa, sizeof (sin_t)); 1595 
nso->so_laddr_len = sizeof (sin_t); 1596 } else { 1597 sin6_t *sin6; 1598 1599 sin6 = (sin6_t *)(ack_mp->b_rptr + 1600 sizeof (struct T_ok_ack)); 1601 bcopy(sin6, nso->so_laddr_sa, sizeof (sin6_t)); 1602 nso->so_laddr_len = sizeof (sin6_t); 1603 } 1604 freemsg(ack_mp); 1605 1606 nso->so_state |= SS_ISCONNECTED | SS_LADDR_VALID; 1607 nso->so_priv = opt; 1608 1609 if (so->so_nl7c_flags & NL7C_ENABLED) { 1610 /* 1611 * An NL7C marked listen()er so the new socket 1612 * inherits the listen()er's NL7C state. 1613 * 1614 * When calling NL7C to process the new socket 1615 * pass the nonblocking i/o state of the listen 1616 * socket as this is the context we are in. 1617 */ 1618 nso->so_nl7c_flags = so->so_nl7c_flags; 1619 if (nl7c_process(nso, 1620 (nso->so_state & (SS_NONBLOCK|SS_NDELAY)), 1621 (int)((tcp_t *)nso->so_priv)->tcp_mss)) { 1622 /* 1623 * NL7C has completed processing on the 1624 * socket, close the socket and back to 1625 * the top to await the next T_CONN_IND. 1626 */ 1627 mutex_exit(&nso->so_lock); 1628 (void) VOP_CLOSE(nvp, 0, 1, (offset_t)0, 1629 CRED()); 1630 VN_RELE(nvp); 1631 goto again; 1632 } 1633 /* Pass the new socket out */ 1634 } 1635 1636 mutex_exit(&nso->so_lock); 1637 1638 /* 1639 * Pass out new socket. 1640 */ 1641 if (nsop != NULL) 1642 *nsop = nso; 1643 1644 return (0); 1645 } 1646 1647 /* 1648 * Copy local address from listener. 1649 */ 1650 nso->so_laddr_len = so->so_laddr_len; 1651 ASSERT(nso->so_laddr_len <= nso->so_laddr_maxlen); 1652 bcopy(so->so_laddr_sa, nso->so_laddr_sa, nso->so_laddr_len); 1653 nso->so_state |= SS_LADDR_VALID; 1654 1655 /* 1656 * This is the non-performance case for sockets (e.g. AF_UNIX sockets) 1657 * which don't support the FireEngine accept fast-path. It is also 1658 * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd 1659 * again. Neither sockfs nor TCP attempt to find out if some other 1660 * random module has been inserted in between (in which case we 1661 * should follow TLI accept behaviour). 
We blindly assume the worst 1662 * case and revert back to old behaviour i.e. TCP will not send us 1663 * any option (eager) and the accept should happen on the listener 1664 * queue. Any queued T_conn_ind have already got their options removed 1665 * by so_sock2_stream() when "sockmod" was I_POP'd. 1666 */ 1667 /* 1668 * Fill in the {O_}T_CONN_RES before getting SOLOCKED. 1669 */ 1670 if ((nso->so_mode & SM_ACCEPTOR_ID) == 0) { 1671 #ifdef _ILP32 1672 queue_t *q; 1673 1674 /* 1675 * Find read queue in driver 1676 * Can safely do this since we "own" nso/nvp. 1677 */ 1678 q = strvp2wq(nvp)->q_next; 1679 while (SAMESTR(q)) 1680 q = q->q_next; 1681 q = RD(q); 1682 conn_res->ACCEPTOR_id = (t_uscalar_t)q; 1683 #else 1684 conn_res->ACCEPTOR_id = (t_uscalar_t)getminor(nvp->v_rdev); 1685 #endif /* _ILP32 */ 1686 conn_res->PRIM_type = O_T_CONN_RES; 1687 PRIM_type = O_T_CONN_RES; 1688 } else { 1689 conn_res->ACCEPTOR_id = nso->so_acceptor_id; 1690 conn_res->PRIM_type = T_CONN_RES; 1691 PRIM_type = T_CONN_RES; 1692 } 1693 conn_res->SEQ_number = SEQ_number; 1694 conn_res->OPT_length = 0; 1695 conn_res->OPT_offset = 0; 1696 1697 mutex_enter(&so->so_lock); 1698 so_lock_single(so); /* Set SOLOCKED */ 1699 mutex_exit(&so->so_lock); 1700 1701 error = kstrputmsg(SOTOV(so), mp, NULL, 1702 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1703 mutex_enter(&so->so_lock); 1704 if (error) { 1705 eprintsoline(so, error); 1706 goto disconnect_vp; 1707 } 1708 error = sowaitokack(so, PRIM_type); 1709 if (error) { 1710 eprintsoline(so, error); 1711 goto disconnect_vp; 1712 } 1713 so_unlock_single(so, SOLOCKED); 1714 mutex_exit(&so->so_lock); 1715 1716 nso->so_state |= SS_ISCONNECTED; 1717 1718 /* 1719 * Pass out new socket. 
1720 */ 1721 if (nsop != NULL) 1722 *nsop = nso; 1723 1724 return (0); 1725 1726 1727 eproto_disc_unl: 1728 error = EPROTO; 1729 e_disc_unl: 1730 eprintsoline(so, error); 1731 goto disconnect_unlocked; 1732 1733 pr_disc_vp_unl: 1734 eprintsoline(so, error); 1735 disconnect_vp_unlocked: 1736 (void) VOP_CLOSE(nvp, 0, 1, 0, CRED()); 1737 VN_RELE(nvp); 1738 disconnect_unlocked: 1739 (void) sodisconnect(so, SEQ_number, 0); 1740 return (error); 1741 1742 pr_disc_vp: 1743 eprintsoline(so, error); 1744 disconnect_vp: 1745 (void) sodisconnect(so, SEQ_number, _SODISCONNECT_LOCK_HELD); 1746 so_unlock_single(so, SOLOCKED); 1747 mutex_exit(&so->so_lock); 1748 (void) VOP_CLOSE(nvp, 0, 1, 0, CRED()); 1749 VN_RELE(nvp); 1750 return (error); 1751 1752 conn_bad: /* Note: SunOS 4/BSD unconditionally returns EINVAL here */ 1753 error = (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) 1754 ? EOPNOTSUPP : EINVAL; 1755 e_bad: 1756 eprintsoline(so, error); 1757 return (error); 1758 } 1759 1760 /* 1761 * connect a socket. 1762 * 1763 * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to 1764 * unconnect (by specifying a null address). 1765 */ 1766 int 1767 sotpi_connect(struct sonode *so, 1768 const struct sockaddr *name, 1769 socklen_t namelen, 1770 int fflag, 1771 int flags) 1772 { 1773 struct T_conn_req conn_req; 1774 int error = 0; 1775 mblk_t *mp; 1776 void *src; 1777 socklen_t srclen; 1778 void *addr; 1779 socklen_t addrlen; 1780 boolean_t need_unlock; 1781 1782 dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n", 1783 so, name, namelen, fflag, flags, 1784 pr_state(so->so_state, so->so_mode))); 1785 1786 /* 1787 * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to 1788 * avoid sleeping for memory with SOLOCKED held. 1789 * We know that the T_CONN_REQ can't be larger than 2 * so_faddr_maxlen 1790 * + sizeof (struct T_opthdr). 1791 * (the AF_UNIX so_ux_addr_xlate() does not make the address 1792 * exceed so_faddr_maxlen). 
*/
	mp = soallocproto(sizeof (struct T_conn_req) +
	    2 * so->so_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR);
	if (mp == NULL) {
		/*
		 * Connect can not fail with ENOBUFS. A signal was
		 * caught so return EINTR.
		 */
		error = EINTR;
		eprintsoline(so, error);
		return (error);
	}

	mutex_enter(&so->so_lock);
	/*
	 * Make sure that there is a preallocated unbind_req
	 * message before any binding. This message is allocated when
	 * the socket is created but it might have been
	 * consumed.
	 */
	if (so->so_unbind_mp == NULL) {
		dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n"));
		/* NOTE: holding so_lock while sleeping */
		so->so_unbind_mp =
		    soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR);
		if (so->so_unbind_mp == NULL) {
			error = EINTR;
			/* SOLOCKED has not been taken yet */
			need_unlock = B_FALSE;
			goto done;
		}
	}

	/*
	 * need_unlock records whether this thread holds SOLOCKED so
	 * that the common exit at "done" knows whether to release it.
	 */
	so_lock_single(so);	/* Set SOLOCKED */
	need_unlock = B_TRUE;

	/*
	 * Can't have done a listen before connecting.
	 */
	if (so->so_state & SS_ACCEPTCONN) {
		error = EOPNOTSUPP;
		goto done;
	}

	/*
	 * Must be bound with the transport
	 */
	if (!(so->so_state & SS_ISBOUND)) {
		if ((so->so_family == AF_INET || so->so_family == AF_INET6) &&
		    /*CONSTCOND*/
		    so->so_type == SOCK_STREAM && !soconnect_tpi_tcp) {
			/*
			 * Optimization for AF_INET{,6} transports
			 * that can handle a T_CONN_REQ without being bound.
			 */
			so_automatic_bind(so);
		} else {
			error = sotpi_bind(so, NULL, 0,
			    _SOBIND_UNSPEC|_SOBIND_LOCK_HELD);
			if (error)
				goto done;
		}
		ASSERT(so->so_state & SS_ISBOUND);
		/* Remember the implicit bind so a fatal error can undo it */
		flags |= _SOCONNECT_DID_BIND;
	}

	/*
	 * Handle a connect to a name parameter of type AF_UNSPEC like a
	 * connect to a null address. This is the portable method to
	 * unconnect a socket.
	 */
	if ((namelen >= sizeof (sa_family_t)) &&
	    (name->sa_family == AF_UNSPEC)) {
		name = NULL;
		namelen = 0;
	}

	/*
	 * Check that we are not already connected.
	 * A connection-oriented socket cannot be reconnected.
	 * A connected connection-less socket can be
	 * - connected to a different address by a subsequent connect
	 * - "unconnected" by a connect to the NULL address
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) {
		ASSERT(!(flags & _SOCONNECT_DID_BIND));
		if (so->so_mode & SM_CONNREQUIRED) {
			/* Connection-oriented socket */
			error = so->so_state & SS_ISCONNECTED ?
			    EISCONN : EALREADY;
			goto done;
		}
		/* Connection-less socket */
		if (name == NULL) {
			/*
			 * Remove the connected state and clear SO_DGRAM_ERRIND
			 * since it was set when the socket was connected.
			 * If this is UDP also send down a T_DISCON_REQ.
			 */
			int val;

			if ((so->so_family == AF_INET ||
			    so->so_family == AF_INET6) &&
			    (so->so_type == SOCK_DGRAM ||
			    so->so_type == SOCK_RAW) &&
			    /*CONSTCOND*/
			    !soconnect_tpi_udp) {
				/* XXX What about implicitly unbinding here? */
				error = sodisconnect(so, -1,
				    _SODISCONNECT_LOCK_HELD);
			} else {
				so->so_state &=
				    ~(SS_ISCONNECTED | SS_ISCONNECTING |
				    SS_FADDR_VALID);
				so->so_faddr_len = 0;
			}

			/*
			 * SOLOCKED and so_lock are released around the
			 * setsockopt call and re-taken afterwards.
			 */
			so_unlock_single(so, SOLOCKED);
			mutex_exit(&so->so_lock);

			val = 0;
			(void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND,
			    &val, (t_uscalar_t)sizeof (val));

			mutex_enter(&so->so_lock);
			so_lock_single(so);	/* Set SOLOCKED */
			goto done;
		}
	}
	ASSERT(so->so_state & SS_ISBOUND);

	/* A NULL address is only valid for the unconnect case above */
	if (name == NULL || namelen == 0) {
		error = EINVAL;
		goto done;
	}
	/*
	 * Mark the socket if so_faddr_sa represents the transport level
	 * address.
	 */
	if (flags & _SOCONNECT_NOXLATE) {
		struct sockaddr_ux	*soaddr_ux;

		ASSERT(so->so_family == AF_UNIX);
		if (namelen != sizeof (struct sockaddr_ux)) {
			error = EINVAL;
			goto done;
		}
		/* Use the embedded sou_addr as the address from here on */
		soaddr_ux = (struct sockaddr_ux *)name;
		name = (struct sockaddr *)&soaddr_ux->sou_addr;
		namelen = sizeof (soaddr_ux->sou_addr);
		so->so_state |= SS_FADDR_NOXLATE;
	}

	/*
	 * Length and family checks.
	 */
	error = so_addr_verify(so, name, namelen);
	if (error)
		goto bad;

	/*
	 * Save foreign address. Needed for AF_UNIX as well as
	 * transport providers that do not support TI_GETPEERNAME.
	 * Also used for cached foreign address for TCP and UDP.
	 */
	if (namelen > (t_uscalar_t)so->so_faddr_maxlen) {
		error = EINVAL;
		goto done;
	}
	so->so_faddr_len = (socklen_t)namelen;
	ASSERT(so->so_faddr_len <= so->so_faddr_maxlen);
	bcopy(name, so->so_faddr_sa, namelen);
	so->so_state |= SS_FADDR_VALID;

	/*
	 * Work out the destination address (addr/addrlen) to put in the
	 * T_CONN_REQ and the optional source address (src/srclen) that
	 * is sent as an SO_SRCADDR option.
	 */
	if (so->so_family == AF_UNIX) {
		if (so->so_state & SS_FADDR_NOXLATE) {
			/*
			 * Already have a transport internal address. Do not
			 * pass any (transport internal) source address.
			 */
			addr = so->so_faddr_sa;
			addrlen = (t_uscalar_t)so->so_faddr_len;
			src = NULL;
			srclen = 0;
		} else {
			/*
			 * Pass the sockaddr_un source address as an option
			 * and translate the remote address.
			 * Holding so_lock thus so_laddr_sa can not change.
			 */
			src = so->so_laddr_sa;
			srclen = (t_uscalar_t)so->so_laddr_len;
			dprintso(so, 1,
			    ("sotpi_connect UNIX: srclen %d, src %p\n",
			    srclen, src));
			error = so_ux_addr_xlate(so,
			    so->so_faddr_sa, (socklen_t)so->so_faddr_len,
			    (flags & _SOCONNECT_XPG4_2),
			    &addr, &addrlen);
			if (error)
				goto bad;
		}
	} else {
		addr = so->so_faddr_sa;
		addrlen = (t_uscalar_t)so->so_faddr_len;
		src = NULL;
		srclen = 0;
	}
	/*
	 * When connecting a datagram socket we issue the SO_DGRAM_ERRIND
	 * option which asks the transport provider to send T_UDERR_IND
	 * messages. These T_UDERR_IND messages are used to return connected
	 * style errors (e.g. ECONNRESET) for connected datagram sockets.
	 *
	 * In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets)
	 * we send down a T_CONN_REQ. This is needed to let the
	 * transport assign a local address that is consistent with
	 * the remote address. Applications depend on a getsockname()
	 * after a connect() to retrieve the "source" IP address for
	 * the connected socket.  Invalidate the cached local address
	 * to force getsockname() to enquire of the transport.
	 */
	if (!(so->so_mode & SM_CONNREQUIRED)) {
		/*
		 * Datagram socket.
		 */
		int32_t val;

		so_unlock_single(so, SOLOCKED);
		mutex_exit(&so->so_lock);

		val = 1;
		(void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND,
		    &val, (t_uscalar_t)sizeof (val));

		mutex_enter(&so->so_lock);
		so_lock_single(so);	/* Set SOLOCKED */
		if ((so->so_family != AF_INET && so->so_family != AF_INET6) ||
		    (so->so_type != SOCK_DGRAM && so->so_type != SOCK_RAW) ||
		    soconnect_tpi_udp) {
			/* No T_CONN_REQ is sent: mark connected and finish */
			soisconnected(so);
			goto done;
		}
		/*
		 * Send down T_CONN_REQ etc.
		 * Clear fflag to avoid returning EWOULDBLOCK.
		 */
		fflag = 0;
		ASSERT(so->so_family != AF_UNIX);
		so->so_state &= ~SS_LADDR_VALID;
	} else if (so->so_laddr_len != 0) {
		/*
		 * If the local address or port was "any" then it may be
		 * changed by the transport as a result of the
		 * connect.  Invalidate the cached version if we have one.
		 */
		switch (so->so_family) {
		case AF_INET:
			ASSERT(so->so_laddr_len == (socklen_t)sizeof (sin_t));
			if (((sin_t *)so->so_laddr_sa)->sin_addr.s_addr ==
			    INADDR_ANY ||
			    ((sin_t *)so->so_laddr_sa)->sin_port == 0)
				so->so_state &= ~SS_LADDR_VALID;
			break;

		case AF_INET6:
			ASSERT(so->so_laddr_len == (socklen_t)sizeof (sin6_t));
			if (IN6_IS_ADDR_UNSPECIFIED(
			    &((sin6_t *)so->so_laddr_sa) ->sin6_addr) ||
			    IN6_IS_ADDR_V4MAPPED_ANY(
			    &((sin6_t *)so->so_laddr_sa)->sin6_addr) ||
			    ((sin6_t *)so->so_laddr_sa)->sin6_port == 0)
				so->so_state &= ~SS_LADDR_VALID;
			break;

		default:
			break;
		}
	}

	/*
	 * Check for failure of an earlier call
	 */
	if (so->so_error != 0)
		goto so_bad;

	/*
	 * Send down T_CONN_REQ. Message was allocated above.
	 */
	conn_req.PRIM_type = T_CONN_REQ;
	conn_req.DEST_length = addrlen;
	conn_req.DEST_offset = (t_scalar_t)sizeof (conn_req);
	if (srclen == 0) {
		conn_req.OPT_length = 0;
		conn_req.OPT_offset = 0;
		soappendmsg(mp, &conn_req, sizeof (conn_req));
		soappendmsg(mp, addr, addrlen);
	} else {
		/*
		 * There is a AF_UNIX sockaddr_un to include as a source
		 * address option.
		 */
		struct T_opthdr toh;

		toh.level = SOL_SOCKET;
		toh.name = SO_SRCADDR;
		toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr));
		toh.status = 0;
		conn_req.OPT_length =
			(t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen));
		conn_req.OPT_offset = (t_scalar_t)(sizeof (conn_req) +
			_TPI_ALIGN_TOPT(addrlen));

		/*
		 * Message layout: T_conn_req, destination address padded
		 * to _TPI_ALIGN_TOPT, T_opthdr, source address padded to
		 * _TPI_ALIGN_TOPT.  b_wptr is advanced by hand over each
		 * run of padding.
		 */
		soappendmsg(mp, &conn_req, sizeof (conn_req));
		soappendmsg(mp, addr, addrlen);
		mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen;
		soappendmsg(mp, &toh, sizeof (toh));
		soappendmsg(mp, src, srclen);
		mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen;
		ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
	}
	/*
	 * Set SS_ISCONNECTING before sending down the T_CONN_REQ
	 * in order to have the right state when the T_CONN_CON shows up.
	 */
	soisconnecting(so);
	mutex_exit(&so->so_lock);

#ifdef C2_AUDIT
	if (audit_active)
		audit_sock(T_CONN_REQ, strvp2wq(SOTOV(so)), mp, 0);
#endif /* C2_AUDIT */

	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
			MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
	/* mp has been handed off; keep "done" from freeing it again */
	mp = NULL;
	mutex_enter(&so->so_lock);
	if (error != 0)
		goto bad;

	if ((error = sowaitokack(so, T_CONN_REQ)) != 0)
		goto bad;

	/* Allow other threads to access the socket */
	so_unlock_single(so, SOLOCKED);
	need_unlock = B_FALSE;

	/*
	 * Wait until we get a T_CONN_CON or an error
	 */
	if ((error = sowaitconnected(so, fflag, 0)) != 0) {
		/* Re-take SOLOCKED for the error handling at "done" */
		so_lock_single(so);	/* Set SOLOCKED */
		need_unlock = B_TRUE;
	}

done:
	/*
	 * Common exit: entered with so_lock held; SOLOCKED is held iff
	 * need_unlock is B_TRUE.  mp is NULL once the T_CONN_REQ has been
	 * sent.
	 */
	freemsg(mp);
	switch (error) {
	case EINPROGRESS:
	case EALREADY:
	case EISCONN:
	case EINTR:
		/* Non-fatal errors */
		so->so_state &= ~SS_LADDR_VALID;
		/* FALLTHRU */
	case 0:
		break;

	case EHOSTUNREACH:
		if (flags & _SOCONNECT_XPG4_2) {
			/*
			 * X/Open specification contains a requirement that
			 * ENETUNREACH be returned but does not require
			 * EHOSTUNREACH. In order to keep the test suite
			 * happy we mess with the errno here.
			 */
			error = ENETUNREACH;
		}
		/* FALLTHRU */

	default:
		ASSERT(need_unlock);
		/*
		 * Fatal errors: clear SS_ISCONNECTING in case it was set,
		 * and invalidate local-address cache
		 */
		so->so_state &= ~(SS_ISCONNECTING | SS_LADDR_VALID);
		/* A discon_ind might have already unbound us */
		if ((flags & _SOCONNECT_DID_BIND) &&
		    (so->so_state & SS_ISBOUND)) {
			int err;

			/* Undo the implicit bind performed above */
			err = sotpi_unbind(so, 0);
			/* LINTED - statement has no conseq */
			if (err) {
				eprintsoline(so, err);
			}
		}
		break;
	}
	if (need_unlock)
		so_unlock_single(so, SOLOCKED);
	mutex_exit(&so->so_lock);
	return (error);

	/* so_bad: fetch the pending socket error; bad: log, then exit */
so_bad:	error = sogeterr(so);
bad:	eprintsoline(so, error);
	goto done;
}

int
sotpi_shutdown(struct sonode *so, int how)
{
	struct T_ordrel_req	ordrel_req;
	mblk_t			*mp;
	uint_t			old_state, state_change;
	int			error = 0;

	dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n",
		so, how, pr_state(so->so_state, so->so_mode)));

	mutex_enter(&so->so_lock);
	so_lock_single(so);	/* Set SOLOCKED */

	/*
	 * SunOS 4.X has no check for datagram sockets.
	 * 5.X checks that it is connected (ENOTCONN)
	 * X/Open requires that we check the connected state.
	 */
	if (!(so->so_state & SS_ISCONNECTED)) {
		if (!xnet_skip_checks) {
			error = ENOTCONN;
			if (xnet_check_print) {
				printf("sockfs: X/Open shutdown check "
					"caused ENOTCONN\n");
			}
		}
		goto done;
	}
	/*
	 * Record the current state and then perform any state changes.
	 * Then use the difference between the old and new states to
	 * determine which messages need to be sent.
	 * This prevents e.g.
duplicate T_ORDREL_REQ when there are 2236 * duplicate calls to shutdown(). 2237 */ 2238 old_state = so->so_state; 2239 2240 switch (how) { 2241 case 0: 2242 socantrcvmore(so); 2243 break; 2244 case 1: 2245 socantsendmore(so); 2246 break; 2247 case 2: 2248 socantsendmore(so); 2249 socantrcvmore(so); 2250 break; 2251 default: 2252 error = EINVAL; 2253 goto done; 2254 } 2255 2256 /* 2257 * Assumes that the SS_CANT* flags are never cleared in the above code. 2258 */ 2259 state_change = (so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) - 2260 (old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)); 2261 ASSERT((state_change & ~(SS_CANTRCVMORE|SS_CANTSENDMORE)) == 0); 2262 2263 switch (state_change) { 2264 case 0: 2265 dprintso(so, 1, 2266 ("sotpi_shutdown: nothing to send in state 0x%x\n", 2267 so->so_state)); 2268 goto done; 2269 2270 case SS_CANTRCVMORE: 2271 mutex_exit(&so->so_lock); 2272 strseteof(SOTOV(so), 1); 2273 /* 2274 * strseteof takes care of read side wakeups, 2275 * pollwakeups, and signals. 2276 */ 2277 /* 2278 * Get the read lock before flushing data to avoid problems 2279 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 2280 */ 2281 mutex_enter(&so->so_lock); 2282 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2283 mutex_exit(&so->so_lock); 2284 2285 /* Flush read side queue */ 2286 strflushrq(SOTOV(so), FLUSHALL); 2287 2288 mutex_enter(&so->so_lock); 2289 so_unlock_read(so); /* Clear SOREADLOCKED */ 2290 break; 2291 2292 case SS_CANTSENDMORE: 2293 mutex_exit(&so->so_lock); 2294 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2295 mutex_enter(&so->so_lock); 2296 break; 2297 2298 case SS_CANTSENDMORE|SS_CANTRCVMORE: 2299 mutex_exit(&so->so_lock); 2300 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2301 strseteof(SOTOV(so), 1); 2302 /* 2303 * strseteof takes care of read side wakeups, 2304 * pollwakeups, and signals. 2305 */ 2306 /* 2307 * Get the read lock before flushing data to avoid problems 2308 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 
2309 */ 2310 mutex_enter(&so->so_lock); 2311 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2312 mutex_exit(&so->so_lock); 2313 2314 /* Flush read side queue */ 2315 strflushrq(SOTOV(so), FLUSHALL); 2316 2317 mutex_enter(&so->so_lock); 2318 so_unlock_read(so); /* Clear SOREADLOCKED */ 2319 break; 2320 } 2321 2322 ASSERT(MUTEX_HELD(&so->so_lock)); 2323 2324 /* 2325 * If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them 2326 * was set due to this call and the new state has both of them set: 2327 * Send the AF_UNIX close indication 2328 * For T_COTS send a discon_ind 2329 * 2330 * If cantsend was set due to this call: 2331 * For T_COTSORD send an ordrel_ind 2332 * 2333 * Note that for T_CLTS there is no message sent here. 2334 */ 2335 if ((so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) == 2336 (SS_CANTRCVMORE|SS_CANTSENDMORE)) { 2337 /* 2338 * For SunOS 4.X compatibility we tell the other end 2339 * that we are unable to receive at this point. 2340 */ 2341 if (so->so_family == AF_UNIX && so->so_serv_type != T_CLTS) 2342 so_unix_close(so); 2343 2344 if (so->so_serv_type == T_COTS) 2345 error = sodisconnect(so, -1, _SODISCONNECT_LOCK_HELD); 2346 } 2347 if ((state_change & SS_CANTSENDMORE) && 2348 (so->so_serv_type == T_COTS_ORD)) { 2349 /* Send an orderly release */ 2350 ordrel_req.PRIM_type = T_ORDREL_REQ; 2351 2352 mutex_exit(&so->so_lock); 2353 mp = soallocproto1(&ordrel_req, sizeof (ordrel_req), 2354 0, _ALLOC_SLEEP); 2355 /* 2356 * Send down the T_ORDREL_REQ even if there is flow control. 2357 * This prevents shutdown from blocking. 2358 * Note that there is no T_OK_ACK for ordrel_req. 
2359 */ 2360 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2361 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2362 mutex_enter(&so->so_lock); 2363 if (error) { 2364 eprintsoline(so, error); 2365 goto done; 2366 } 2367 } 2368 2369 done: 2370 so_unlock_single(so, SOLOCKED); 2371 mutex_exit(&so->so_lock); 2372 return (error); 2373 } 2374 2375 /* 2376 * For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send 2377 * a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer 2378 * that we have closed. 2379 * Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length 2380 * T_UNITDATA_REQ containing the same option. 2381 * 2382 * For SOCK_DGRAM half-connections (somebody connected to this end 2383 * but this end is not connect) we don't know where to send any 2384 * SO_UNIX_CLOSE. 2385 * 2386 * We have to ignore stream head errors just in case there has been 2387 * a shutdown(output). 2388 * Ignore any flow control to try to get the message more quickly to the peer. 2389 * While locally ignoring flow control solves the problem when there 2390 * is only the loopback transport on the stream it would not provide 2391 * the correct AF_UNIX socket semantics when one or more modules have 2392 * been pushed. 
2393 */ 2394 void 2395 so_unix_close(struct sonode *so) 2396 { 2397 int error; 2398 struct T_opthdr toh; 2399 mblk_t *mp; 2400 2401 ASSERT(MUTEX_HELD(&so->so_lock)); 2402 2403 ASSERT(so->so_family == AF_UNIX); 2404 2405 if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) != 2406 (SS_ISCONNECTED|SS_ISBOUND)) 2407 return; 2408 2409 dprintso(so, 1, ("so_unix_close(%p) %s\n", 2410 so, pr_state(so->so_state, so->so_mode))); 2411 2412 toh.level = SOL_SOCKET; 2413 toh.name = SO_UNIX_CLOSE; 2414 2415 /* zero length + header */ 2416 toh.len = (t_uscalar_t)sizeof (struct T_opthdr); 2417 toh.status = 0; 2418 2419 if (so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) { 2420 struct T_optdata_req tdr; 2421 2422 tdr.PRIM_type = T_OPTDATA_REQ; 2423 tdr.DATA_flag = 0; 2424 2425 tdr.OPT_length = (t_scalar_t)sizeof (toh); 2426 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 2427 2428 /* NOTE: holding so_lock while sleeping */ 2429 mp = soallocproto2(&tdr, sizeof (tdr), 2430 &toh, sizeof (toh), 0, _ALLOC_SLEEP); 2431 } else { 2432 struct T_unitdata_req tudr; 2433 void *addr; 2434 socklen_t addrlen; 2435 void *src; 2436 socklen_t srclen; 2437 struct T_opthdr toh2; 2438 t_scalar_t size; 2439 2440 /* Connecteded DGRAM socket */ 2441 2442 /* 2443 * For AF_UNIX the destination address is translated to 2444 * an internal name and the source address is passed as 2445 * an option. 2446 */ 2447 /* 2448 * Length and family checks. 2449 */ 2450 error = so_addr_verify(so, so->so_faddr_sa, 2451 (t_uscalar_t)so->so_faddr_len); 2452 if (error) { 2453 eprintsoline(so, error); 2454 return; 2455 } 2456 if (so->so_state & SS_FADDR_NOXLATE) { 2457 /* 2458 * Already have a transport internal address. Do not 2459 * pass any (transport internal) source address. 2460 */ 2461 addr = so->so_faddr_sa; 2462 addrlen = (t_uscalar_t)so->so_faddr_len; 2463 src = NULL; 2464 srclen = 0; 2465 } else { 2466 /* 2467 * Pass the sockaddr_un source address as an option 2468 * and translate the remote address. 
2469 * Holding so_lock thus so_laddr_sa can not change. 2470 */ 2471 src = so->so_laddr_sa; 2472 srclen = (socklen_t)so->so_laddr_len; 2473 dprintso(so, 1, 2474 ("so_ux_close: srclen %d, src %p\n", 2475 srclen, src)); 2476 error = so_ux_addr_xlate(so, 2477 so->so_faddr_sa, 2478 (socklen_t)so->so_faddr_len, 0, 2479 &addr, &addrlen); 2480 if (error) { 2481 eprintsoline(so, error); 2482 return; 2483 } 2484 } 2485 tudr.PRIM_type = T_UNITDATA_REQ; 2486 tudr.DEST_length = addrlen; 2487 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 2488 if (srclen == 0) { 2489 tudr.OPT_length = (t_scalar_t)sizeof (toh); 2490 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2491 _TPI_ALIGN_TOPT(addrlen)); 2492 2493 size = tudr.OPT_offset + tudr.OPT_length; 2494 /* NOTE: holding so_lock while sleeping */ 2495 mp = soallocproto2(&tudr, sizeof (tudr), 2496 addr, addrlen, size, _ALLOC_SLEEP); 2497 mp->b_wptr += (_TPI_ALIGN_TOPT(addrlen) - addrlen); 2498 soappendmsg(mp, &toh, sizeof (toh)); 2499 } else { 2500 /* 2501 * There is a AF_UNIX sockaddr_un to include as a 2502 * source address option. 
2503 */ 2504 tudr.OPT_length = (t_scalar_t)(2 * sizeof (toh) + 2505 _TPI_ALIGN_TOPT(srclen)); 2506 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2507 _TPI_ALIGN_TOPT(addrlen)); 2508 2509 toh2.level = SOL_SOCKET; 2510 toh2.name = SO_SRCADDR; 2511 toh2.len = (t_uscalar_t)(srclen + 2512 sizeof (struct T_opthdr)); 2513 toh2.status = 0; 2514 2515 size = tudr.OPT_offset + tudr.OPT_length; 2516 2517 /* NOTE: holding so_lock while sleeping */ 2518 mp = soallocproto2(&tudr, sizeof (tudr), 2519 addr, addrlen, size, _ALLOC_SLEEP); 2520 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2521 soappendmsg(mp, &toh, sizeof (toh)); 2522 soappendmsg(mp, &toh2, sizeof (toh2)); 2523 soappendmsg(mp, src, srclen); 2524 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2525 } 2526 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2527 } 2528 mutex_exit(&so->so_lock); 2529 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2530 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2531 mutex_enter(&so->so_lock); 2532 } 2533 2534 /* 2535 * Handle recv* calls that set MSG_OOB or MSG_OOB together with MSG_PEEK. 2536 */ 2537 int 2538 sorecvoob(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, int flags) 2539 { 2540 mblk_t *mp, *nmp; 2541 int error; 2542 2543 dprintso(so, 1, ("sorecvoob(%p, %p, 0x%x)\n", so, msg, flags)); 2544 2545 /* 2546 * There is never any oob data with addresses or control since 2547 * the T_EXDATA_IND does not carry any options. 
2548 */ 2549 msg->msg_controllen = 0; 2550 msg->msg_namelen = 0; 2551 2552 mutex_enter(&so->so_lock); 2553 ASSERT(so_verify_oobstate(so)); 2554 if ((so->so_options & SO_OOBINLINE) || 2555 (so->so_state & (SS_OOBPEND|SS_HADOOBDATA)) != SS_OOBPEND) { 2556 dprintso(so, 1, ("sorecvoob: inline or data consumed\n")); 2557 mutex_exit(&so->so_lock); 2558 return (EINVAL); 2559 } 2560 if (!(so->so_state & SS_HAVEOOBDATA)) { 2561 dprintso(so, 1, ("sorecvoob: no data yet\n")); 2562 mutex_exit(&so->so_lock); 2563 return (EWOULDBLOCK); 2564 } 2565 ASSERT(so->so_oobmsg != NULL); 2566 mp = so->so_oobmsg; 2567 if (flags & MSG_PEEK) { 2568 /* 2569 * Since recv* can not return ENOBUFS we can not use dupmsg. 2570 * Instead we revert to the consolidation private 2571 * allocb_wait plus bcopy. 2572 */ 2573 mblk_t *mp1; 2574 2575 mp1 = allocb_wait(msgdsize(mp), BPRI_MED, STR_NOSIG, NULL); 2576 ASSERT(mp1); 2577 2578 while (mp != NULL) { 2579 ssize_t size; 2580 2581 size = MBLKL(mp); 2582 bcopy(mp->b_rptr, mp1->b_wptr, size); 2583 mp1->b_wptr += size; 2584 ASSERT(mp1->b_wptr <= mp1->b_datap->db_lim); 2585 mp = mp->b_cont; 2586 } 2587 mp = mp1; 2588 } else { 2589 /* 2590 * Update the state indicating that the data has been consumed. 2591 * Keep SS_OOBPEND set until data is consumed past the mark. 
2592 */ 2593 so->so_oobmsg = NULL; 2594 so->so_state ^= SS_HAVEOOBDATA|SS_HADOOBDATA; 2595 } 2596 dprintso(so, 1, 2597 ("after recvoob(%p): counts %d/%d state %s\n", 2598 so, so->so_oobsigcnt, 2599 so->so_oobcnt, pr_state(so->so_state, so->so_mode))); 2600 ASSERT(so_verify_oobstate(so)); 2601 mutex_exit(&so->so_lock); 2602 2603 error = 0; 2604 nmp = mp; 2605 while (nmp != NULL && uiop->uio_resid > 0) { 2606 ssize_t n = MBLKL(nmp); 2607 2608 n = MIN(n, uiop->uio_resid); 2609 if (n > 0) 2610 error = uiomove(nmp->b_rptr, n, 2611 UIO_READ, uiop); 2612 if (error) 2613 break; 2614 nmp = nmp->b_cont; 2615 } 2616 freemsg(mp); 2617 return (error); 2618 } 2619 2620 /* 2621 * Called by sotpi_recvmsg when reading a non-zero amount of data. 2622 * In addition, the caller typically verifies that there is some 2623 * potential state to clear by checking 2624 * if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) 2625 * before calling this routine. 2626 * Note that such a check can be made without holding so_lock since 2627 * sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg 2628 * decrements so_oobsigcnt. 2629 * 2630 * When data is read *after* the point that all pending 2631 * oob data has been consumed the oob indication is cleared. 2632 * 2633 * This logic keeps select/poll returning POLLRDBAND and 2634 * SIOCATMARK returning true until we have read past 2635 * the mark. 
 */
static void
sorecv_update_oobstate(struct sonode *so)
{
	mutex_enter(&so->so_lock);
	ASSERT(so_verify_oobstate(so));
	dprintso(so, 1,
	    ("sorecv_update_oobstate: counts %d/%d state %s\n",
	    so->so_oobsigcnt,
	    so->so_oobcnt, pr_state(so->so_state, so->so_mode)));
	if (so->so_oobsigcnt == 0) {
		/* No more pending oob indications */
		so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK);
		/* Drop any saved oob message along with the state bits */
		freemsg(so->so_oobmsg);
		so->so_oobmsg = NULL;
	}
	ASSERT(so_verify_oobstate(so));
	mutex_exit(&so->so_lock);
}

/*
 * Handle recv* calls for an so which has NL7C saved recv mblk_t(s).
 *
 * Walks the chain saved in so->so_nl7c_rcv_mp:
 *	- M_DATA mblks are copied to the caller with uiomove() until
 *	  uio_resid is exhausted; fully consumed mblks are freed before
 *	  returning.
 *	- a mblk of any other type is unlinked and handed back through *rmp
 *	  for the caller to interpret.
 *
 * so->so_nl7c_rcv_mp is updated to whatever remains.  When nothing remains
 * the rval saved in so_nl7c_rcv_rval is returned through rp (and reset),
 * otherwise rp->r_vals is set to 0.
 *
 * Returns 0 or the error from uiomove().
 */
static int
nl7c_sorecv(struct sonode *so, mblk_t **rmp, uio_t *uiop, rval_t *rp)
{
	int	error = 0;
	mblk_t	*tmp = NULL;	/* last mblk handed back through *rmp */
	mblk_t	*pmp = NULL;	/* last M_DATA mblk that was fully consumed */
	mblk_t	*nmp = so->so_nl7c_rcv_mp;	/* mblk being looked at */

	ASSERT(nmp != NULL);

	while (nmp != NULL && uiop->uio_resid > 0) {
		ssize_t n;

		if (DB_TYPE(nmp) == M_DATA) {
			/*
			 * We have some data, uiomove up to resid bytes.
			 */
			n = MIN(MBLKL(nmp), uiop->uio_resid);
			if (n > 0)
				error = uiomove(nmp->b_rptr, n, UIO_READ, uiop);
			if (error)
				break;
			nmp->b_rptr += n;
			if (nmp->b_rptr == nmp->b_wptr) {
				/* Drained this one; move on to the next */
				pmp = nmp;
				nmp = nmp->b_cont;
			}
		} else {
			/*
			 * We only handle data, save for caller to handle.
			 */
			if (pmp != NULL) {
				pmp->b_cont = nmp->b_cont;
			}
			/*
			 * NOTE(review): b_cont is cleared here and then read
			 * again below, so nmp (and with it tmp) always ends
			 * up NULL and the walk stops at the first non-data
			 * mblk.  Whatever followed it is then only reachable
			 * through pmp->b_cont, which is reset after the loop,
			 * or not at all when pmp is NULL.  Because of that
			 * the tmp->b_next branch is never taken with a
			 * non-NULL *rmp set by this function.  Confirm
			 * whether the remainder of the chain is meant to be
			 * kept in so_nl7c_rcv_mp.
			 */
			nmp->b_cont = NULL;
			if (*rmp == NULL) {
				*rmp = nmp;
			} else {
				tmp->b_next = nmp;
			}
			nmp = nmp->b_cont;
			tmp = nmp;
		}
	}
	if (pmp != NULL) {
		/* Free any mblk_t(s) which we have consumed */
		pmp->b_cont = NULL;
		freemsg(so->so_nl7c_rcv_mp);
	}
	if ((so->so_nl7c_rcv_mp = nmp) == NULL) {
		/* Last mblk_t so return the saved rval from kstrgetmsg() */
		rp->r_vals = so->so_nl7c_rcv_rval;
		so->so_nl7c_rcv_rval = 0;
	} else {
		/* More mblk_t(s) to process so no rval to return */
		rp->r_vals = 0;
	}
	return (error);
}

/*
 * Receive the next message on the queue.
 * If msg_controllen is non-zero when called the caller is interested in
 * any received control info (options).
 * If msg_namelen is non-zero when called the caller is interested in
 * any received source address.
 * The routine returns with msg_control and msg_name pointing to
 * kmem_alloc'ed memory which the caller has to free.
 *
 * On entry msg->msg_flags holds the caller's MSG_* flags; it is cleared
 * right away and on return carries result flags only (MSG_TRUNC, MSG_EOR,
 * MSG_CTRUNC as set below).
 *
 * Data is copied to uiop by kstrgetmsg() (or nl7c_sorecv() when NL7C has
 * saved mblks); this routine then interprets any TPI header that came with
 * it.  SOREADLOCKED is held from the call to so_lock_read_intr() until
 * every return after that point.
 *
 * Returns 0 on success, otherwise an errno: ENOTCONN, EOPNOTSUPP, EPROTO,
 * EWOULDBLOCK (also substituted for ETIME), or whatever sorecvoob(),
 * so_lock_read_intr(), kstrgetmsg()/nl7c_sorecv() or so_opt2cmsg() returned.
 */
int
sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
{
	union T_primitives	*tpr;
	mblk_t			*mp;
	uchar_t			pri;
	int			pflag, opflag;	/* opflag: pflag as first set up */
	void			*control;
	t_uscalar_t		controllen;
	t_uscalar_t		namelen;
	int			so_state = so->so_state;	/* Snapshot */
	ssize_t			saved_resid;	/* uio_resid before each get */
	int			error;
	rval_t			rval;
	int			flags;
	clock_t			timout;
	int			first;	/* cleared once MSG_WAITALL retries */

	flags = msg->msg_flags;
	msg->msg_flags = 0;

	dprintso(so, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n",
	    so, msg, flags,
	    pr_state(so->so_state, so->so_mode), so->so_error));

	/*
	 * If we are not connected because we have never been connected
	 * we return ENOTCONN. If we have been connected (but are no longer
	 * connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return
	 * the EOF.
	 *
	 * An alternative would be to post an ENOTCONN error in stream head
	 * (read+write) and clear it when we're connected. However, that error
	 * would cause incorrect poll/select behavior!
	 */
	if ((so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 &&
	    (so->so_mode & SM_CONNREQUIRED)) {
		return (ENOTCONN);
	}

	/*
	 * Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but
	 * after checking that the read queue is empty) and returns zero.
	 * This implementation will sleep (in kstrgetmsg) even if uio_resid
	 * is zero.
	 */

	if (flags & MSG_OOB) {
		/* Check that the transport supports OOB */
		if (!(so->so_mode & SM_EXDATA))
			return (EOPNOTSUPP);
		return (sorecvoob(so, msg, uiop, flags));
	}

	/*
	 * Set msg_controllen and msg_namelen to zero here to make it
	 * simpler in the cases that no control or name is returned.
	 */
	controllen = msg->msg_controllen;
	namelen = msg->msg_namelen;
	msg->msg_controllen = 0;
	msg->msg_namelen = 0;

	dprintso(so, 1, ("sotpi_recvmsg: namelen %d controllen %d\n",
	    namelen, controllen));

	/*
	 * If an NL7C enabled socket and not waiting for write data.
	 */
	mutex_enter(&so->so_lock);
	if ((so->so_nl7c_flags & (NL7C_ENABLED|NL7C_WAITWRITE)) ==
	    NL7C_ENABLED) {
		if (so->so_nl7c_uri) {
			/*
			 * Close uri processing for a previous request.
			 */
			nl7c_close(so);
		}
		if (nl7c_process(so,
		    (so->so_state & (SS_NONBLOCK|SS_NDELAY)),
		    (int)((tcp_t *)so->so_priv)->tcp_mss)) {
			/*
			 * NL7C has completed processing on the socket,
			 * clear the enabled bit as no further NL7C
			 * processing will be needed.
			 */
			so->so_nl7c_flags = 0;
		}
	}

	/*
	 * Only one reader is allowed at any given time. This is needed
	 * for T_EXDATA handling and, in the future, MSG_WAITALL.
	 *
	 * This is slightly different than BSD behavior in that it fails with
	 * EWOULDBLOCK when using nonblocking io. In BSD the read queue access
	 * is single-threaded using sblock(), which is dropped while waiting
	 * for data to appear. The difference shows up e.g. if one
	 * file descriptor does not have O_NONBLOCK but a dup'ed file descriptor
	 * does use nonblocking io and different threads are reading each
	 * file descriptor. In BSD there would never be an EWOULDBLOCK error
	 * in this case as long as the read queue doesn't get empty.
	 * In this implementation the thread using nonblocking io can
	 * get an EWOULDBLOCK error due to the blocking thread executing
	 * e.g. in the uiomove in kstrgetmsg.
	 * This difference is not believed to be significant.
	 */
	error = so_lock_read_intr(so, uiop->uio_fmode);	/* Set SOREADLOCKED */
	mutex_exit(&so->so_lock);
	if (error)
		return (error);

	/*
	 * Tell kstrgetmsg to not inspect the stream head errors until all
	 * queued data has been consumed.
	 * Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set.
	 * Also, If uio_fmode indicates nonblocking kstrgetmsg will not block.
	 *
	 * MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and
	 * to T_OPTDATA_IND that do not contain any user-visible control msg.
	 * Note that MSG_WAITALL set with MSG_PEEK is a noop.
	 */
	pflag = MSG_ANY | MSG_DELAYERROR;
	if (flags & MSG_PEEK) {
		pflag |= MSG_IPEEK;
		flags &= ~MSG_WAITALL;
	}
	if (so->so_mode & SM_ATOMIC)
		pflag |= MSG_DISCARDTAIL;

	if (flags & MSG_DONTWAIT)
		timout = 0;
	else
		timout = -1;
	opflag = pflag;
	first = 1;

	/*
	 * If so saved NL7C rcv mblk_t(s) uiomove them first
	 * else get'm from the streamhead.
	 */
retry:
	saved_resid = uiop->uio_resid;
	pri = 0;
	mp = NULL;
	if (so->so_nl7c_rcv_mp != NULL) {
		error = nl7c_sorecv(so, &mp, uiop, &rval);
	} else {
		error = kstrgetmsg(SOTOV(so), &mp, uiop, &pri, &pflag,
		    timout, &rval);
	}
	if (error) {
		/*
		 * Once a MSG_WAITALL retry has started (first == 0) some
		 * data has already been delivered, so the "try again" type
		 * errors are turned into success.
		 */
		switch (error) {
		case EINTR:
		case EWOULDBLOCK:
			if (!first)
				error = 0;
			break;
		case ETIME:
			/* Returned from kstrgetmsg when timeout expires */
			if (!first)
				error = 0;
			else
				error = EWOULDBLOCK;
			break;
		default:
			eprintsoline(so, error);
			break;
		}
		mutex_enter(&so->so_lock);
		so_unlock_read(so);	/* Clear SOREADLOCKED */
		mutex_exit(&so->so_lock);
		return (error);
	}
	/*
	 * For datagrams the MOREDATA flag is used to set MSG_TRUNC.
	 * For non-datagrams MOREDATA is used to set MSG_EOR.
	 */
	ASSERT(!(rval.r_val1 & MORECTL));
	if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC))
		msg->msg_flags |= MSG_TRUNC;

	if (mp == NULL) {
		/* No TPI header came back: plain data (or EOF) */
		dprintso(so, 1, ("sotpi_recvmsg: got M_DATA\n"));
		/*
		 * 4.3BSD and 4.4BSD clears the mark when peeking across it.
		 * The draft Posix socket spec states that the mark should
		 * not be cleared when peeking. We follow the latter.
		 */
		if ((so->so_state &
		    (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) &&
		    (uiop->uio_resid != saved_resid) &&
		    !(flags & MSG_PEEK)) {
			sorecv_update_oobstate(so);
		}

		mutex_enter(&so->so_lock);
		/* Set MSG_EOR based on MOREDATA */
		if (!(rval.r_val1 & MOREDATA)) {
			if (so->so_state & SS_SAVEDEOR) {
				msg->msg_flags |= MSG_EOR;
				so->so_state &= ~SS_SAVEDEOR;
			}
		}
		/*
		 * If some data was received (i.e. not EOF) and the
		 * read/recv* has not been satisfied wait for some more.
		 */
		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
			mutex_exit(&so->so_lock);
			first = 0;
			pflag = opflag | MSG_NOMARK;
			goto retry;
		}
		so_unlock_read(so);	/* Clear SOREADLOCKED */
		mutex_exit(&so->so_lock);
		return (0);
	}

	/* strsock_proto has already verified length and alignment */
	tpr = (union T_primitives *)mp->b_rptr;
	dprintso(so, 1, ("sotpi_recvmsg: type %d\n", tpr->type));

	switch (tpr->type) {
	case T_DATA_IND: {
		if ((so->so_state &
		    (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) &&
		    (uiop->uio_resid != saved_resid) &&
		    !(flags & MSG_PEEK)) {
			sorecv_update_oobstate(so);
		}

		/*
		 * Set msg_flags to MSG_EOR based on
		 * MORE_flag and MOREDATA.
		 */
		mutex_enter(&so->so_lock);
		so->so_state &= ~SS_SAVEDEOR;
		if (!(tpr->data_ind.MORE_flag & 1)) {
			/*
			 * End of record.  Report it now unless part of the
			 * message is still queued, in which case remember
			 * it for the read that picks up the rest.
			 */
			if (!(rval.r_val1 & MOREDATA))
				msg->msg_flags |= MSG_EOR;
			else
				so->so_state |= SS_SAVEDEOR;
		}
		freemsg(mp);
		/*
		 * If some data was received (i.e. not EOF) and the
		 * read/recv* has not been satisfied wait for some more.
		 */
		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
			mutex_exit(&so->so_lock);
			first = 0;
			pflag = opflag | MSG_NOMARK;
			goto retry;
		}
		so_unlock_read(so);	/* Clear SOREADLOCKED */
		mutex_exit(&so->so_lock);
		return (0);
	}
	case T_UNITDATA_IND: {
		void *addr;
		t_uscalar_t addrlen;
		void *abuf;
		t_uscalar_t optlen;
		void *opt;

		if ((so->so_state &
		    (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) &&
		    (uiop->uio_resid != saved_resid) &&
		    !(flags & MSG_PEEK)) {
			sorecv_update_oobstate(so);
		}

		if (namelen != 0) {
			/* Caller wants source address */
			addrlen = tpr->unitdata_ind.SRC_length;
			addr = sogetoff(mp,
			    tpr->unitdata_ind.SRC_offset,
			    addrlen, 1);
			if (addr == NULL) {
				freemsg(mp);
				error = EPROTO;
				eprintsoline(so, error);
				goto err;
			}
			if (so->so_family == AF_UNIX) {
				/*
				 * Can not use the transport level address.
				 * If there is a SO_SRCADDR option carrying
				 * the socket level address it will be
				 * extracted below.
				 */
				addr = NULL;
				addrlen = 0;
			}
		}
		optlen = tpr->unitdata_ind.OPT_length;
		if (optlen != 0) {
			t_uscalar_t ncontrollen;

			/*
			 * Extract any source address option.
			 * Determine how large cmsg buffer is needed.
			 */
			opt = sogetoff(mp,
			    tpr->unitdata_ind.OPT_offset,
			    optlen, __TPI_ALIGN_SIZE);

			if (opt == NULL) {
				freemsg(mp);
				error = EPROTO;
				eprintsoline(so, error);
				goto err;
			}
			if (so->so_family == AF_UNIX)
				so_getopt_srcaddr(opt, optlen, &addr, &addrlen);
			ncontrollen = so_cmsglen(mp, opt, optlen,
			    !(flags & MSG_XPG4_2));
			/*
			 * Caller asked for control info: size the buffer to
			 * what arrived.  Caller did not ask but some
			 * arrived: flag the loss.
			 */
			if (controllen != 0)
				controllen = ncontrollen;
			else if (ncontrollen != 0)
				msg->msg_flags |= MSG_CTRUNC;
		} else {
			/* No options; opt stays unset and is not used below */
			controllen = 0;
		}

		if (namelen != 0) {
			/*
			 * Return address to caller.
			 * Caller handles truncation if length
			 * exceeds msg_namelen.
			 * NOTE: AF_UNIX NUL termination is ensured by
			 * the sender's copyin_name().
			 */
			abuf = kmem_alloc(addrlen, KM_SLEEP);

			bcopy(addr, abuf, addrlen);
			msg->msg_name = abuf;
			msg->msg_namelen = addrlen;
		}

		if (controllen != 0) {
			/*
			 * Return control msg to caller.
			 * Caller handles truncation if length
			 * exceeds msg_controllen.
			 */
			control = kmem_alloc(controllen, KM_SLEEP);

			error = so_opt2cmsg(mp, opt, optlen,
			    !(flags & MSG_XPG4_2),
			    control, controllen);
			if (error) {
				/*
				 * Undo the allocations made above.
				 * NOTE(review): msg_name/msg_namelen still
				 * refer to the freed buffer after this;
				 * verify callers do not free msg_name again
				 * on error.
				 */
				freemsg(mp);
				if (msg->msg_namelen != 0)
					kmem_free(msg->msg_name,
					    msg->msg_namelen);
				kmem_free(control, controllen);
				eprintsoline(so, error);
				goto err;
			}
			msg->msg_control = control;
			msg->msg_controllen = controllen;
		}

		freemsg(mp);
		mutex_enter(&so->so_lock);
		so_unlock_read(so);	/* Clear SOREADLOCKED */
		mutex_exit(&so->so_lock);
		return (0);
	}
	case T_OPTDATA_IND: {
		struct T_optdata_req *tdr;
		void *opt;
		t_uscalar_t optlen;

		if ((so->so_state &
		    (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) &&
		    (uiop->uio_resid != saved_resid) &&
		    !(flags & MSG_PEEK)) {
			sorecv_update_oobstate(so);
		}

		/*
		 * NOTE(review): the indication is read through the request
		 * structure type here; presumably T_optdata_req and
		 * T_optdata_ind share a layout - confirm against tihdr.h.
		 */
		tdr = (struct T_optdata_req *)mp->b_rptr;
		optlen = tdr->OPT_length;
		if (optlen != 0) {
			t_uscalar_t ncontrollen;
			/*
			 * Determine how large cmsg buffer is needed.
			 */
			opt = sogetoff(mp,
			    tpr->optdata_ind.OPT_offset,
			    optlen, __TPI_ALIGN_SIZE);

			if (opt == NULL) {
				freemsg(mp);
				error = EPROTO;
				eprintsoline(so, error);
				goto err;
			}

			ncontrollen = so_cmsglen(mp, opt, optlen,
			    !(flags & MSG_XPG4_2));
			if (controllen != 0)
				controllen = ncontrollen;
			else if (ncontrollen != 0)
				msg->msg_flags |= MSG_CTRUNC;
		} else {
			/*
			 * NOTE(review): this zeroes controllen for the rest
			 * of the call, including any MSG_WAITALL retry, so
			 * options on a later message would be reported as
			 * MSG_CTRUNC even if the caller asked for control
			 * info.  Confirm that is intended.
			 */
			controllen = 0;
		}

		if (controllen != 0) {
			/*
			 * Return control msg to caller.
			 * Caller handles truncation if length
			 * exceeds msg_controllen.
			 */
			control = kmem_alloc(controllen, KM_SLEEP);

			error = so_opt2cmsg(mp, opt, optlen,
			    !(flags & MSG_XPG4_2),
			    control, controllen);
			if (error) {
				freemsg(mp);
				kmem_free(control, controllen);
				eprintsoline(so, error);
				goto err;
			}
			msg->msg_control = control;
			msg->msg_controllen = controllen;
		}

		/*
		 * Set msg_flags to MSG_EOR based on
		 * DATA_flag and MOREDATA.
		 * (The flag is read via the data_ind view of the union.)
		 */
		mutex_enter(&so->so_lock);
		so->so_state &= ~SS_SAVEDEOR;
		if (!(tpr->data_ind.MORE_flag & 1)) {
			if (!(rval.r_val1 & MOREDATA))
				msg->msg_flags |= MSG_EOR;
			else
				so->so_state |= SS_SAVEDEOR;
		}
		freemsg(mp);
		/*
		 * If some data was received (i.e. not EOF) and the
		 * read/recv* has not been satisfied wait for some more.
		 * Not possible to wait if control info was received.
		 */
		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
		    controllen == 0 &&
		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
			mutex_exit(&so->so_lock);
			first = 0;
			pflag = opflag | MSG_NOMARK;
			goto retry;
		}
		so_unlock_read(so);	/* Clear SOREADLOCKED */
		mutex_exit(&so->so_lock);
		return (0);
	}
	case T_EXDATA_IND: {
		dprintso(so, 1,
		    ("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld "
		    "state %s\n",
		    so->so_oobsigcnt, so->so_oobcnt,
		    saved_resid - uiop->uio_resid,
		    pr_state(so->so_state, so->so_mode)));
		/*
		 * kstrgetmsg handles MSGMARK so there is nothing to
		 * inspect in the T_EXDATA_IND.
		 * strsock_proto makes the stream head queue the T_EXDATA_IND
		 * as a separate message with no M_DATA component. Furthermore,
		 * the stream head does not consolidate M_DATA messages onto
		 * an MSGMARK'ed message ensuring that the T_EXDATA_IND
		 * remains a message by itself. This is needed since MSGMARK
		 * marks both the whole message as well as the last byte
		 * of the message.
		 */
		freemsg(mp);
		ASSERT(uiop->uio_resid == saved_resid);	/* No data */
		if (flags & MSG_PEEK) {
			/*
			 * Even though we are peeking we consume the
			 * T_EXDATA_IND thereby moving the mark information
			 * to SS_RCVATMARK. Then the oob code below will
			 * retry the peeking kstrgetmsg.
			 * Note that the stream head read queue is
			 * never flushed without holding SOREADLOCKED
			 * thus the T_EXDATA_IND can not disappear
			 * underneath us.
			 */
			dprintso(so, 1,
			    ("sotpi_recvmsg: consume EXDATA_IND "
			    "counts %d/%d state %s\n",
			    so->so_oobsigcnt,
			    so->so_oobcnt,
			    pr_state(so->so_state, so->so_mode)));

			/* Same flags as the initial get, minus MSG_IPEEK */
			pflag = MSG_ANY | MSG_DELAYERROR;
			if (so->so_mode & SM_ATOMIC)
				pflag |= MSG_DISCARDTAIL;

			pri = 0;
			mp = NULL;

			error = kstrgetmsg(SOTOV(so), &mp, uiop,
			    &pri, &pflag, (clock_t)-1, &rval);
			ASSERT(uiop->uio_resid == saved_resid);

			if (error) {
#ifdef SOCK_DEBUG
				if (error != EWOULDBLOCK && error != EINTR) {
					eprintsoline(so, error);
				}
#endif /* SOCK_DEBUG */
				mutex_enter(&so->so_lock);
				so_unlock_read(so);	/* Clear SOREADLOCKED */
				mutex_exit(&so->so_lock);
				return (error);
			}
			ASSERT(mp);
			tpr = (union T_primitives *)mp->b_rptr;
			ASSERT(tpr->type == T_EXDATA_IND);
			freemsg(mp);
		} /* end "if (flags & MSG_PEEK)" */

		/*
		 * Decrement the number of queued and pending oob.
		 *
		 * SS_RCVATMARK is cleared when we read past a mark.
		 * SS_HAVEOOBDATA is cleared when we've read past the
		 * last mark.
		 * SS_OOBPEND is cleared if we've read past the last
		 * mark and no (new) SIGURG has been posted.
		 */
		mutex_enter(&so->so_lock);
		ASSERT(so_verify_oobstate(so));
		ASSERT(so->so_oobsigcnt >= so->so_oobcnt);
		ASSERT(so->so_oobsigcnt > 0);
		so->so_oobsigcnt--;
		ASSERT(so->so_oobcnt > 0);
		so->so_oobcnt--;
		/*
		 * Since the T_EXDATA_IND has been removed from the stream
		 * head, but we have not read data past the mark,
		 * sockfs needs to track that the socket is still at the mark.
		 *
		 * Since no data was received call kstrgetmsg again to wait
		 * for data.
		 */
		so->so_state |= SS_RCVATMARK;
		mutex_exit(&so->so_lock);
		dprintso(so, 1,
		    ("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n",
		    so->so_oobsigcnt, so->so_oobcnt,
		    pr_state(so->so_state, so->so_mode)));
		/* Restore the original get flags (including any MSG_IPEEK) */
		pflag = opflag;
		goto retry;
	}
	default:
		/* Any other TPI primitive is unexpected on the read side */
		ASSERT(0);
		freemsg(mp);
		error = EPROTO;
		eprintsoline(so, error);
		goto err;
	}
	/* NOTREACHED */
err:
	/* Common exit for failures after SOREADLOCKED was taken */
	mutex_enter(&so->so_lock);
	so_unlock_read(so);	/* Clear SOREADLOCKED */
	mutex_exit(&so->so_lock);
	return (error);
}

/*
 * Sending data with options on a datagram socket.
 * Assumes caller has verified that SS_ISBOUND etc. are set.
3309 */ 3310 static int 3311 sosend_dgramcmsg(struct sonode *so, 3312 struct sockaddr *name, 3313 t_uscalar_t namelen, 3314 struct uio *uiop, 3315 void *control, 3316 t_uscalar_t controllen, 3317 int flags) 3318 { 3319 struct T_unitdata_req tudr; 3320 mblk_t *mp; 3321 int error; 3322 void *addr; 3323 socklen_t addrlen; 3324 void *src; 3325 socklen_t srclen; 3326 ssize_t len; 3327 int size; 3328 struct T_opthdr toh; 3329 struct fdbuf *fdbuf; 3330 t_uscalar_t optlen; 3331 void *fds; 3332 int fdlen; 3333 3334 ASSERT(name && namelen); 3335 ASSERT(control && controllen); 3336 3337 len = uiop->uio_resid; 3338 if (len > (ssize_t)so->so_tidu_size) { 3339 return (EMSGSIZE); 3340 } 3341 3342 /* 3343 * For AF_UNIX the destination address is translated to an internal 3344 * name and the source address is passed as an option. 3345 * Also, file descriptors are passed as file pointers in an 3346 * option. 3347 */ 3348 3349 /* 3350 * Length and family checks. 3351 */ 3352 error = so_addr_verify(so, name, namelen); 3353 if (error) { 3354 eprintsoline(so, error); 3355 return (error); 3356 } 3357 if (so->so_family == AF_UNIX) { 3358 if (so->so_state & SS_FADDR_NOXLATE) { 3359 /* 3360 * Already have a transport internal address. Do not 3361 * pass any (transport internal) source address. 3362 */ 3363 addr = name; 3364 addrlen = namelen; 3365 src = NULL; 3366 srclen = 0; 3367 } else { 3368 /* 3369 * Pass the sockaddr_un source address as an option 3370 * and translate the remote address. 3371 * 3372 * Note that this code does not prevent so_laddr_sa 3373 * from changing while it is being used. Thus 3374 * if an unbind+bind occurs concurrently with this 3375 * send the peer might see a partially new and a 3376 * partially old "from" address. 
3377 */ 3378 src = so->so_laddr_sa; 3379 srclen = (t_uscalar_t)so->so_laddr_len; 3380 dprintso(so, 1, 3381 ("sosend_dgramcmsg UNIX: srclen %d, src %p\n", 3382 srclen, src)); 3383 error = so_ux_addr_xlate(so, name, namelen, 3384 (flags & MSG_XPG4_2), 3385 &addr, &addrlen); 3386 if (error) { 3387 eprintsoline(so, error); 3388 return (error); 3389 } 3390 } 3391 } else { 3392 addr = name; 3393 addrlen = namelen; 3394 src = NULL; 3395 srclen = 0; 3396 } 3397 optlen = so_optlen(control, controllen, 3398 !(flags & MSG_XPG4_2)); 3399 tudr.PRIM_type = T_UNITDATA_REQ; 3400 tudr.DEST_length = addrlen; 3401 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3402 if (srclen != 0) 3403 tudr.OPT_length = (t_scalar_t)(optlen + sizeof (toh) + 3404 _TPI_ALIGN_TOPT(srclen)); 3405 else 3406 tudr.OPT_length = optlen; 3407 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3408 _TPI_ALIGN_TOPT(addrlen)); 3409 3410 size = tudr.OPT_offset + tudr.OPT_length; 3411 3412 /* 3413 * File descriptors only when SM_FDPASSING set. 3414 */ 3415 error = so_getfdopt(control, controllen, 3416 !(flags & MSG_XPG4_2), &fds, &fdlen); 3417 if (error) 3418 return (error); 3419 if (fdlen != -1) { 3420 if (!(so->so_mode & SM_FDPASSING)) 3421 return (EOPNOTSUPP); 3422 3423 error = fdbuf_create(fds, fdlen, &fdbuf); 3424 if (error) 3425 return (error); 3426 mp = fdbuf_allocmsg(size, fdbuf); 3427 } else { 3428 mp = soallocproto(size, _ALLOC_INTR); 3429 if (mp == NULL) { 3430 /* 3431 * Caught a signal waiting for memory. 3432 * Let send* return EINTR. 
3433 */ 3434 return (EINTR); 3435 } 3436 } 3437 soappendmsg(mp, &tudr, sizeof (tudr)); 3438 soappendmsg(mp, addr, addrlen); 3439 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3440 3441 if (fdlen != -1) { 3442 ASSERT(fdbuf != NULL); 3443 toh.level = SOL_SOCKET; 3444 toh.name = SO_FILEP; 3445 toh.len = fdbuf->fd_size + 3446 (t_uscalar_t)sizeof (struct T_opthdr); 3447 toh.status = 0; 3448 soappendmsg(mp, &toh, sizeof (toh)); 3449 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3450 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3451 } 3452 if (srclen != 0) { 3453 /* 3454 * There is a AF_UNIX sockaddr_un to include as a source 3455 * address option. 3456 */ 3457 toh.level = SOL_SOCKET; 3458 toh.name = SO_SRCADDR; 3459 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3460 toh.status = 0; 3461 soappendmsg(mp, &toh, sizeof (toh)); 3462 soappendmsg(mp, src, srclen); 3463 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3464 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3465 } 3466 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3467 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3468 /* At most 3 bytes left in the message */ 3469 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3470 ASSERT(MBLKL(mp) <= (ssize_t)size); 3471 3472 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3473 #ifdef C2_AUDIT 3474 if (audit_active) 3475 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3476 #endif /* C2_AUDIT */ 3477 3478 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3479 #ifdef SOCK_DEBUG 3480 if (error) { 3481 eprintsoline(so, error); 3482 } 3483 #endif /* SOCK_DEBUG */ 3484 return (error); 3485 } 3486 3487 /* 3488 * Sending data with options on a connected stream socket. 3489 * Assumes caller has verified that SS_ISCONNECTED is set. 
3490 */ 3491 static int 3492 sosend_svccmsg(struct sonode *so, 3493 struct uio *uiop, 3494 int more, 3495 void *control, 3496 t_uscalar_t controllen, 3497 int flags) 3498 { 3499 struct T_optdata_req tdr; 3500 mblk_t *mp; 3501 int error; 3502 ssize_t iosize; 3503 int first = 1; 3504 int size; 3505 struct fdbuf *fdbuf; 3506 t_uscalar_t optlen; 3507 void *fds; 3508 int fdlen; 3509 struct T_opthdr toh; 3510 3511 dprintso(so, 1, 3512 ("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid)); 3513 3514 /* 3515 * Has to be bound and connected. However, since no locks are 3516 * held the state could have changed after sotpi_sendmsg checked it 3517 * thus it is not possible to ASSERT on the state. 3518 */ 3519 3520 /* Options on connection-oriented only when SM_OPTDATA set. */ 3521 if (!(so->so_mode & SM_OPTDATA)) 3522 return (EOPNOTSUPP); 3523 3524 do { 3525 /* 3526 * Set the MORE flag if uio_resid does not fit in this 3527 * message or if the caller passed in "more". 3528 * Error for transports with zero tidu_size. 3529 */ 3530 tdr.PRIM_type = T_OPTDATA_REQ; 3531 iosize = so->so_tidu_size; 3532 if (iosize <= 0) 3533 return (EMSGSIZE); 3534 if (uiop->uio_resid > iosize) { 3535 tdr.DATA_flag = 1; 3536 } else { 3537 if (more) 3538 tdr.DATA_flag = 1; 3539 else 3540 tdr.DATA_flag = 0; 3541 iosize = uiop->uio_resid; 3542 } 3543 dprintso(so, 1, ("sosend_svccmsg: sending %d, %ld bytes\n", 3544 tdr.DATA_flag, iosize)); 3545 3546 optlen = so_optlen(control, controllen, !(flags & MSG_XPG4_2)); 3547 tdr.OPT_length = optlen; 3548 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 3549 3550 size = (int)sizeof (tdr) + optlen; 3551 /* 3552 * File descriptors only when SM_FDPASSING set. 
3553 */ 3554 error = so_getfdopt(control, controllen, 3555 !(flags & MSG_XPG4_2), &fds, &fdlen); 3556 if (error) 3557 return (error); 3558 if (fdlen != -1) { 3559 if (!(so->so_mode & SM_FDPASSING)) 3560 return (EOPNOTSUPP); 3561 3562 error = fdbuf_create(fds, fdlen, &fdbuf); 3563 if (error) 3564 return (error); 3565 mp = fdbuf_allocmsg(size, fdbuf); 3566 } else { 3567 mp = soallocproto(size, _ALLOC_INTR); 3568 if (mp == NULL) { 3569 /* 3570 * Caught a signal waiting for memory. 3571 * Let send* return EINTR. 3572 */ 3573 return (first ? EINTR : 0); 3574 } 3575 } 3576 soappendmsg(mp, &tdr, sizeof (tdr)); 3577 3578 if (fdlen != -1) { 3579 ASSERT(fdbuf != NULL); 3580 toh.level = SOL_SOCKET; 3581 toh.name = SO_FILEP; 3582 toh.len = fdbuf->fd_size + 3583 (t_uscalar_t)sizeof (struct T_opthdr); 3584 toh.status = 0; 3585 soappendmsg(mp, &toh, sizeof (toh)); 3586 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3587 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3588 } 3589 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3590 /* At most 3 bytes left in the message */ 3591 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3592 ASSERT(MBLKL(mp) <= (ssize_t)size); 3593 3594 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3595 3596 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 3597 0, MSG_BAND, 0); 3598 if (error) { 3599 if (!first && error == EWOULDBLOCK) 3600 return (0); 3601 eprintsoline(so, error); 3602 return (error); 3603 } 3604 control = NULL; 3605 first = 0; 3606 if (uiop->uio_resid > 0) { 3607 /* 3608 * Recheck for fatal errors. Fail write even though 3609 * some data have been written. This is consistent 3610 * with strwrite semantics and BSD sockets semantics. 
3611 */ 3612 if (so->so_state & SS_CANTSENDMORE) { 3613 tsignal(curthread, SIGPIPE); 3614 eprintsoline(so, error); 3615 return (EPIPE); 3616 } 3617 if (so->so_error != 0) { 3618 mutex_enter(&so->so_lock); 3619 error = sogeterr(so); 3620 mutex_exit(&so->so_lock); 3621 if (error != 0) { 3622 eprintsoline(so, error); 3623 return (error); 3624 } 3625 } 3626 } 3627 } while (uiop->uio_resid > 0); 3628 return (0); 3629 } 3630 3631 /* 3632 * Sending data on a datagram socket. 3633 * Assumes caller has verified that SS_ISBOUND etc. are set. 3634 * 3635 * For AF_UNIX the destination address is translated to an internal 3636 * name and the source address is passed as an option. 3637 */ 3638 int 3639 sosend_dgram(struct sonode *so, 3640 struct sockaddr *name, 3641 socklen_t namelen, 3642 struct uio *uiop, 3643 int flags) 3644 { 3645 struct T_unitdata_req tudr; 3646 mblk_t *mp; 3647 int error; 3648 void *addr; 3649 socklen_t addrlen; 3650 void *src; 3651 socklen_t srclen; 3652 ssize_t len; 3653 3654 ASSERT(name && namelen); 3655 3656 len = uiop->uio_resid; 3657 if (len > so->so_tidu_size) { 3658 error = EMSGSIZE; 3659 goto done; 3660 } 3661 3662 /* 3663 * Length and family checks. 3664 */ 3665 error = so_addr_verify(so, name, namelen); 3666 if (error) { 3667 eprintsoline(so, error); 3668 goto done; 3669 } 3670 if (so->so_family == AF_UNIX) { 3671 if (so->so_state & SS_FADDR_NOXLATE) { 3672 /* 3673 * Already have a transport internal address. Do not 3674 * pass any (transport internal) source address. 3675 */ 3676 addr = name; 3677 addrlen = namelen; 3678 src = NULL; 3679 srclen = 0; 3680 } else { 3681 /* 3682 * Pass the sockaddr_un source address as an option 3683 * and translate the remote address. 3684 * 3685 * Note that this code does not prevent so_laddr_sa 3686 * from changing while it is being used. Thus 3687 * if an unbind+bind occurs concurrently with this 3688 * send the peer might see a partially new and a 3689 * partially old "from" address. 
3690 */ 3691 src = so->so_laddr_sa; 3692 srclen = (socklen_t)so->so_laddr_len; 3693 dprintso(so, 1, 3694 ("sosend_dgram UNIX: srclen %d, src %p\n", 3695 srclen, src)); 3696 error = so_ux_addr_xlate(so, name, namelen, 3697 (flags & MSG_XPG4_2), 3698 &addr, &addrlen); 3699 if (error) { 3700 eprintsoline(so, error); 3701 goto done; 3702 } 3703 } 3704 } else { 3705 addr = name; 3706 addrlen = namelen; 3707 src = NULL; 3708 srclen = 0; 3709 } 3710 tudr.PRIM_type = T_UNITDATA_REQ; 3711 tudr.DEST_length = addrlen; 3712 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3713 if (srclen == 0) { 3714 tudr.OPT_length = 0; 3715 tudr.OPT_offset = 0; 3716 3717 mp = soallocproto2(&tudr, sizeof (tudr), 3718 addr, addrlen, 0, _ALLOC_INTR); 3719 if (mp == NULL) { 3720 /* 3721 * Caught a signal waiting for memory. 3722 * Let send* return EINTR. 3723 */ 3724 error = EINTR; 3725 goto done; 3726 } 3727 } else { 3728 /* 3729 * There is a AF_UNIX sockaddr_un to include as a source 3730 * address option. 3731 */ 3732 struct T_opthdr toh; 3733 ssize_t size; 3734 3735 tudr.OPT_length = (t_scalar_t)(sizeof (toh) + 3736 _TPI_ALIGN_TOPT(srclen)); 3737 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3738 _TPI_ALIGN_TOPT(addrlen)); 3739 3740 toh.level = SOL_SOCKET; 3741 toh.name = SO_SRCADDR; 3742 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3743 toh.status = 0; 3744 3745 size = tudr.OPT_offset + tudr.OPT_length; 3746 mp = soallocproto2(&tudr, sizeof (tudr), 3747 addr, addrlen, size, _ALLOC_INTR); 3748 if (mp == NULL) { 3749 /* 3750 * Caught a signal waiting for memory. 3751 * Let send* return EINTR. 
3752 */ 3753 error = EINTR; 3754 goto done; 3755 } 3756 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3757 soappendmsg(mp, &toh, sizeof (toh)); 3758 soappendmsg(mp, src, srclen); 3759 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3760 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3761 } 3762 3763 #ifdef C2_AUDIT 3764 if (audit_active) 3765 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3766 #endif /* C2_AUDIT */ 3767 3768 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3769 done: 3770 #ifdef SOCK_DEBUG 3771 if (error) { 3772 eprintsoline(so, error); 3773 } 3774 #endif /* SOCK_DEBUG */ 3775 return (error); 3776 } 3777 3778 /* 3779 * Sending data on a connected stream socket. 3780 * Assumes caller has verified that SS_ISCONNECTED is set. 3781 */ 3782 int 3783 sosend_svc(struct sonode *so, 3784 struct uio *uiop, 3785 t_scalar_t prim, 3786 int more, 3787 int sflag) 3788 { 3789 struct T_data_req tdr; 3790 mblk_t *mp; 3791 int error; 3792 ssize_t iosize; 3793 int first = 1; 3794 3795 dprintso(so, 1, 3796 ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n", 3797 so, uiop->uio_resid, prim, sflag)); 3798 3799 /* 3800 * Has to be bound and connected. However, since no locks are 3801 * held the state could have changed after sotpi_sendmsg checked it 3802 * thus it is not possible to ASSERT on the state. 3803 */ 3804 3805 do { 3806 /* 3807 * Set the MORE flag if uio_resid does not fit in this 3808 * message or if the caller passed in "more". 3809 * Error for transports with zero tidu_size. 
3810 */ 3811 tdr.PRIM_type = prim; 3812 iosize = so->so_tidu_size; 3813 if (iosize <= 0) 3814 return (EMSGSIZE); 3815 if (uiop->uio_resid > iosize) { 3816 tdr.MORE_flag = 1; 3817 } else { 3818 if (more) 3819 tdr.MORE_flag = 1; 3820 else 3821 tdr.MORE_flag = 0; 3822 iosize = uiop->uio_resid; 3823 } 3824 dprintso(so, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n", 3825 prim, tdr.MORE_flag, iosize)); 3826 mp = soallocproto1(&tdr, sizeof (tdr), 0, _ALLOC_INTR); 3827 if (mp == NULL) { 3828 /* 3829 * Caught a signal waiting for memory. 3830 * Let send* return EINTR. 3831 */ 3832 if (first) 3833 return (EINTR); 3834 else 3835 return (0); 3836 } 3837 3838 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 3839 0, sflag | MSG_BAND, 0); 3840 if (error) { 3841 if (!first && error == EWOULDBLOCK) 3842 return (0); 3843 eprintsoline(so, error); 3844 return (error); 3845 } 3846 first = 0; 3847 if (uiop->uio_resid > 0) { 3848 /* 3849 * Recheck for fatal errors. Fail write even though 3850 * some data have been written. This is consistent 3851 * with strwrite semantics and BSD sockets semantics. 3852 */ 3853 if (so->so_state & SS_CANTSENDMORE) { 3854 tsignal(curthread, SIGPIPE); 3855 eprintsoline(so, error); 3856 return (EPIPE); 3857 } 3858 if (so->so_error != 0) { 3859 mutex_enter(&so->so_lock); 3860 error = sogeterr(so); 3861 mutex_exit(&so->so_lock); 3862 if (error != 0) { 3863 eprintsoline(so, error); 3864 return (error); 3865 } 3866 } 3867 } 3868 } while (uiop->uio_resid > 0); 3869 return (0); 3870 } 3871 3872 /* 3873 * Check the state for errors and call the appropriate send function. 3874 * 3875 * If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set) 3876 * this function issues a setsockopt to toggle SO_DONTROUTE before and 3877 * after sending the message. 
3878 */ 3879 static int 3880 sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop) 3881 { 3882 int so_state; 3883 int so_mode; 3884 int error; 3885 struct sockaddr *name; 3886 t_uscalar_t namelen; 3887 int dontroute; 3888 int flags; 3889 3890 dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n", 3891 so, msg, msg->msg_flags, 3892 pr_state(so->so_state, so->so_mode), so->so_error)); 3893 3894 mutex_enter(&so->so_lock); 3895 so_state = so->so_state; 3896 3897 if (so_state & SS_CANTSENDMORE) { 3898 mutex_exit(&so->so_lock); 3899 tsignal(curthread, SIGPIPE); 3900 return (EPIPE); 3901 } 3902 3903 if (so->so_error != 0) { 3904 error = sogeterr(so); 3905 if (error != 0) { 3906 mutex_exit(&so->so_lock); 3907 return (error); 3908 } 3909 } 3910 3911 name = (struct sockaddr *)msg->msg_name; 3912 namelen = msg->msg_namelen; 3913 3914 so_mode = so->so_mode; 3915 3916 if (name == NULL) { 3917 if (!(so_state & SS_ISCONNECTED)) { 3918 mutex_exit(&so->so_lock); 3919 if (so_mode & SM_CONNREQUIRED) 3920 return (ENOTCONN); 3921 else 3922 return (EDESTADDRREQ); 3923 } 3924 if (so_mode & SM_CONNREQUIRED) { 3925 name = NULL; 3926 namelen = 0; 3927 } else { 3928 /* 3929 * Note that this code does not prevent so_faddr_sa 3930 * from changing while it is being used. Thus 3931 * if an "unconnect"+connect occurs concurrently with 3932 * this send the datagram might be delivered to a 3933 * garbaled address. 3934 */ 3935 ASSERT(so->so_faddr_sa); 3936 name = so->so_faddr_sa; 3937 namelen = (t_uscalar_t)so->so_faddr_len; 3938 } 3939 } else { 3940 if (!(so_state & SS_ISCONNECTED) && 3941 (so_mode & SM_CONNREQUIRED)) { 3942 /* Required but not connected */ 3943 mutex_exit(&so->so_lock); 3944 return (ENOTCONN); 3945 } 3946 /* 3947 * Ignore the address on connection-oriented sockets. 3948 * Just like BSD this code does not generate an error for 3949 * TCP (a CONNREQUIRED socket) when sending to an address 3950 * passed in with sendto/sendmsg. 
Instead the data is 3951 * delivered on the connection as if no address had been 3952 * supplied. 3953 */ 3954 if ((so_state & SS_ISCONNECTED) && 3955 !(so_mode & SM_CONNREQUIRED)) { 3956 mutex_exit(&so->so_lock); 3957 return (EISCONN); 3958 } 3959 if (!(so_state & SS_ISBOUND)) { 3960 so_lock_single(so); /* Set SOLOCKED */ 3961 error = sotpi_bind(so, NULL, 0, 3962 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD); 3963 so_unlock_single(so, SOLOCKED); 3964 if (error) { 3965 mutex_exit(&so->so_lock); 3966 eprintsoline(so, error); 3967 return (error); 3968 } 3969 } 3970 /* 3971 * Handle delayed datagram errors. These are only queued 3972 * when the application sets SO_DGRAM_ERRIND. 3973 * Return the error if we are sending to the address 3974 * that was returned in the last T_UDERROR_IND. 3975 * If sending to some other address discard the delayed 3976 * error indication. 3977 */ 3978 if (so->so_delayed_error) { 3979 struct T_uderror_ind *tudi; 3980 void *addr; 3981 t_uscalar_t addrlen; 3982 boolean_t match = B_FALSE; 3983 3984 ASSERT(so->so_eaddr_mp); 3985 error = so->so_delayed_error; 3986 so->so_delayed_error = 0; 3987 tudi = (struct T_uderror_ind *)so->so_eaddr_mp->b_rptr; 3988 addrlen = tudi->DEST_length; 3989 addr = sogetoff(so->so_eaddr_mp, 3990 tudi->DEST_offset, 3991 addrlen, 1); 3992 ASSERT(addr); /* Checked by strsock_proto */ 3993 switch (so->so_family) { 3994 case AF_INET: { 3995 /* Compare just IP address and port */ 3996 sin_t *sin1 = (sin_t *)name; 3997 sin_t *sin2 = (sin_t *)addr; 3998 3999 if (addrlen == sizeof (sin_t) && 4000 namelen == addrlen && 4001 sin1->sin_port == sin2->sin_port && 4002 sin1->sin_addr.s_addr == 4003 sin2->sin_addr.s_addr) 4004 match = B_TRUE; 4005 break; 4006 } 4007 case AF_INET6: { 4008 /* Compare just IP address and port. 
Not flow */ 4009 sin6_t *sin1 = (sin6_t *)name; 4010 sin6_t *sin2 = (sin6_t *)addr; 4011 4012 if (addrlen == sizeof (sin6_t) && 4013 namelen == addrlen && 4014 sin1->sin6_port == sin2->sin6_port && 4015 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 4016 &sin2->sin6_addr)) 4017 match = B_TRUE; 4018 break; 4019 } 4020 case AF_UNIX: 4021 default: 4022 if (namelen == addrlen && 4023 bcmp(name, addr, namelen) == 0) 4024 match = B_TRUE; 4025 } 4026 if (match) { 4027 freemsg(so->so_eaddr_mp); 4028 so->so_eaddr_mp = NULL; 4029 mutex_exit(&so->so_lock); 4030 #ifdef DEBUG 4031 dprintso(so, 0, 4032 ("sockfs delayed error %d for %s\n", 4033 error, 4034 pr_addr(so->so_family, name, namelen))); 4035 #endif /* DEBUG */ 4036 return (error); 4037 } 4038 freemsg(so->so_eaddr_mp); 4039 so->so_eaddr_mp = NULL; 4040 } 4041 } 4042 mutex_exit(&so->so_lock); 4043 4044 flags = msg->msg_flags; 4045 dontroute = 0; 4046 if ((flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE)) { 4047 uint32_t val; 4048 4049 val = 1; 4050 error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4051 &val, (t_uscalar_t)sizeof (val)); 4052 if (error) 4053 return (error); 4054 dontroute = 1; 4055 } 4056 4057 if ((flags & MSG_OOB) && !(so_mode & SM_EXDATA)) { 4058 error = EOPNOTSUPP; 4059 goto done; 4060 } 4061 if (msg->msg_controllen != 0) { 4062 if (!(so_mode & SM_CONNREQUIRED)) { 4063 error = sosend_dgramcmsg(so, name, namelen, uiop, 4064 msg->msg_control, msg->msg_controllen, 4065 flags); 4066 } else { 4067 if (flags & MSG_OOB) { 4068 /* Can't generate T_EXDATA_REQ with options */ 4069 error = EOPNOTSUPP; 4070 goto done; 4071 } 4072 error = sosend_svccmsg(so, uiop, 4073 !(flags & MSG_EOR), 4074 msg->msg_control, msg->msg_controllen, 4075 flags); 4076 } 4077 goto done; 4078 } 4079 4080 if (!(so_mode & SM_CONNREQUIRED)) { 4081 /* 4082 * If there is no SO_DONTROUTE to turn off return immediately 4083 * from sosend_dgram. This can allow tail-call optimizations. 
4084 */ 4085 if (!dontroute) { 4086 return (sosend_dgram(so, name, namelen, uiop, flags)); 4087 } 4088 error = sosend_dgram(so, name, namelen, uiop, flags); 4089 } else { 4090 t_scalar_t prim; 4091 int sflag; 4092 4093 /* Ignore msg_name in the connected state */ 4094 if (flags & MSG_OOB) { 4095 prim = T_EXDATA_REQ; 4096 /* 4097 * Send down T_EXDATA_REQ even if there is flow 4098 * control for data. 4099 */ 4100 sflag = MSG_IGNFLOW; 4101 } else { 4102 if (so_mode & SM_BYTESTREAM) { 4103 /* Byte stream transport - use write */ 4104 4105 dprintso(so, 1, ("sotpi_sendmsg: write\n")); 4106 /* 4107 * If there is no SO_DONTROUTE to turn off 4108 * return immediately from strwrite. This can 4109 * allow tail-call optimizations. 4110 */ 4111 if (!dontroute) 4112 return (strwrite(SOTOV(so), uiop, 4113 CRED())); 4114 error = strwrite(SOTOV(so), uiop, CRED()); 4115 goto done; 4116 } 4117 prim = T_DATA_REQ; 4118 sflag = 0; 4119 } 4120 /* 4121 * If there is no SO_DONTROUTE to turn off return immediately 4122 * from sosend_svc. This can allow tail-call optimizations. 4123 */ 4124 if (!dontroute) 4125 return (sosend_svc(so, uiop, prim, 4126 !(flags & MSG_EOR), sflag)); 4127 error = sosend_svc(so, uiop, prim, 4128 !(flags & MSG_EOR), sflag); 4129 } 4130 ASSERT(dontroute); 4131 done: 4132 if (dontroute) { 4133 uint32_t val; 4134 4135 val = 0; 4136 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4137 &val, (t_uscalar_t)sizeof (val)); 4138 } 4139 return (error); 4140 } 4141 4142 /* 4143 * Update so_faddr by asking the transport (unless AF_UNIX). 
4144 */ 4145 int 4146 sotpi_getpeername(struct sonode *so) 4147 { 4148 struct strbuf strbuf; 4149 int error = 0, res; 4150 void *addr; 4151 t_uscalar_t addrlen; 4152 k_sigset_t smask; 4153 4154 dprintso(so, 1, ("sotpi_getpeername(%p) %s\n", 4155 so, pr_state(so->so_state, so->so_mode))); 4156 4157 mutex_enter(&so->so_lock); 4158 so_lock_single(so); /* Set SOLOCKED */ 4159 if (!(so->so_state & SS_ISCONNECTED)) { 4160 error = ENOTCONN; 4161 goto done; 4162 } 4163 /* Added this check for X/Open */ 4164 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 4165 error = EINVAL; 4166 if (xnet_check_print) { 4167 printf("sockfs: X/Open getpeername check => EINVAL\n"); 4168 } 4169 goto done; 4170 } 4171 #ifdef DEBUG 4172 dprintso(so, 1, ("sotpi_getpeername (local): %s\n", 4173 pr_addr(so->so_family, so->so_faddr_sa, 4174 (t_uscalar_t)so->so_faddr_len))); 4175 #endif /* DEBUG */ 4176 4177 if (so->so_family == AF_UNIX || so->so_family == AF_NCA) { 4178 /* Transport has different name space - return local info */ 4179 error = 0; 4180 goto done; 4181 } 4182 4183 ASSERT(so->so_faddr_sa); 4184 /* Allocate local buffer to use with ioctl */ 4185 addrlen = (t_uscalar_t)so->so_faddr_maxlen; 4186 mutex_exit(&so->so_lock); 4187 addr = kmem_alloc(addrlen, KM_SLEEP); 4188 4189 /* 4190 * Issue TI_GETPEERNAME with signals masked. 4191 * Put the result in so_faddr_sa so that getpeername works after 4192 * a shutdown(output). 4193 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 4194 * back to the socket. 4195 */ 4196 strbuf.buf = addr; 4197 strbuf.maxlen = addrlen; 4198 strbuf.len = 0; 4199 4200 sigintr(&smask, 0); 4201 res = 0; 4202 ASSERT(CRED()); 4203 error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf, 4204 0, K_TO_K, CRED(), &res); 4205 sigunintr(&smask); 4206 4207 mutex_enter(&so->so_lock); 4208 /* 4209 * If there is an error record the error in so_error put don't fail 4210 * the getpeername. 
Instead fallback on the recorded 4211 * so->so_faddr_sa. 4212 */ 4213 if (error) { 4214 /* 4215 * Various stream head errors can be returned to the ioctl. 4216 * However, it is impossible to determine which ones of 4217 * these are really socket level errors that were incorrectly 4218 * consumed by the ioctl. Thus this code silently ignores the 4219 * error - to code explicitly does not reinstate the error 4220 * using soseterror(). 4221 * Experiments have shows that at least this set of 4222 * errors are reported and should not be reinstated on the 4223 * socket: 4224 * EINVAL E.g. if an I_LINK was in effect when 4225 * getpeername was called. 4226 * EPIPE The ioctl error semantics prefer the write 4227 * side error over the read side error. 4228 * ENOTCONN The transport just got disconnected but 4229 * sockfs had not yet seen the T_DISCON_IND 4230 * when issuing the ioctl. 4231 */ 4232 error = 0; 4233 } else if (res == 0 && strbuf.len > 0 && 4234 (so->so_state & SS_ISCONNECTED)) { 4235 ASSERT(strbuf.len <= (int)so->so_faddr_maxlen); 4236 so->so_faddr_len = (socklen_t)strbuf.len; 4237 bcopy(addr, so->so_faddr_sa, so->so_faddr_len); 4238 so->so_state |= SS_FADDR_VALID; 4239 } 4240 kmem_free(addr, addrlen); 4241 #ifdef DEBUG 4242 dprintso(so, 1, ("sotpi_getpeername (tp): %s\n", 4243 pr_addr(so->so_family, so->so_faddr_sa, 4244 (t_uscalar_t)so->so_faddr_len))); 4245 #endif /* DEBUG */ 4246 done: 4247 so_unlock_single(so, SOLOCKED); 4248 mutex_exit(&so->so_lock); 4249 return (error); 4250 } 4251 4252 /* 4253 * Update so_laddr by asking the transport (unless AF_UNIX). 
4254 */ 4255 int 4256 sotpi_getsockname(struct sonode *so) 4257 { 4258 struct strbuf strbuf; 4259 int error = 0, res; 4260 void *addr; 4261 t_uscalar_t addrlen; 4262 k_sigset_t smask; 4263 4264 dprintso(so, 1, ("sotpi_getsockname(%p) %s\n", 4265 so, pr_state(so->so_state, so->so_mode))); 4266 4267 mutex_enter(&so->so_lock); 4268 so_lock_single(so); /* Set SOLOCKED */ 4269 if (!(so->so_state & SS_ISBOUND) && so->so_family != AF_UNIX) { 4270 /* Return an all zero address except for the family */ 4271 if (so->so_family == AF_INET) 4272 so->so_laddr_len = (socklen_t)sizeof (sin_t); 4273 else if (so->so_family == AF_INET6) 4274 so->so_laddr_len = (socklen_t)sizeof (sin6_t); 4275 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 4276 bzero(so->so_laddr_sa, so->so_laddr_len); 4277 /* 4278 * Can not assume there is a sa_family for all 4279 * protocol families. 4280 */ 4281 if (so->so_family == AF_INET || so->so_family == AF_INET6) 4282 so->so_laddr_sa->sa_family = so->so_family; 4283 } 4284 #ifdef DEBUG 4285 dprintso(so, 1, ("sotpi_getsockname (local): %s\n", 4286 pr_addr(so->so_family, so->so_laddr_sa, 4287 (t_uscalar_t)so->so_laddr_len))); 4288 #endif /* DEBUG */ 4289 if (so->so_family == AF_UNIX) { 4290 /* Transport has different name space - return local info */ 4291 error = 0; 4292 goto done; 4293 } 4294 /* Allocate local buffer to use with ioctl */ 4295 addrlen = (t_uscalar_t)so->so_laddr_maxlen; 4296 mutex_exit(&so->so_lock); 4297 addr = kmem_alloc(addrlen, KM_SLEEP); 4298 4299 /* 4300 * Issue TI_GETMYNAME with signals masked. 4301 * Put the result in so_laddr_sa so that getsockname works after 4302 * a shutdown(output). 4303 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 4304 * back to the socket. 
4305 */ 4306 strbuf.buf = addr; 4307 strbuf.maxlen = addrlen; 4308 strbuf.len = 0; 4309 4310 sigintr(&smask, 0); 4311 res = 0; 4312 ASSERT(CRED()); 4313 error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf, 4314 0, K_TO_K, CRED(), &res); 4315 sigunintr(&smask); 4316 4317 mutex_enter(&so->so_lock); 4318 /* 4319 * If there is an error record the error in so_error put don't fail 4320 * the getsockname. Instead fallback on the recorded 4321 * so->so_laddr_sa. 4322 */ 4323 if (error) { 4324 /* 4325 * Various stream head errors can be returned to the ioctl. 4326 * However, it is impossible to determine which ones of 4327 * these are really socket level errors that were incorrectly 4328 * consumed by the ioctl. Thus this code silently ignores the 4329 * error - to code explicitly does not reinstate the error 4330 * using soseterror(). 4331 * Experiments have shows that at least this set of 4332 * errors are reported and should not be reinstated on the 4333 * socket: 4334 * EINVAL E.g. if an I_LINK was in effect when 4335 * getsockname was called. 4336 * EPIPE The ioctl error semantics prefer the write 4337 * side error over the read side error. 4338 */ 4339 error = 0; 4340 } else if (res == 0 && strbuf.len > 0 && 4341 (so->so_state & SS_ISBOUND)) { 4342 ASSERT(strbuf.len <= (int)so->so_laddr_maxlen); 4343 so->so_laddr_len = (socklen_t)strbuf.len; 4344 bcopy(addr, so->so_laddr_sa, so->so_laddr_len); 4345 so->so_state |= SS_LADDR_VALID; 4346 } 4347 kmem_free(addr, addrlen); 4348 #ifdef DEBUG 4349 dprintso(so, 1, ("sotpi_getsockname (tp): %s\n", 4350 pr_addr(so->so_family, so->so_laddr_sa, 4351 (t_uscalar_t)so->so_laddr_len))); 4352 #endif /* DEBUG */ 4353 done: 4354 so_unlock_single(so, SOLOCKED); 4355 mutex_exit(&so->so_lock); 4356 return (error); 4357 } 4358 4359 /* 4360 * Get socket options. For SOL_SOCKET options some options are handled 4361 * by the sockfs while others use the value recorded in the sonode as a 4362 * fallback should the T_SVR4_OPTMGMT_REQ fail. 
4363 * 4364 * On the return most *optlenp bytes are copied to optval. 4365 */ 4366 int 4367 sotpi_getsockopt(struct sonode *so, int level, int option_name, 4368 void *optval, socklen_t *optlenp, int flags) 4369 { 4370 struct T_optmgmt_req optmgmt_req; 4371 struct T_optmgmt_ack *optmgmt_ack; 4372 struct opthdr oh; 4373 struct opthdr *opt_res; 4374 mblk_t *mp = NULL; 4375 int error = 0; 4376 void *option = NULL; /* Set if fallback value */ 4377 t_uscalar_t maxlen = *optlenp; 4378 t_uscalar_t len; 4379 uint32_t value; 4380 4381 dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n", 4382 so, level, option_name, optval, optlenp, 4383 pr_state(so->so_state, so->so_mode))); 4384 4385 mutex_enter(&so->so_lock); 4386 so_lock_single(so); /* Set SOLOCKED */ 4387 4388 /* 4389 * Check for SOL_SOCKET options. 4390 * Certain SOL_SOCKET options are returned directly whereas 4391 * others only provide a default (fallback) value should 4392 * the T_SVR4_OPTMGMT_REQ fail. 4393 */ 4394 if (level == SOL_SOCKET) { 4395 /* Check parameters */ 4396 switch (option_name) { 4397 case SO_TYPE: 4398 case SO_ERROR: 4399 case SO_DEBUG: 4400 case SO_ACCEPTCONN: 4401 case SO_REUSEADDR: 4402 case SO_KEEPALIVE: 4403 case SO_DONTROUTE: 4404 case SO_BROADCAST: 4405 case SO_USELOOPBACK: 4406 case SO_OOBINLINE: 4407 case SO_SNDBUF: 4408 case SO_RCVBUF: 4409 #ifdef notyet 4410 case SO_SNDLOWAT: 4411 case SO_RCVLOWAT: 4412 case SO_SNDTIMEO: 4413 case SO_RCVTIMEO: 4414 #endif /* notyet */ 4415 case SO_DGRAM_ERRIND: 4416 if (maxlen < (t_uscalar_t)sizeof (int32_t)) { 4417 error = EINVAL; 4418 eprintsoline(so, error); 4419 goto done2; 4420 } 4421 break; 4422 case SO_LINGER: 4423 if (maxlen < (t_uscalar_t)sizeof (struct linger)) { 4424 error = EINVAL; 4425 eprintsoline(so, error); 4426 goto done2; 4427 } 4428 break; 4429 } 4430 4431 len = (t_uscalar_t)sizeof (uint32_t); /* Default */ 4432 4433 switch (option_name) { 4434 case SO_TYPE: 4435 value = so->so_type; 4436 option = &value; 4437 goto 
copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4438 4439 case SO_ERROR: 4440 value = sogeterr(so); 4441 option = &value; 4442 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4443 4444 case SO_ACCEPTCONN: 4445 if (so->so_state & SS_ACCEPTCONN) 4446 value = SO_ACCEPTCONN; 4447 else 4448 value = 0; 4449 #ifdef DEBUG 4450 if (value) { 4451 dprintso(so, 1, 4452 ("sotpi_getsockopt: 0x%x is set\n", 4453 option_name)); 4454 } else { 4455 dprintso(so, 1, 4456 ("sotpi_getsockopt: 0x%x not set\n", 4457 option_name)); 4458 } 4459 #endif /* DEBUG */ 4460 option = &value; 4461 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4462 4463 case SO_DEBUG: 4464 case SO_REUSEADDR: 4465 case SO_KEEPALIVE: 4466 case SO_DONTROUTE: 4467 case SO_BROADCAST: 4468 case SO_USELOOPBACK: 4469 case SO_OOBINLINE: 4470 case SO_DGRAM_ERRIND: 4471 value = (so->so_options & option_name); 4472 #ifdef DEBUG 4473 if (value) { 4474 dprintso(so, 1, 4475 ("sotpi_getsockopt: 0x%x is set\n", 4476 option_name)); 4477 } else { 4478 dprintso(so, 1, 4479 ("sotpi_getsockopt: 0x%x not set\n", 4480 option_name)); 4481 } 4482 #endif /* DEBUG */ 4483 option = &value; 4484 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4485 4486 /* 4487 * The following options are only returned by sockfs when the 4488 * T_SVR4_OPTMGMT_REQ fails. 4489 */ 4490 case SO_LINGER: 4491 option = &so->so_linger; 4492 len = (t_uscalar_t)sizeof (struct linger); 4493 break; 4494 case SO_SNDBUF: { 4495 ssize_t lvalue; 4496 4497 /* 4498 * If the option has not been set then get a default 4499 * value from the read queue. This value is 4500 * returned if the transport fails 4501 * the T_SVR4_OPTMGMT_REQ. 
4502 */ 4503 lvalue = so->so_sndbuf; 4504 if (lvalue == 0) { 4505 mutex_exit(&so->so_lock); 4506 (void) strqget(strvp2wq(SOTOV(so))->q_next, 4507 QHIWAT, 0, &lvalue); 4508 mutex_enter(&so->so_lock); 4509 dprintso(so, 1, 4510 ("got SO_SNDBUF %ld from q\n", lvalue)); 4511 } 4512 value = (int)lvalue; 4513 option = &value; 4514 len = (t_uscalar_t)sizeof (so->so_sndbuf); 4515 break; 4516 } 4517 case SO_RCVBUF: { 4518 ssize_t lvalue; 4519 4520 /* 4521 * If the option has not been set then get a default 4522 * value from the read queue. This value is 4523 * returned if the transport fails 4524 * the T_SVR4_OPTMGMT_REQ. 4525 * 4526 * XXX If SO_RCVBUF has been set and this is an 4527 * XPG 4.2 application then do not ask the transport 4528 * since the transport might adjust the value and not 4529 * return exactly what was set by the application. 4530 * For non-XPG 4.2 application we return the value 4531 * that the transport is actually using. 4532 */ 4533 lvalue = so->so_rcvbuf; 4534 if (lvalue == 0) { 4535 mutex_exit(&so->so_lock); 4536 (void) strqget(RD(strvp2wq(SOTOV(so))), 4537 QHIWAT, 0, &lvalue); 4538 mutex_enter(&so->so_lock); 4539 dprintso(so, 1, 4540 ("got SO_RCVBUF %ld from q\n", lvalue)); 4541 } else if (flags & _SOGETSOCKOPT_XPG4_2) { 4542 value = (int)lvalue; 4543 option = &value; 4544 goto copyout; /* skip asking transport */ 4545 } 4546 value = (int)lvalue; 4547 option = &value; 4548 len = (t_uscalar_t)sizeof (so->so_rcvbuf); 4549 break; 4550 } 4551 #ifdef notyet 4552 /* 4553 * We do not implement the semantics of these options 4554 * thus we shouldn't implement the options either. 
4555 */ 4556 case SO_SNDLOWAT: 4557 value = so->so_sndlowat; 4558 option = &value; 4559 break; 4560 case SO_RCVLOWAT: 4561 value = so->so_rcvlowat; 4562 option = &value; 4563 break; 4564 case SO_SNDTIMEO: 4565 value = so->so_sndtimeo; 4566 option = &value; 4567 break; 4568 case SO_RCVTIMEO: 4569 value = so->so_rcvtimeo; 4570 option = &value; 4571 break; 4572 #endif /* notyet */ 4573 } 4574 } 4575 4576 if (so->so_family == AF_NCA) { 4577 goto done2; 4578 } 4579 4580 mutex_exit(&so->so_lock); 4581 4582 /* Send request */ 4583 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 4584 optmgmt_req.MGMT_flags = T_CHECK; 4585 optmgmt_req.OPT_length = (t_scalar_t)(sizeof (oh) + maxlen); 4586 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 4587 4588 oh.level = level; 4589 oh.name = option_name; 4590 oh.len = maxlen; 4591 4592 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 4593 &oh, sizeof (oh), NULL, maxlen, 0, _ALLOC_SLEEP); 4594 /* Let option management work in the presence of data flow control */ 4595 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 4596 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 4597 mp = NULL; 4598 mutex_enter(&so->so_lock); 4599 if (error) { 4600 eprintsoline(so, error); 4601 goto done2; 4602 } 4603 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 4604 (t_uscalar_t)(sizeof (*optmgmt_ack) + sizeof (*opt_res)), &mp, 0); 4605 if (error) { 4606 if (option != NULL) { 4607 /* We have a fallback value */ 4608 error = 0; 4609 goto copyout; 4610 } 4611 eprintsoline(so, error); 4612 goto done2; 4613 } 4614 ASSERT(mp); 4615 optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr; 4616 opt_res = (struct opthdr *)sogetoff(mp, optmgmt_ack->OPT_offset, 4617 optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE); 4618 if (opt_res == NULL) { 4619 if (option != NULL) { 4620 /* We have a fallback value */ 4621 error = 0; 4622 goto copyout; 4623 } 4624 error = EPROTO; 4625 eprintsoline(so, error); 4626 goto done; 4627 } 4628 option = &opt_res[1]; 4629 4630 /* check 
to ensure that the option is within bounds */ 4631 if (((uintptr_t)option + opt_res->len < (uintptr_t)option) || 4632 (uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) { 4633 if (option != NULL) { 4634 /* We have a fallback value */ 4635 error = 0; 4636 goto copyout; 4637 } 4638 error = EPROTO; 4639 eprintsoline(so, error); 4640 goto done; 4641 } 4642 4643 len = opt_res->len; 4644 4645 copyout: { 4646 t_uscalar_t size = MIN(len, maxlen); 4647 bcopy(option, optval, size); 4648 bcopy(&size, optlenp, sizeof (size)); 4649 } 4650 done: 4651 freemsg(mp); 4652 done2: 4653 so_unlock_single(so, SOLOCKED); 4654 mutex_exit(&so->so_lock); 4655 return (error); 4656 } 4657 4658 /* 4659 * Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ. 4660 * SOL_SOCKET options are also recorded in the sonode. A setsockopt for 4661 * SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails - 4662 * setsockopt has to work even if the transport does not support the option. 
4663 */ 4664 int 4665 sotpi_setsockopt(struct sonode *so, int level, int option_name, 4666 const void *optval, t_uscalar_t optlen) 4667 { 4668 struct T_optmgmt_req optmgmt_req; 4669 struct opthdr oh; 4670 mblk_t *mp; 4671 int error = 0; 4672 boolean_t handled = B_FALSE; 4673 4674 dprintso(so, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n", 4675 so, level, option_name, optval, optlen, 4676 pr_state(so->so_state, so->so_mode))); 4677 4678 4679 /* X/Open requires this check */ 4680 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 4681 if (xnet_check_print) 4682 printf("sockfs: X/Open setsockopt check => EINVAL\n"); 4683 return (EINVAL); 4684 } 4685 4686 /* Caller allocates aligned optval, or passes null */ 4687 ASSERT(((uintptr_t)optval & (sizeof (t_scalar_t) - 1)) == 0); 4688 /* If optval is null optlen is 0, and vice-versa */ 4689 ASSERT(optval != NULL || optlen == 0); 4690 ASSERT(optlen != 0 || optval == NULL); 4691 4692 mutex_enter(&so->so_lock); 4693 so_lock_single(so); /* Set SOLOCKED */ 4694 mutex_exit(&so->so_lock); 4695 4696 if (so->so_family == AF_NCA) { 4697 /* Ignore any flow control problems with the transport. */ 4698 mutex_enter(&so->so_lock); 4699 goto done; 4700 } 4701 4702 /* 4703 * For SOCKET or TCP level options, try to set it here itself 4704 * provided socket has not been popped and we know the tcp 4705 * structure (stored in so_priv). 
4706 */ 4707 if ((level == SOL_SOCKET || level == IPPROTO_TCP) && 4708 (so->so_family == AF_INET || so->so_family == AF_INET6) && 4709 (so->so_version == SOV_SOCKSTREAM) && (so->so_priv != NULL)) { 4710 tcp_t *tcp = so->so_priv; 4711 boolean_t onoff; 4712 4713 #define intvalue (*(int32_t *)optval) 4714 4715 switch (level) { 4716 case SOL_SOCKET: 4717 switch (option_name) { /* Check length param */ 4718 case SO_DEBUG: 4719 case SO_REUSEADDR: 4720 case SO_DONTROUTE: 4721 case SO_BROADCAST: 4722 case SO_USELOOPBACK: 4723 case SO_OOBINLINE: 4724 case SO_DGRAM_ERRIND: 4725 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 4726 error = EINVAL; 4727 eprintsoline(so, error); 4728 mutex_enter(&so->so_lock); 4729 goto done2; 4730 } 4731 ASSERT(optval); 4732 onoff = intvalue != 0; 4733 handled = B_TRUE; 4734 break; 4735 case SO_LINGER: 4736 if (optlen != 4737 (t_uscalar_t)sizeof (struct linger)) { 4738 error = EINVAL; 4739 eprintsoline(so, error); 4740 mutex_enter(&so->so_lock); 4741 goto done2; 4742 } 4743 ASSERT(optval); 4744 handled = B_TRUE; 4745 break; 4746 } 4747 4748 switch (option_name) { /* Do actions */ 4749 case SO_LINGER: { 4750 struct linger *lgr = (struct linger *)optval; 4751 4752 if (lgr->l_onoff) { 4753 tcp->tcp_linger = 1; 4754 tcp->tcp_lingertime = lgr->l_linger; 4755 so->so_linger.l_onoff = SO_LINGER; 4756 so->so_options |= SO_LINGER; 4757 } else { 4758 tcp->tcp_linger = 0; 4759 tcp->tcp_lingertime = 0; 4760 so->so_linger.l_onoff = 0; 4761 so->so_options &= ~SO_LINGER; 4762 } 4763 so->so_linger.l_linger = lgr->l_linger; 4764 handled = B_TRUE; 4765 break; 4766 } 4767 case SO_DEBUG: 4768 tcp->tcp_debug = onoff; 4769 #ifdef SOCK_TEST 4770 if (intvalue & 2) 4771 sock_test_timelimit = 10 * hz; 4772 else 4773 sock_test_timelimit = 0; 4774 4775 if (intvalue & 4) 4776 do_useracc = 0; 4777 else 4778 do_useracc = 1; 4779 #endif /* SOCK_TEST */ 4780 break; 4781 case SO_DONTROUTE: 4782 /* 4783 * SO_DONTROUTE, SO_USELOOPBACK and 4784 * SO_BROADCAST are only of interest to 
IP. 4785 * We track them here only so 4786 * that we can report their current value. 4787 */ 4788 tcp->tcp_dontroute = onoff; 4789 if (onoff) 4790 so->so_options |= option_name; 4791 else 4792 so->so_options &= ~option_name; 4793 break; 4794 case SO_USELOOPBACK: 4795 tcp->tcp_useloopback = onoff; 4796 if (onoff) 4797 so->so_options |= option_name; 4798 else 4799 so->so_options &= ~option_name; 4800 break; 4801 case SO_BROADCAST: 4802 tcp->tcp_broadcast = onoff; 4803 if (onoff) 4804 so->so_options |= option_name; 4805 else 4806 so->so_options &= ~option_name; 4807 break; 4808 case SO_REUSEADDR: 4809 tcp->tcp_reuseaddr = onoff; 4810 if (onoff) 4811 so->so_options |= option_name; 4812 else 4813 so->so_options &= ~option_name; 4814 break; 4815 case SO_OOBINLINE: 4816 tcp->tcp_oobinline = onoff; 4817 if (onoff) 4818 so->so_options |= option_name; 4819 else 4820 so->so_options &= ~option_name; 4821 break; 4822 case SO_DGRAM_ERRIND: 4823 tcp->tcp_dgram_errind = onoff; 4824 if (onoff) 4825 so->so_options |= option_name; 4826 else 4827 so->so_options &= ~option_name; 4828 break; 4829 } 4830 break; 4831 case IPPROTO_TCP: 4832 switch (option_name) { 4833 case TCP_NODELAY: 4834 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 4835 error = EINVAL; 4836 eprintsoline(so, error); 4837 mutex_enter(&so->so_lock); 4838 goto done2; 4839 } 4840 ASSERT(optval); 4841 tcp->tcp_naglim = intvalue ? 
1 : tcp->tcp_mss; 4842 handled = B_TRUE; 4843 break; 4844 } 4845 break; 4846 default: 4847 handled = B_FALSE; 4848 break; 4849 } 4850 } 4851 4852 if (handled) { 4853 mutex_enter(&so->so_lock); 4854 goto done2; 4855 } 4856 4857 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 4858 optmgmt_req.MGMT_flags = T_NEGOTIATE; 4859 optmgmt_req.OPT_length = (t_scalar_t)sizeof (oh) + optlen; 4860 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 4861 4862 oh.level = level; 4863 oh.name = option_name; 4864 oh.len = optlen; 4865 4866 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 4867 &oh, sizeof (oh), optval, optlen, 0, _ALLOC_SLEEP); 4868 /* Let option management work in the presence of data flow control */ 4869 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 4870 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 4871 mp = NULL; 4872 mutex_enter(&so->so_lock); 4873 if (error) { 4874 eprintsoline(so, error); 4875 goto done; 4876 } 4877 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 4878 (t_uscalar_t)sizeof (struct T_optmgmt_ack), &mp, 0); 4879 if (error) { 4880 eprintsoline(so, error); 4881 goto done; 4882 } 4883 ASSERT(mp); 4884 /* No need to verify T_optmgmt_ack */ 4885 freemsg(mp); 4886 done: 4887 /* 4888 * Check for SOL_SOCKET options and record their values. 4889 * If we know about a SOL_SOCKET parameter and the transport 4890 * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or 4891 * EPROTO) we let the setsockopt succeed. 
4892 */ 4893 if (level == SOL_SOCKET) { 4894 /* Check parameters */ 4895 switch (option_name) { 4896 case SO_DEBUG: 4897 case SO_REUSEADDR: 4898 case SO_KEEPALIVE: 4899 case SO_DONTROUTE: 4900 case SO_BROADCAST: 4901 case SO_USELOOPBACK: 4902 case SO_OOBINLINE: 4903 case SO_SNDBUF: 4904 case SO_RCVBUF: 4905 #ifdef notyet 4906 case SO_SNDLOWAT: 4907 case SO_RCVLOWAT: 4908 case SO_SNDTIMEO: 4909 case SO_RCVTIMEO: 4910 #endif /* notyet */ 4911 case SO_DGRAM_ERRIND: 4912 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 4913 error = EINVAL; 4914 eprintsoline(so, error); 4915 goto done2; 4916 } 4917 ASSERT(optval); 4918 handled = B_TRUE; 4919 break; 4920 case SO_LINGER: 4921 if (optlen != (t_uscalar_t)sizeof (struct linger)) { 4922 error = EINVAL; 4923 eprintsoline(so, error); 4924 goto done2; 4925 } 4926 ASSERT(optval); 4927 handled = B_TRUE; 4928 break; 4929 } 4930 4931 #define intvalue (*(int32_t *)optval) 4932 4933 switch (option_name) { 4934 case SO_TYPE: 4935 case SO_ERROR: 4936 case SO_ACCEPTCONN: 4937 /* Can't be set */ 4938 error = ENOPROTOOPT; 4939 goto done2; 4940 case SO_LINGER: { 4941 struct linger *l = (struct linger *)optval; 4942 4943 so->so_linger.l_linger = l->l_linger; 4944 if (l->l_onoff) { 4945 so->so_linger.l_onoff = SO_LINGER; 4946 so->so_options |= SO_LINGER; 4947 } else { 4948 so->so_linger.l_onoff = 0; 4949 so->so_options &= ~SO_LINGER; 4950 } 4951 break; 4952 } 4953 4954 case SO_DEBUG: 4955 #ifdef SOCK_TEST 4956 if (intvalue & 2) 4957 sock_test_timelimit = 10 * hz; 4958 else 4959 sock_test_timelimit = 0; 4960 4961 if (intvalue & 4) 4962 do_useracc = 0; 4963 else 4964 do_useracc = 1; 4965 #endif /* SOCK_TEST */ 4966 /* FALLTHRU */ 4967 case SO_REUSEADDR: 4968 case SO_KEEPALIVE: 4969 case SO_DONTROUTE: 4970 case SO_BROADCAST: 4971 case SO_USELOOPBACK: 4972 case SO_OOBINLINE: 4973 case SO_DGRAM_ERRIND: 4974 if (intvalue != 0) { 4975 dprintso(so, 1, 4976 ("sotpi_setsockopt: setting 0x%x\n", 4977 option_name)); 4978 so->so_options |= option_name; 4979 
} else { 4980 dprintso(so, 1, 4981 ("sotpi_setsockopt: clearing 0x%x\n", 4982 option_name)); 4983 so->so_options &= ~option_name; 4984 } 4985 break; 4986 /* 4987 * The following options are only returned by us when the 4988 * T_SVR4_OPTMGMT_REQ fails. 4989 * XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs 4990 * since the transport might adjust the value and not 4991 * return exactly what was set by the application. 4992 */ 4993 case SO_SNDBUF: 4994 so->so_sndbuf = intvalue; 4995 break; 4996 case SO_RCVBUF: 4997 so->so_rcvbuf = intvalue; 4998 break; 4999 #ifdef notyet 5000 /* 5001 * We do not implement the semantics of these options 5002 * thus we shouldn't implement the options either. 5003 */ 5004 case SO_SNDLOWAT: 5005 so->so_sndlowat = intvalue; 5006 break; 5007 case SO_RCVLOWAT: 5008 so->so_rcvlowat = intvalue; 5009 break; 5010 case SO_SNDTIMEO: 5011 so->so_sndtimeo = intvalue; 5012 break; 5013 case SO_RCVTIMEO: 5014 so->so_rcvtimeo = intvalue; 5015 break; 5016 #endif /* notyet */ 5017 } 5018 #undef intvalue 5019 5020 if (error) { 5021 if ((error == ENOPROTOOPT || error == EPROTO || 5022 error == EINVAL) && handled) { 5023 dprintso(so, 1, 5024 ("setsockopt: ignoring error %d for 0x%x\n", 5025 error, option_name)); 5026 error = 0; 5027 } 5028 } 5029 } 5030 done2: 5031 ret: 5032 so_unlock_single(so, SOLOCKED); 5033 mutex_exit(&so->so_lock); 5034 return (error); 5035 } 5036