1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/inttypes.h> 31 #include <sys/t_lock.h> 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/buf.h> 35 #include <sys/conf.h> 36 #include <sys/cred.h> 37 #include <sys/kmem.h> 38 #include <sys/sysmacros.h> 39 #include <sys/vfs.h> 40 #include <sys/vnode.h> 41 #include <sys/debug.h> 42 #include <sys/errno.h> 43 #include <sys/time.h> 44 #include <sys/file.h> 45 #include <sys/user.h> 46 #include <sys/stream.h> 47 #include <sys/strsubr.h> 48 #include <sys/esunddi.h> 49 #include <sys/flock.h> 50 #include <sys/modctl.h> 51 #include <sys/vtrace.h> 52 #include <sys/strsun.h> 53 #include <sys/cmn_err.h> 54 #include <sys/proc.h> 55 #include <sys/ddi.h> 56 #include <sys/kmem_impl.h> 57 58 #include <sys/suntpi.h> 59 #include <sys/socket.h> 60 #include <sys/sockio.h> 61 #include <sys/socketvar.h> 62 #include <netinet/in.h> 63 64 #include <sys/tiuser.h> 65 #define _SUN_TPI_VERSION 2 66 #include <sys/tihdr.h> 67 68 #include 
<inet/kssl/ksslapi.h> 69 70 #include <c2/audit.h> 71 72 #include <sys/dcopy.h> 73 74 int so_default_version = SOV_SOCKSTREAM; 75 76 #ifdef DEBUG 77 /* Set sockdebug to print debug messages when SO_DEBUG is set */ 78 int sockdebug = 0; 79 80 /* Set sockprinterr to print error messages when SO_DEBUG is set */ 81 int sockprinterr = 0; 82 83 /* 84 * Set so_default_options to SO_DEBUG is all sockets should be created 85 * with SO_DEBUG set. This is needed to get debug printouts from the 86 * socket() call itself. 87 */ 88 int so_default_options = 0; 89 #endif /* DEBUG */ 90 91 #ifdef SOCK_TEST 92 /* 93 * Set to number of ticks to limit cv_waits for code coverage testing. 94 * Set to 1000 when SO_DEBUG is set to 2. 95 */ 96 clock_t sock_test_timelimit = 0; 97 #endif /* SOCK_TEST */ 98 99 /* 100 * For concurrency testing of e.g. opening /dev/ip which does not 101 * handle T_INFO_REQ messages. 102 */ 103 int so_no_tinfo = 0; 104 105 /* 106 * Timeout for getting a T_CAPABILITY_ACK - it is possible for a provider 107 * to simply ignore the T_CAPABILITY_REQ. 108 */ 109 clock_t sock_capability_timeout = 2; /* seconds */ 110 111 static int do_tcapability(struct sonode *so, t_uscalar_t cap_bits1); 112 static void so_removehooks(struct sonode *so); 113 114 static mblk_t *strsock_proto(vnode_t *vp, mblk_t *mp, 115 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 116 strsigset_t *allmsgsigs, strpollset_t *pollwakeups); 117 static mblk_t *strsock_misc(vnode_t *vp, mblk_t *mp, 118 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 119 strsigset_t *allmsgsigs, strpollset_t *pollwakeups); 120 121 static int tlitosyserr(int terr); 122 123 /* 124 * Sodirect kmem_cache and put/wakeup functions. 125 */ 126 struct kmem_cache *socktpi_sod_cache; 127 static int sodput(sodirect_t *, mblk_t *); 128 static void sodwakeup(sodirect_t *); 129 130 /* 131 * Called by sockinit() when sockfs is loaded. 
 *
 * Check for uioasync dcopy support and if supported
 * allocate the sodirect_t kmem_cache socktpi_sod_cache.
 */
int
sostr_init()
{
	if (uioasync.enabled == B_TRUE && modload("misc", "dcopy") == -1) {
		/* No dcopy KAPI driver, disable uioa */
		uioasync.enabled = B_FALSE;
	}

	if (uioasync.enabled == B_TRUE) {
		/* Uioasync enabled so sodirect will be used */
		socktpi_sod_cache = kmem_cache_create("socktpi_sod_cache",
		    sizeof (sodirect_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
	}

	return (0);
}

/*
 * Convert a socket to a stream. Invoked when the illusory sockmod
 * is popped from the stream.
 * Change the stream head back to default operation without losing
 * any messages (T_conn_ind's are moved to the stream head queue).
 */
int
so_sock2stream(struct sonode *so)
{
	struct vnode *vp = SOTOV(so);
	queue_t *rq;
	mblk_t *mp;
	int error = 0;

	ASSERT(MUTEX_HELD(&so->so_plumb_lock));

	mutex_enter(&so->so_lock);
	so_lock_single(so);

	ASSERT(so->so_version != SOV_STREAM);

	if (so->so_state & SS_DIRECT) {
		mblk_t **mpp;
		int rval;

		/*
		 * Tell the transport below that sockmod is being popped
		 */
		mutex_exit(&so->so_lock);
		error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K, CRED(),
		    &rval);
		mutex_enter(&so->so_lock);
		if (error != 0) {
			dprintso(so, 0, ("so_sock2stream(%p): "
			    "_SIOCSOCKFALLBACK failed\n", so));
			goto exit;
		}
		so->so_state &= ~SS_DIRECT;

		/*
		 * Walk any queued T_CONN_INDs and strip the options that
		 * TCP added for the accept fast-path; a plain stream
		 * consumer must not see them.
		 */
		for (mpp = &so->so_conn_ind_head; (mp = *mpp) != NULL;
		    mpp = &mp->b_next) {
			struct T_conn_ind *conn_ind;

			/*
			 * strsock_proto() has already verified the length of
			 * this message block.
			 */
			ASSERT(MBLKL(mp) >= sizeof (struct T_conn_ind));

			conn_ind = (struct T_conn_ind *)mp->b_rptr;
			if (conn_ind->OPT_length == 0 &&
			    conn_ind->OPT_offset == 0)
				continue;

			if (DB_REF(mp) > 1) {
				mblk_t *newmp;
				size_t length;
				cred_t *cr;

				/*
				 * Copy the message block because it is used
				 * elsewhere, too.
				 */
				length = MBLKL(mp);
				newmp = soallocproto(length, _ALLOC_INTR);
				if (newmp == NULL) {
					error = EINTR;
					goto exit;
				}
				bcopy(mp->b_rptr, newmp->b_wptr, length);
				newmp->b_wptr += length;
				newmp->b_next = mp->b_next;
				cr = DB_CRED(mp);
				if (cr != NULL)
					mblk_setcred(newmp, cr);
				DB_CPID(newmp) = DB_CPID(mp);

				/*
				 * Link the new message block into the queue
				 * and free the old one.
				 */
				*mpp = newmp;
				mp->b_next = NULL;
				freemsg(mp);

				mp = newmp;
				conn_ind = (struct T_conn_ind *)mp->b_rptr;
			}

			/*
			 * Remove options added by TCP for accept fast-path.
			 */
			conn_ind->OPT_length = 0;
			conn_ind->OPT_offset = 0;
		}
	}

	so->so_version = SOV_STREAM;
	so->so_priv = NULL;

	/*
	 * Remove the hooks in the stream head to avoid queuing more
	 * packets in sockfs.
	 */
	mutex_exit(&so->so_lock);
	so_removehooks(so);
	mutex_enter(&so->so_lock);

	/*
	 * Clear any state related to urgent data. Leave any T_EXDATA_IND
	 * on the queue - the behavior of urgent data after a switch is
	 * left undefined.
	 */
	so->so_error = so->so_delayed_error = 0;
	freemsg(so->so_oobmsg);
	so->so_oobmsg = NULL;
	so->so_oobsigcnt = so->so_oobcnt = 0;

	so->so_state &= ~(SS_RCVATMARK|SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA|
	    SS_HASCONNIND|SS_SAVEDEOR);
	ASSERT(so_verify_oobstate(so));

	freemsg(so->so_ack_mp);
	so->so_ack_mp = NULL;

	/*
	 * Flush the T_DISCON_IND on so_discon_ind_mp.
	 */
	so_flush_discon_ind(so);

	/*
	 * Move any queued T_CONN_IND messages to stream head queue.
	 */
	rq = RD(strvp2wq(vp));
	while ((mp = so->so_conn_ind_head) != NULL) {
		so->so_conn_ind_head = mp->b_next;
		mp->b_next = NULL;
		if (so->so_conn_ind_head == NULL) {
			ASSERT(so->so_conn_ind_tail == mp);
			so->so_conn_ind_tail = NULL;
		}
		dprintso(so, 0,
		    ("so_sock2stream(%p): moving T_CONN_IND\n",
		    so));

		/* Drop lock across put() */
		mutex_exit(&so->so_lock);
		put(rq, mp);
		mutex_enter(&so->so_lock);
	}

exit:
	ASSERT(MUTEX_HELD(&so->so_lock));
	so_unlock_single(so, SOLOCKED);
	mutex_exit(&so->so_lock);
	return (error);
}

/*
 * Convert a stream back to a socket. This is invoked when the illusory
 * sockmod is pushed on a stream (where the stream was "created" by
 * popping the illusory sockmod).
 * This routine can not recreate the socket state (certain aspects of
 * it like urgent data state and the bound/connected addresses for AF_UNIX
 * sockets can not be recreated by asking the transport for information).
 * Thus this routine implicitly assumes that the socket is in an initial
 * state (as if it was just created). It flushes any messages queued on the
 * read queue to avoid dealing with e.g. TPI acks or T_exdata_ind messages.
 */
void
so_stream2sock(struct sonode *so)
{
	struct vnode *vp = SOTOV(so);

	ASSERT(MUTEX_HELD(&so->so_plumb_lock));

	mutex_enter(&so->so_lock);
	so_lock_single(so);
	ASSERT(so->so_version == SOV_STREAM);
	so->so_version = SOV_SOCKSTREAM;
	so->so_pushcnt = 0;
	mutex_exit(&so->so_lock);

	/*
	 * Set a permanent error to force any thread in sorecvmsg to
	 * return (and drop SOREADLOCKED). Clear the error once
	 * we have SOREADLOCKED.
	 * This makes a read sleeping during the I_PUSH of sockmod return
	 * EIO.
	 */
	strsetrerror(SOTOV(so), EIO, 1, NULL);

	/*
	 * Get the read lock before flushing data to avoid
	 * problems with the T_EXDATA_IND MSG_PEEK code in sorecvmsg.
	 */
	mutex_enter(&so->so_lock);
	(void) so_lock_read(so, 0);	/* Set SOREADLOCKED */
	mutex_exit(&so->so_lock);

	strsetrerror(SOTOV(so), 0, 0, NULL);
	so_installhooks(so);

	/*
	 * Flush everything on the read queue.
	 * This ensures that no T_CONN_IND remain and that no T_EXDATA_IND
	 * remain; those types of messages would confuse sockfs.
	 */
	strflushrq(vp, FLUSHALL);
	mutex_enter(&so->so_lock);

	/*
	 * Flush the T_DISCON_IND on so_discon_ind_mp.
	 */
	so_flush_discon_ind(so);
	so_unlock_read(so);	/* Clear SOREADLOCKED */

	so_unlock_single(so, SOLOCKED);
	mutex_exit(&so->so_lock);
}

/*
 * Install the hooks in the stream head.
 */
void
so_installhooks(struct sonode *so)
{
	struct vnode *vp = SOTOV(so);

	strsetrputhooks(vp, SH_SIGALLDATA | SH_IGN_ZEROLEN | SH_CONSOL_DATA,
	    strsock_proto, strsock_misc);
	strsetwputhooks(vp, SH_SIGPIPE | SH_RECHECK_ERR, 0);
}

/*
 * Remove the hooks in the stream head.
 */
static void
so_removehooks(struct sonode *so)
{
	struct vnode *vp = SOTOV(so);

	strsetrputhooks(vp, 0, NULL, NULL);
	strsetwputhooks(vp, 0, STRTIMOUT);
	/*
	 * Leave read behavior as it would have been for a normal
	 * stream i.e. a read of an M_PROTO will fail.
	 */
}

/*
 * Initialize the streams side of a socket including
 * T_info_req/ack processing. If tso is not NULL its values are used thereby
 * avoiding the T_INFO_REQ.
 */
int
so_strinit(struct sonode *so, struct sonode *tso)
{
	struct vnode *vp = SOTOV(so);
	struct stdata *stp;
	mblk_t *mp;
	int error;

	dprintso(so, 1, ("so_strinit(%p)\n", so));

	/* Preallocate an unbind_req message */
	mp = soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP);
	mutex_enter(&so->so_lock);
	so->so_unbind_mp = mp;
#ifdef DEBUG
	so->so_options = so_default_options;
#endif /* DEBUG */
	mutex_exit(&so->so_lock);

	so_installhooks(so);

	/*
	 * The T_CAPABILITY_REQ should be the first message sent down because
	 * at least TCP has a fast-path for this which avoids timeouts while
	 * waiting for the T_CAPABILITY_ACK under high system load.
	 */
	if (tso == NULL) {
		error = do_tcapability(so, TC1_ACCEPTOR_ID | TC1_INFO);
		if (error)
			return (error);
	} else {
		/*
		 * Inherit the transport info from tso (e.g. the listener
		 * for an accepted socket), skipping the T_INFO_REQ.
		 */
		mutex_enter(&so->so_lock);
		so->so_tsdu_size = tso->so_tsdu_size;
		so->so_etsdu_size = tso->so_etsdu_size;
		so->so_addr_size = tso->so_addr_size;
		so->so_opt_size = tso->so_opt_size;
		so->so_tidu_size = tso->so_tidu_size;
		so->so_serv_type = tso->so_serv_type;
		so->so_mode = tso->so_mode & ~SM_ACCEPTOR_ID;
		mutex_exit(&so->so_lock);

		/* the following do_tcapability may update so->so_mode */
		if ((tso->so_serv_type != T_CLTS) &&
		    !(tso->so_state & SS_DIRECT)) {
			error = do_tcapability(so, TC1_ACCEPTOR_ID);
			if (error)
				return (error);
		}
	}
	/*
	 * If the addr_size is 0 we treat it as already bound
	 * and connected. This is used by the routing socket.
	 * We set the addr_size to something to allocate the address
	 * structures.
	 */
	if (so->so_addr_size == 0) {
		so->so_state |= SS_ISBOUND | SS_ISCONNECTED;
		/* Address size can vary with address families. */
		if (so->so_family == AF_INET6)
			so->so_addr_size =
			    (t_scalar_t)sizeof (struct sockaddr_in6);
		else
			so->so_addr_size =
			    (t_scalar_t)sizeof (struct sockaddr_in);
		ASSERT(so->so_unbind_mp);
	}
	/*
	 * Allocate the addresses. Local and foreign addresses share a
	 * single allocation; so_faddr_sa points at the second half.
	 */
	ASSERT(so->so_laddr_sa == NULL && so->so_faddr_sa == NULL);
	ASSERT(so->so_laddr_len == 0 && so->so_faddr_len == 0);
	so->so_laddr_maxlen = so->so_faddr_maxlen =
	    P2ROUNDUP(so->so_addr_size, KMEM_ALIGN);
	so->so_laddr_sa = kmem_alloc(so->so_laddr_maxlen * 2, KM_SLEEP);
	so->so_faddr_sa = (struct sockaddr *)((caddr_t)so->so_laddr_sa
	    + so->so_laddr_maxlen);

	if (so->so_family == AF_UNIX) {
		/*
		 * Initialize AF_UNIX related fields.
		 */
		bzero(&so->so_ux_laddr, sizeof (so->so_ux_laddr));
		bzero(&so->so_ux_faddr, sizeof (so->so_ux_faddr));
	}

	stp = vp->v_stream;
	/*
	 * Have to keep minpsz at zero in order to allow write/send of zero
	 * bytes.
	 */
	mutex_enter(&stp->sd_lock);
	if (stp->sd_qn_minpsz == 1)
		stp->sd_qn_minpsz = 0;
	mutex_exit(&stp->sd_lock);

	/*
	 * If sodirect capable allocate and initialize sodirect_t.
	 * Note, SS_SODIRECT is set in socktpi_open().
	 */
	if (so->so_state & SS_SODIRECT) {
		sodirect_t *sodp;

		ASSERT(so->so_direct == NULL);

		sodp = kmem_cache_alloc(socktpi_sod_cache, KM_SLEEP);
		sodp->sod_state = SOD_ENABLED | SOD_WAKE_NOT;
		sodp->sod_want = 0;
		sodp->sod_q = RD(stp->sd_wrq);
		sodp->sod_enqueue = sodput;
		sodp->sod_wakeup = sodwakeup;
		sodp->sod_uioafh = NULL;
		sodp->sod_uioaft = NULL;
		sodp->sod_lock = &stp->sd_lock;
		/*
		 * Remainder of the sod_uioa members are left uninitialized
		 * but will be initialized later by uioainit() before uioa
		 * is enabled.
		 */
		sodp->sod_uioa.uioa_state = UIOA_ALLOC;
		so->so_direct = sodp;
		stp->sd_sodirect = sodp;
	}

	return (0);
}

/*
 * Record the interesting fields of a T_info_ack in the sonode and
 * derive the socket mode flags from them.
 * Caller holds so->so_lock (fields written here are so_lock protected).
 */
static void
copy_tinfo(struct sonode *so, struct T_info_ack *tia)
{
	so->so_tsdu_size = tia->TSDU_size;
	so->so_etsdu_size = tia->ETSDU_size;
	so->so_addr_size = tia->ADDR_size;
	so->so_opt_size = tia->OPT_size;
	so->so_tidu_size = tia->TIDU_size;
	so->so_serv_type = tia->SERV_type;
	switch (tia->CURRENT_state) {
	case TS_UNBND:
		break;
	case TS_IDLE:
		so->so_state |= SS_ISBOUND;
		so->so_laddr_len = 0;
		so->so_state &= ~SS_LADDR_VALID;
		break;
	case TS_DATA_XFER:
		so->so_state |= SS_ISBOUND|SS_ISCONNECTED;
		so->so_laddr_len = 0;
		so->so_faddr_len = 0;
		so->so_state &= ~(SS_LADDR_VALID | SS_FADDR_VALID);
		break;
	}

	/*
	 * Heuristics for determining the socket mode flags
	 * (SM_ATOMIC, SM_CONNREQUIRED, SM_ADDR, SM_FDPASSING,
	 * and SM_EXDATA, SM_OPTDATA, and SM_BYTESTREAM)
	 * from the info ack.
	 */
	if (so->so_serv_type == T_CLTS) {
		so->so_mode |= SM_ATOMIC | SM_ADDR;
	} else {
		so->so_mode |= SM_CONNREQUIRED;
		if (so->so_etsdu_size != 0 && so->so_etsdu_size != -2)
			so->so_mode |= SM_EXDATA;
	}
	if (so->so_type == SOCK_SEQPACKET || so->so_type == SOCK_RAW) {
		/* Semantics are to discard tail end of messages */
		so->so_mode |= SM_ATOMIC;
	}
	if (so->so_family == AF_UNIX) {
		so->so_mode |= SM_FDPASSING | SM_OPTDATA;
		if (so->so_addr_size == -1) {
			/* MAXPATHLEN + soun_family + nul termination */
			so->so_addr_size = (t_scalar_t)(MAXPATHLEN +
			    sizeof (short) + 1);
		}
		if (so->so_type == SOCK_STREAM) {
			/*
			 * Make it into a byte-stream transport.
			 * SOCK_SEQPACKET sockets are unchanged.
			 */
			so->so_tsdu_size = 0;
		}
	} else if (so->so_addr_size == -1) {
		/*
		 * Logic extracted from sockmod - have to pick some max address
		 * length in order to preallocate the addresses.
		 */
		so->so_addr_size = SOA_DEFSIZE;
	}
	if (so->so_tsdu_size == 0)
		so->so_mode |= SM_BYTESTREAM;
}

/*
 * Sanity-check the transport info recorded by copy_tinfo() against the
 * socket type and family. Returns 0 or an errno (EPROTO/EMSGSIZE).
 */
static int
check_tinfo(struct sonode *so)
{
	/* Consistency checks */
	if (so->so_type == SOCK_DGRAM && so->so_serv_type != T_CLTS) {
		eprintso(so, ("service type and socket type mismatch\n"));
		eprintsoline(so, EPROTO);
		return (EPROTO);
	}
	if (so->so_type == SOCK_STREAM && so->so_serv_type == T_CLTS) {
		eprintso(so, ("service type and socket type mismatch\n"));
		eprintsoline(so, EPROTO);
		return (EPROTO);
	}
	if (so->so_type == SOCK_SEQPACKET && so->so_serv_type == T_CLTS) {
		eprintso(so, ("service type and socket type mismatch\n"));
		eprintsoline(so, EPROTO);
		return (EPROTO);
	}
	if (so->so_family == AF_INET &&
	    so->so_addr_size != (t_scalar_t)sizeof (struct sockaddr_in)) {
		eprintso(so,
		    ("AF_INET must have sockaddr_in address length. Got %d\n",
		    so->so_addr_size));
		eprintsoline(so, EMSGSIZE);
		return (EMSGSIZE);
	}
	if (so->so_family == AF_INET6 &&
	    so->so_addr_size != (t_scalar_t)sizeof (struct sockaddr_in6)) {
		eprintso(so,
		    ("AF_INET6 must have sockaddr_in6 address length. Got %d\n",
		    so->so_addr_size));
		eprintsoline(so, EMSGSIZE);
		return (EMSGSIZE);
	}

	dprintso(so, 1, (
	    "tinfo: serv %d tsdu %d, etsdu %d, addr %d, opt %d, tidu %d\n",
	    so->so_serv_type, so->so_tsdu_size, so->so_etsdu_size,
	    so->so_addr_size, so->so_opt_size,
	    so->so_tidu_size));
	dprintso(so, 1, ("tinfo: so_state %s\n",
	    pr_state(so->so_state, so->so_mode)));
	return (0);
}

/*
 * Send down T_info_req and wait for the ack.
 * Record interesting T_info_ack values in the sonode.
 */
static int
do_tinfo(struct sonode *so)
{
	struct T_info_req tir;
	mblk_t *mp;
	int error;

	ASSERT(MUTEX_NOT_HELD(&so->so_lock));

	if (so_no_tinfo) {
		so->so_addr_size = 0;
		return (0);
	}

	dprintso(so, 1, ("do_tinfo(%p)\n", so));

	/* Send T_INFO_REQ */
	tir.PRIM_type = T_INFO_REQ;
	mp = soallocproto1(&tir, sizeof (tir),
	    sizeof (struct T_info_req) + sizeof (struct T_info_ack),
	    _ALLOC_INTR);
	if (mp == NULL) {
		eprintsoline(so, ENOBUFS);
		return (ENOBUFS);
	}
	/* T_INFO_REQ has to be M_PCPROTO */
	DB_TYPE(mp) = M_PCPROTO;

	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
	if (error) {
		eprintsoline(so, error);
		return (error);
	}
	mutex_enter(&so->so_lock);
	/* Wait for T_INFO_ACK */
	if ((error = sowaitprim(so, T_INFO_REQ, T_INFO_ACK,
	    (t_uscalar_t)sizeof (struct T_info_ack), &mp, 0))) {
		mutex_exit(&so->so_lock);
		eprintsoline(so, error);
		return (error);
	}

	ASSERT(mp);
	copy_tinfo(so, (struct T_info_ack *)mp->b_rptr);
	mutex_exit(&so->so_lock);
	freemsg(mp);
	return (check_tinfo(so));
}

/*
 * Send down T_capability_req and wait for the ack.
 * Record interesting T_capability_ack values in the sonode.
 * Falls back to do_tinfo() when the provider is known (or discovered)
 * not to support T_CAPABILITY_REQ.
 */
static int
do_tcapability(struct sonode *so, t_uscalar_t cap_bits1)
{
	struct T_capability_req tcr;
	struct T_capability_ack *tca;
	mblk_t *mp;
	int error;

	ASSERT(cap_bits1 != 0);
	ASSERT((cap_bits1 & ~(TC1_ACCEPTOR_ID | TC1_INFO)) == 0);
	ASSERT(MUTEX_NOT_HELD(&so->so_lock));

	if (so->so_provinfo->tpi_capability == PI_NO)
		return (do_tinfo(so));

	if (so_no_tinfo) {
		so->so_addr_size = 0;
		if ((cap_bits1 &= ~TC1_INFO) == 0)
			return (0);
	}

	dprintso(so, 1, ("do_tcapability(%p)\n", so));

	/* Send T_CAPABILITY_REQ */
	tcr.PRIM_type = T_CAPABILITY_REQ;
	tcr.CAP_bits1 = cap_bits1;
	mp = soallocproto1(&tcr, sizeof (tcr),
	    sizeof (struct T_capability_req) + sizeof (struct T_capability_ack),
	    _ALLOC_INTR);
	if (mp == NULL) {
		eprintsoline(so, ENOBUFS);
		return (ENOBUFS);
	}
	/* T_CAPABILITY_REQ should be M_PCPROTO here */
	DB_TYPE(mp) = M_PCPROTO;

	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
	if (error) {
		eprintsoline(so, error);
		return (error);
	}
	mutex_enter(&so->so_lock);
	/* Wait for T_CAPABILITY_ACK - a provider may simply ignore the req */
	if ((error = sowaitprim(so, T_CAPABILITY_REQ, T_CAPABILITY_ACK,
	    (t_uscalar_t)sizeof (*tca), &mp, sock_capability_timeout * hz))) {
		mutex_exit(&so->so_lock);
		PI_PROVLOCK(so->so_provinfo);
		if (so->so_provinfo->tpi_capability == PI_DONTKNOW)
			so->so_provinfo->tpi_capability = PI_NO;
		PI_PROVUNLOCK(so->so_provinfo);
		ASSERT((so->so_mode & SM_ACCEPTOR_ID) == 0);
		if (cap_bits1 & TC1_INFO) {
			/*
			 * If the T_CAPABILITY_REQ timed out and then a
			 * T_INFO_REQ gets a protocol error, most likely
			 * the capability was slow (vs. unsupported). Return
			 * ENOSR for this case as a best guess.
			 */
			if (error == ETIME) {
				return ((error = do_tinfo(so)) == EPROTO ?
				    ENOSR : error);
			}
			return (do_tinfo(so));
		}
		return (0);
	}

	/* Got the ack - remember that this provider supports the request */
	if (so->so_provinfo->tpi_capability == PI_DONTKNOW) {
		PI_PROVLOCK(so->so_provinfo);
		so->so_provinfo->tpi_capability = PI_YES;
		PI_PROVUNLOCK(so->so_provinfo);
	}

	ASSERT(mp);
	tca = (struct T_capability_ack *)mp->b_rptr;

	ASSERT((cap_bits1 & TC1_INFO) == (tca->CAP_bits1 & TC1_INFO));

	cap_bits1 = tca->CAP_bits1;

	if (cap_bits1 & TC1_ACCEPTOR_ID) {
		so->so_acceptor_id = tca->ACCEPTOR_id;
		so->so_mode |= SM_ACCEPTOR_ID;
	}

	if (cap_bits1 & TC1_INFO)
		copy_tinfo(so, &tca->INFO_ack);

	mutex_exit(&so->so_lock);
	freemsg(mp);

	if (cap_bits1 & TC1_INFO)
		return (check_tinfo(so));

	return (0);
}

/*
 * Retrieve and clear the socket error.
 */
int
sogeterr(struct sonode *so)
{
	int error;

	ASSERT(MUTEX_HELD(&so->so_lock));

	error = so->so_error;
	so->so_error = 0;

	return (error);
}

/*
 * This routine is registered with the stream head to retrieve read
 * side errors.
 * It does not clear the socket error for a peeking read side operation.
 * If the error is to be cleared it sets *clearerr.
 */
int
sogetrderr(vnode_t *vp, int ispeek, int *clearerr)
{
	struct sonode *so = VTOSO(vp);
	int error;

	mutex_enter(&so->so_lock);
	if (ispeek) {
		error = so->so_error;
		*clearerr = 0;
	} else {
		error = so->so_error;
		so->so_error = 0;
		*clearerr = 1;
	}
	mutex_exit(&so->so_lock);
	return (error);
}

/*
 * This routine is registered with the stream head to retrieve write
 * side errors.
 * It does not clear the socket error for a peeking read side operation.
 * If the error is to be cleared it sets *clearerr.
 */
int
sogetwrerr(vnode_t *vp, int ispeek, int *clearerr)
{
	struct sonode *so = VTOSO(vp);
	int error;

	mutex_enter(&so->so_lock);
	if (so->so_state & SS_CANTSENDMORE) {
		error = EPIPE;
		*clearerr = 0;
	} else {
		error = so->so_error;
		if (ispeek) {
			*clearerr = 0;
		} else {
			so->so_error = 0;
			*clearerr = 1;
		}
	}
	mutex_exit(&so->so_lock);
	return (error);
}

/*
 * Set a nonpersistent read and write error on the socket.
 * Used when there is a T_uderror_ind for a connected socket.
 * The caller also needs to call strsetrerror and strsetwerror
 * after dropping the lock.
 */
void
soseterror(struct sonode *so, int error)
{
	ASSERT(error != 0);

	ASSERT(MUTEX_HELD(&so->so_lock));
	so->so_error = (ushort_t)error;
}

void
soisconnecting(struct sonode *so)
{
	ASSERT(MUTEX_HELD(&so->so_lock));
	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;
	cv_broadcast(&so->so_state_cv);
}

void
soisconnected(struct sonode *so)
{
	ASSERT(MUTEX_HELD(&so->so_lock));
	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTED;
	cv_broadcast(&so->so_state_cv);
}

/*
 * The caller also needs to call strsetrerror, strsetwerror and strseteof.
 */
void
soisdisconnected(struct sonode *so, int error)
{
	ASSERT(MUTEX_HELD(&so->so_lock));
	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING|
	    SS_LADDR_VALID|SS_FADDR_VALID);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE);
	so->so_error = (ushort_t)error;
	if (so->so_peercred != NULL) {
		crfree(so->so_peercred);
		so->so_peercred = NULL;
	}
	cv_broadcast(&so->so_state_cv);
}

/*
 * For connected AF_UNIX SOCK_DGRAM sockets when the peer closes.
 * Does not affect write side.
 * The caller also has to call strsetrerror.
 */
static void
sobreakconn(struct sonode *so, int error)
{
	ASSERT(MUTEX_HELD(&so->so_lock));
	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_error = (ushort_t)error;
	cv_broadcast(&so->so_state_cv);
}

/*
 * Can no longer send.
 * Caller must also call strsetwerror.
 *
 * We mark the peer address as no longer valid for getpeername, but
 * leave it around for so_unix_close to notify the peer (that
 * transport has no addressing held at that layer).
 */
void
socantsendmore(struct sonode *so)
{
	ASSERT(MUTEX_HELD(&so->so_lock));
	/* & binds tighter than |: clears SS_FADDR_VALID, sets CANTSENDMORE */
	so->so_state = so->so_state & ~SS_FADDR_VALID | SS_CANTSENDMORE;
	cv_broadcast(&so->so_state_cv);
}

/*
 * The caller must call strseteof(,1) as well as this routine
 * to change the socket state.
 */
void
socantrcvmore(struct sonode *so)
{
	ASSERT(MUTEX_HELD(&so->so_lock));
	so->so_state |= SS_CANTRCVMORE;
	cv_broadcast(&so->so_state_cv);
}

/*
 * The caller has sent down a "request_prim" primitive and wants to wait for
 * an ack ("ack_prim") or an T_ERROR_ACK for it.
 * The specified "ack_prim" can be a T_OK_ACK.
 *
 * Assumes that all the TPI acks are M_PCPROTO messages.
 *
 * Note that the socket is single-threaded (using so_lock_single)
 * for all operations that generate TPI ack messages. Since
 * only TPI ack messages are M_PCPROTO we should never receive
 * anything except either the ack we are expecting or a T_ERROR_ACK
 * for the same primitive.
 */
int
sowaitprim(struct sonode *so, t_scalar_t request_prim, t_scalar_t ack_prim,
    t_uscalar_t min_size, mblk_t **mpp, clock_t wait)
{
	mblk_t *mp;
	union T_primitives *tpr;
	int error;

	dprintso(so, 1, ("sowaitprim(%p, %d, %d, %d, %p, %lu)\n",
	    so, request_prim, ack_prim, min_size, mpp, wait));

	ASSERT(MUTEX_HELD(&so->so_lock));

	error = sowaitack(so, &mp, wait);
	if (error)
		return (error);

	dprintso(so, 1, ("got msg %p\n", mp));
	if (DB_TYPE(mp) != M_PCPROTO ||
	    MBLKL(mp) < sizeof (tpr->type)) {
		freemsg(mp);
		eprintsoline(so, EPROTO);
		return (EPROTO);
	}
	tpr = (union T_primitives *)mp->b_rptr;
	/*
	 * Did we get the primitive that we were asking for?
	 * For T_OK_ACK we also check that it matches the request primitive.
	 */
	if (tpr->type == ack_prim &&
	    (ack_prim != T_OK_ACK ||
	    tpr->ok_ack.CORRECT_prim == request_prim)) {
		if (MBLKL(mp) >= (ssize_t)min_size) {
			/* Found what we are looking for */
			*mpp = mp;
			return (0);
		}
		/* Too short */
		freemsg(mp);
		eprintsoline(so, EPROTO);
		return (EPROTO);
	}

	if (tpr->type == T_ERROR_ACK &&
	    tpr->error_ack.ERROR_prim == request_prim) {
		/* Error to the primitive we were looking for */
		if (tpr->error_ack.TLI_error == TSYSERR) {
			error = tpr->error_ack.UNIX_error;
		} else {
			error = tlitosyserr(tpr->error_ack.TLI_error);
		}
		dprintso(so, 0, ("error_ack for %d: %d/%d ->%d\n",
		    tpr->error_ack.ERROR_prim,
		    tpr->error_ack.TLI_error,
		    tpr->error_ack.UNIX_error,
		    error));
		freemsg(mp);
		return (error);
	}
	/*
	 * Wrong primitive or T_ERROR_ACK for the wrong primitive
	 */
#ifdef DEBUG
	if (tpr->type == T_ERROR_ACK) {
		dprintso(so, 0, ("error_ack for %d: %d/%d\n",
		    tpr->error_ack.ERROR_prim,
		    tpr->error_ack.TLI_error,
		    tpr->error_ack.UNIX_error));
	} else if (tpr->type == T_OK_ACK) {
		dprintso(so, 0, ("ok_ack for %d, expected %d for %d\n",
		    tpr->ok_ack.CORRECT_prim,
		    ack_prim, request_prim));
	} else {
		dprintso(so, 0,
		    ("unexpected primitive %d, expected %d for %d\n",
		    tpr->type, ack_prim, request_prim));
	}
#endif /* DEBUG */

	freemsg(mp);
	eprintsoline(so, EPROTO);
	return (EPROTO);
}

/*
 * Wait for a T_OK_ACK for the specified primitive.
 */
int
sowaitokack(struct sonode *so, t_scalar_t request_prim)
{
	mblk_t *mp;
	int error;

	error = sowaitprim(so, request_prim, T_OK_ACK,
	    (t_uscalar_t)sizeof (struct T_ok_ack), &mp, 0);
	if (error)
		return (error);
	freemsg(mp);
	return (0);
}

/*
 * Queue a received TPI ack message on so_ack_mp.
 */
void
soqueueack(struct sonode *so, mblk_t *mp)
{
	if (DB_TYPE(mp) != M_PCPROTO) {
		zcmn_err(getzoneid(), CE_WARN,
		    "sockfs: received unexpected M_PROTO TPI ack. Prim %d\n",
		    *(t_scalar_t *)mp->b_rptr);
		freemsg(mp);
		return;
	}

	mutex_enter(&so->so_lock);
	if (so->so_ack_mp != NULL) {
		dprintso(so, 1, ("so_ack_mp already set\n"));
		freemsg(so->so_ack_mp);
		so->so_ack_mp = NULL;
	}
	so->so_ack_mp = mp;
	cv_broadcast(&so->so_ack_cv);
	mutex_exit(&so->so_lock);
}

/*
 * Wait for a TPI ack ignoring signals and errors.
 */
int
sowaitack(struct sonode *so, mblk_t **mpp, clock_t wait)
{
	ASSERT(MUTEX_HELD(&so->so_lock));

	while (so->so_ack_mp == NULL) {
#ifdef SOCK_TEST
		if (wait == 0 && sock_test_timelimit != 0)
			wait = sock_test_timelimit;
#endif
		if (wait != 0) {
			/*
			 * Only wait for the time limit.
1121 */ 1122 clock_t now; 1123 1124 time_to_wait(&now, wait); 1125 if (cv_timedwait(&so->so_ack_cv, &so->so_lock, 1126 now) == -1) { 1127 eprintsoline(so, ETIME); 1128 return (ETIME); 1129 } 1130 } 1131 else 1132 cv_wait(&so->so_ack_cv, &so->so_lock); 1133 } 1134 *mpp = so->so_ack_mp; 1135 #ifdef DEBUG 1136 { 1137 union T_primitives *tpr; 1138 mblk_t *mp = *mpp; 1139 1140 tpr = (union T_primitives *)mp->b_rptr; 1141 ASSERT(DB_TYPE(mp) == M_PCPROTO); 1142 ASSERT(tpr->type == T_OK_ACK || 1143 tpr->type == T_ERROR_ACK || 1144 tpr->type == T_BIND_ACK || 1145 tpr->type == T_CAPABILITY_ACK || 1146 tpr->type == T_INFO_ACK || 1147 tpr->type == T_OPTMGMT_ACK); 1148 } 1149 #endif /* DEBUG */ 1150 so->so_ack_mp = NULL; 1151 return (0); 1152 } 1153 1154 /* 1155 * Queue a received T_CONN_IND message on so_conn_ind_head/tail. 1156 */ 1157 void 1158 soqueueconnind(struct sonode *so, mblk_t *mp) 1159 { 1160 if (DB_TYPE(mp) != M_PROTO) { 1161 zcmn_err(getzoneid(), CE_WARN, 1162 "sockfs: received unexpected M_PCPROTO T_CONN_IND\n"); 1163 freemsg(mp); 1164 return; 1165 } 1166 1167 mutex_enter(&so->so_lock); 1168 ASSERT(mp->b_next == NULL); 1169 if (so->so_conn_ind_head == NULL) { 1170 so->so_conn_ind_head = mp; 1171 so->so_state |= SS_HASCONNIND; 1172 } else { 1173 ASSERT(so->so_state & SS_HASCONNIND); 1174 ASSERT(so->so_conn_ind_tail->b_next == NULL); 1175 so->so_conn_ind_tail->b_next = mp; 1176 } 1177 so->so_conn_ind_tail = mp; 1178 /* Wakeup a single consumer of the T_CONN_IND */ 1179 cv_signal(&so->so_connind_cv); 1180 mutex_exit(&so->so_lock); 1181 } 1182 1183 /* 1184 * Wait for a T_CONN_IND. 1185 * Don't wait if nonblocking. 1186 * Accept signals and socket errors. 
 */
int
sowaitconnind(struct sonode *so, int fmode, mblk_t **mpp)
{
	mblk_t *mp;
	int error = 0;

	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
	mutex_enter(&so->so_lock);
check_error:
	/* A pending socket error takes precedence over a queued T_CONN_IND */
	if (so->so_error) {
		error = sogeterr(so);
		if (error) {
			mutex_exit(&so->so_lock);
			return (error);
		}
	}

	if (so->so_conn_ind_head == NULL) {
		if (fmode & (FNDELAY|FNONBLOCK)) {
			error = EWOULDBLOCK;
			goto done;
		}
		if (!cv_wait_sig_swap(&so->so_connind_cv, &so->so_lock)) {
			error = EINTR;
			goto done;
		}
		/* Woken up: recheck for an error before looking at the list */
		goto check_error;
	}
	/* Dequeue the first T_CONN_IND; ownership passes to the caller */
	mp = so->so_conn_ind_head;
	so->so_conn_ind_head = mp->b_next;
	mp->b_next = NULL;
	if (so->so_conn_ind_head == NULL) {
		ASSERT(so->so_conn_ind_tail == mp);
		so->so_conn_ind_tail = NULL;
		so->so_state &= ~SS_HASCONNIND;
	}
	*mpp = mp;
done:
	mutex_exit(&so->so_lock);
	return (error);
}

/*
 * Flush a T_CONN_IND matching the sequence number from the list.
 * Return zero if found; non-zero otherwise.
 * This is called very infrequently thus it is ok to do a linear search.
 * On a match so_error is set to ECONNABORTED so a pending accept of
 * that connection fails rather than hangs.
 */
int
soflushconnind(struct sonode *so, t_scalar_t seqno)
{
	mblk_t *prevmp, *mp;
	struct T_conn_ind *tci;

	mutex_enter(&so->so_lock);
	for (prevmp = NULL, mp = so->so_conn_ind_head; mp != NULL;
	    prevmp = mp, mp = mp->b_next) {
		tci = (struct T_conn_ind *)mp->b_rptr;
		if (tci->SEQ_number == seqno) {
			dprintso(so, 1,
			    ("t_discon_ind: found T_CONN_IND %d\n", seqno));
			/* Deleting last? */
			if (so->so_conn_ind_tail == mp) {
				so->so_conn_ind_tail = prevmp;
			}
			if (prevmp == NULL) {
				/* Deleting first */
				so->so_conn_ind_head = mp->b_next;
			} else {
				prevmp->b_next = mp->b_next;
			}
			mp->b_next = NULL;
			if (so->so_conn_ind_head == NULL) {
				ASSERT(so->so_conn_ind_tail == NULL);
				so->so_state &= ~SS_HASCONNIND;
			} else {
				ASSERT(so->so_conn_ind_tail != NULL);
			}
			so->so_error = ECONNABORTED;
			mutex_exit(&so->so_lock);

			/*
			 * T_SSL_PROXY_CONN_IND may carry a handle for
			 * an SSL context, and needs to be released.
			 */
			if ((tci->PRIM_type == T_SSL_PROXY_CONN_IND) &&
			    (mp->b_cont != NULL)) {
				kssl_ctx_t kssl_ctx;

				ASSERT(MBLKL(mp->b_cont) ==
				    sizeof (kssl_ctx_t));
				kssl_ctx = *((kssl_ctx_t *)mp->b_cont->b_rptr);
				kssl_release_ctx(kssl_ctx);
			}
			freemsg(mp);
			return (0);
		}
	}
	mutex_exit(&so->so_lock);
	dprintso(so, 1, ("t_discon_ind: NOT found T_CONN_IND %d\n", seqno));
	return (-1);
}

/*
 * Wait until the socket is connected or there is an error.
 * fmode should contain any nonblocking flags. nosig should be
 * set if the caller does not want the wait to be interrupted by a signal.
 * Caller must hold so_lock; it remains held on return.
 */
int
sowaitconnected(struct sonode *so, int fmode, int nosig)
{
	int error;

	ASSERT(MUTEX_HELD(&so->so_lock));

	while ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) ==
	    SS_ISCONNECTING && so->so_error == 0) {

		dprintso(so, 1, ("waiting for SS_ISCONNECTED on %p\n", so));
		if (fmode & (FNDELAY|FNONBLOCK))
			return (EINPROGRESS);

		if (nosig)
			cv_wait(&so->so_state_cv, &so->so_lock);
		else if (!cv_wait_sig_swap(&so->so_state_cv, &so->so_lock)) {
			/*
			 * Return EINTR and let the application use
			 * nonblocking techniques for detecting when
			 * the connection has been established.
			 */
			return (EINTR);
		}
		dprintso(so, 1, ("awoken on %p\n", so));
	}

	if (so->so_error != 0) {
		error = sogeterr(so);
		ASSERT(error != 0);
		dprintso(so, 1, ("sowaitconnected: error %d\n", error));
		return (error);
	}
	if (!(so->so_state & SS_ISCONNECTED)) {
		/*
		 * Could have received a T_ORDREL_IND or a T_DISCON_IND with
		 * zero errno. Or another thread could have consumed so_error
		 * e.g. by calling read.
		 */
		error = ECONNREFUSED;
		dprintso(so, 1, ("sowaitconnected: error %d\n", error));
		return (error);
	}
	return (0);
}


/*
 * Handle the signal generation aspect of urgent data.
 * Generates S_RDBAND/POLLRDBAND at most once per urgent "event"
 * (tracked via so_oobsigcnt vs. so_oobcnt), unless 'extrasig' requests
 * an additional signal for an updated urgent pointer.
 * Caller must hold so_lock.
 */
static void
so_oob_sig(struct sonode *so, int extrasig,
    strsigset_t *signals, strpollset_t *pollwakeups)
{
	ASSERT(MUTEX_HELD(&so->so_lock));

	ASSERT(so_verify_oobstate(so));
	ASSERT(so->so_oobsigcnt >= so->so_oobcnt);
	if (so->so_oobsigcnt > so->so_oobcnt) {
		/*
		 * Signal has already been generated once for this
		 * urgent "event". However, since TCP can receive updated
		 * urgent pointers we still generate a signal.
		 */
		ASSERT(so->so_state & SS_OOBPEND);
		if (extrasig) {
			*signals |= S_RDBAND;
			*pollwakeups |= POLLRDBAND;
		}
		return;
	}

	so->so_oobsigcnt++;
	ASSERT(so->so_oobsigcnt > 0);	/* Wraparound */
	ASSERT(so->so_oobsigcnt > so->so_oobcnt);

	/*
	 * Record (for select/poll) that urgent data is pending.
	 */
	so->so_state |= SS_OOBPEND;
	/*
	 * New urgent data on the way so forget about any old
	 * urgent data.
	 */
	so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);
	if (so->so_oobmsg != NULL) {
		dprintso(so, 1, ("sock: discarding old oob\n"));
		freemsg(so->so_oobmsg);
		so->so_oobmsg = NULL;
	}
	*signals |= S_RDBAND;
	*pollwakeups |= POLLRDBAND;
	ASSERT(so_verify_oobstate(so));
}

/*
 * Handle the processing of the T_EXDATA_IND with urgent data.
 * Returns the T_EXDATA_IND if it should be queued on the read queue.
 * Caller must hold so_lock; so_oob_sig() must already have accounted
 * for this event (so_oobsigcnt > so_oobcnt on entry).
 */
/* ARGSUSED2 */
static mblk_t *
so_oob_exdata(struct sonode *so, mblk_t *mp,
    strsigset_t *signals, strpollset_t *pollwakeups)
{
	ASSERT(MUTEX_HELD(&so->so_lock));

	ASSERT(so_verify_oobstate(so));

	ASSERT(so->so_oobsigcnt > so->so_oobcnt);

	so->so_oobcnt++;
	ASSERT(so->so_oobcnt > 0);	/* wraparound? */
	ASSERT(so->so_oobsigcnt >= so->so_oobcnt);

	/*
	 * Set MSGMARK for SIOCATMARK.
	 */
	mp->b_flag |= MSGMARK;

	ASSERT(so_verify_oobstate(so));
	return (mp);
}

/*
 * Handle the processing of the actual urgent data.
 * Returns the data mblk if it should be queued on the read queue.
 * Caller must hold so_lock.
 */
static mblk_t *
so_oob_data(struct sonode *so, mblk_t *mp,
    strsigset_t *signals, strpollset_t *pollwakeups)
{
	ASSERT(MUTEX_HELD(&so->so_lock));

	ASSERT(so_verify_oobstate(so));

	ASSERT(so->so_oobsigcnt >= so->so_oobcnt);
	ASSERT(mp != NULL);
	/*
	 * For OOBINLINE we keep the data in the T_EXDATA_IND.
	 * Otherwise we store it in so_oobmsg.
	 */
	ASSERT(so->so_oobmsg == NULL);
	if (so->so_options & SO_OOBINLINE) {
		*pollwakeups |= POLLIN | POLLRDNORM | POLLRDBAND;
		*signals |= S_INPUT | S_RDNORM;
	} else {
		/* Hold the data aside in so_oobmsg for MSG_OOB reads */
		*pollwakeups |= POLLRDBAND;
		so->so_state |= SS_HAVEOOBDATA;
		so->so_oobmsg = mp;
		mp = NULL;
	}
	ASSERT(so_verify_oobstate(so));
	return (mp);
}

/*
 * Caller must hold the mutex.
 * For delayed processing, save the T_DISCON_IND received
 * from below on so_discon_ind_mp.
 * When the message is processed the framework will call:
 *	(*func)(so, mp);
 * The callback pointer is stashed in the message's b_prev field.
 */
static void
so_save_discon_ind(struct sonode *so,
	mblk_t *mp,
	void (*func)(struct sonode *so, mblk_t *))
{
	ASSERT(MUTEX_HELD(&so->so_lock));

	/*
	 * Discard new T_DISCON_IND if we have already received another.
	 * Currently the earlier message can either be on so_discon_ind_mp
	 * or being processed.
	 */
	if (so->so_discon_ind_mp != NULL || (so->so_flag & SOASYNC_UNBIND)) {
		zcmn_err(getzoneid(), CE_WARN,
		    "sockfs: received unexpected additional T_DISCON_IND\n");
		freemsg(mp);
		return;
	}
	mp->b_prev = (mblk_t *)func;
	mp->b_next = NULL;
	so->so_discon_ind_mp = mp;
}

/*
 * Caller must hold the mutex and make sure that either SOLOCKED
 * or SOASYNC_UNBIND is set. Called from so_unlock_single().
 * Perform delayed processing of T_DISCON_IND message on so_discon_ind_mp.
 * Need to ensure that strsock_proto() will not end up sleeping for
 * SOASYNC_UNBIND, while executing this function.
 */
void
so_drain_discon_ind(struct sonode *so)
{
	mblk_t *bp;
	void (*func)(struct sonode *so, mblk_t *);

	ASSERT(MUTEX_HELD(&so->so_lock));
	ASSERT(so->so_flag & (SOLOCKED|SOASYNC_UNBIND));

	/* Process T_DISCON_IND on so_discon_ind_mp */
	if ((bp = so->so_discon_ind_mp) != NULL) {
		so->so_discon_ind_mp = NULL;
		/* Callback was stashed in b_prev by so_save_discon_ind() */
		func = (void (*)())bp->b_prev;
		bp->b_prev = NULL;

		/*
		 * This (*func) is supposed to generate a message downstream
		 * and we need to have a flag set until the corresponding
		 * upstream message reaches stream head.
		 * When processing T_DISCON_IND in strsock_discon_ind
		 * we hold SOASYNC_UNBIND when sending T_UNBIND_REQ down and
		 * drop the flag after we get the ACK in strsock_proto.
		 */
		(void) (*func)(so, bp);
	}
}

/*
 * Caller must hold the mutex.
 * Remove the T_DISCON_IND on so_discon_ind_mp.
 */
void
so_flush_discon_ind(struct sonode *so)
{
	mblk_t *bp;

	ASSERT(MUTEX_HELD(&so->so_lock));

	/*
	 * Remove T_DISCON_IND mblk at so_discon_ind_mp.
	 */
	if ((bp = so->so_discon_ind_mp) != NULL) {
		so->so_discon_ind_mp = NULL;
		/* Clear the stashed callback pointer before freeing */
		bp->b_prev = NULL;
		freemsg(bp);
	}
}

/*
 * Caller must hold the mutex.
 *
 * This function is used to process the T_DISCON_IND message. It does
 * immediate processing when called from strsock_proto and delayed
 * processing of discon_ind saved on so_discon_ind_mp when called from
 * so_drain_discon_ind. When a T_DISCON_IND message is saved in
 * so_discon_ind_mp for delayed processing, this function is registered
 * as the callback function to process the message.
 *
 * SOASYNC_UNBIND should be held in this function, during the non-blocking
 * unbind operation, and should be released only after we receive the ACK
 * in strsock_proto, for the T_UNBIND_REQ sent here.
Since SOLOCKED is not set,
 * no TPI messages would be sent down at this time. This is to prevent M_FLUSH
 * sent from either this function or tcp_unbind(), flushing away any TPI
 * message that is being sent down and stays in a lower module's queue.
 *
 * This function drops so_lock and grabs it again.
 */
static void
strsock_discon_ind(struct sonode *so, mblk_t *discon_mp)
{
	struct vnode *vp;
	struct stdata *stp;
	union T_primitives *tpr;
	struct T_unbind_req *ubr;
	mblk_t *mp;
	int error;

	ASSERT(MUTEX_HELD(&so->so_lock));
	ASSERT(discon_mp);
	ASSERT(discon_mp->b_rptr);

	tpr = (union T_primitives *)discon_mp->b_rptr;
	ASSERT(tpr->type == T_DISCON_IND);

	vp = SOTOV(so);
	stp = vp->v_stream;
	ASSERT(stp);

	/*
	 * Not a listener
	 */
	ASSERT((so->so_state & SS_ACCEPTCONN) == 0);

	/*
	 * This assumes that the name space for DISCON_reason
	 * is the errno name space.
	 */
	soisdisconnected(so, tpr->discon_ind.DISCON_reason);

	/*
	 * Unbind with the transport without blocking.
	 * If we've already received a T_DISCON_IND do not unbind.
	 *
	 * If there is no preallocated unbind message, we have already
	 * unbound with the transport
	 *
	 * If the socket is not bound, no need to unbind.
	 */
	mp = so->so_unbind_mp;
	if (mp == NULL) {
		ASSERT(!(so->so_state & SS_ISBOUND));
		mutex_exit(&so->so_lock);
	} else if (!(so->so_state & SS_ISBOUND)) {
		mutex_exit(&so->so_lock);
	} else {
		so->so_unbind_mp = NULL;

		/*
		 * Is another T_DISCON_IND being processed?
		 */
		ASSERT((so->so_flag & SOASYNC_UNBIND) == 0);

		/*
		 * Make strsock_proto ignore T_OK_ACK and T_ERROR_ACK for
		 * this unbind. Set SOASYNC_UNBIND. This should be cleared
		 * only after we receive the ACK in strsock_proto.
		 */
		so->so_flag |= SOASYNC_UNBIND;
		ASSERT(!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)));
		so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN|SS_LADDR_VALID);
		mutex_exit(&so->so_lock);

		/*
		 * Send down T_UNBIND_REQ ignoring flow control.
		 * XXX Assumes that MSG_IGNFLOW implies that this thread
		 * does not run service procedures.
		 */
		ASSERT(DB_TYPE(mp) == M_PROTO);
		ubr = (struct T_unbind_req *)mp->b_rptr;
		mp->b_wptr += sizeof (*ubr);
		ubr->PRIM_type = T_UNBIND_REQ;

		/*
		 * Flush the read and write side (except stream head read queue)
		 * and send down T_UNBIND_REQ.
		 */
		(void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW);
		error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
		    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
		/* LINTED - warning: statement has no consequent: if */
		if (error) {
			eprintsoline(so, error);
		}
	}

	if (tpr->discon_ind.DISCON_reason != 0)
		strsetrerror(SOTOV(so), 0, 0, sogetrderr);
	strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
	strseteof(SOTOV(so), 1);
	/*
	 * strseteof takes care of read side wakeups,
	 * pollwakeups, and signals.
	 */
	dprintso(so, 1, ("T_DISCON_IND: error %d\n", so->so_error));
	freemsg(discon_mp);

	/* Write side: wake pollers and sleepers, send SIGPOLL if requested */
	pollwakeup(&stp->sd_pollist, POLLOUT);
	mutex_enter(&stp->sd_lock);

	/*
	 * Wake sleeping write
	 */
	if (stp->sd_flag & WSLEEP) {
		stp->sd_flag &= ~WSLEEP;
		cv_broadcast(&stp->sd_wrq->q_wait);
	}

	/*
	 * strsendsig can handle multiple signals with a
	 * single call. Send SIGPOLL for S_OUTPUT event.
	 */
	if (stp->sd_sigflags & S_OUTPUT)
		strsendsig(stp->sd_siglist, S_OUTPUT, 0, 0);

	mutex_exit(&stp->sd_lock);
	mutex_enter(&so->so_lock);
}

/*
 * This routine is registered with the stream head to receive M_PROTO
 * and M_PCPROTO messages.
1679 * 1680 * Returns NULL if the message was consumed. 1681 * Returns an mblk to make that mblk be processed (and queued) by the stream 1682 * head. 1683 * 1684 * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and 1685 * *pollwakeups) for the stream head to take action on. Note that since 1686 * sockets always deliver SIGIO for every new piece of data this routine 1687 * never sets *firstmsgsigs; any signals are returned in *allmsgsigs. 1688 * 1689 * This routine handles all data related TPI messages independent of 1690 * the type of the socket i.e. it doesn't care if T_UNITDATA_IND message 1691 * arrive on a SOCK_STREAM. 1692 */ 1693 static mblk_t * 1694 strsock_proto(vnode_t *vp, mblk_t *mp, 1695 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 1696 strsigset_t *allmsgsigs, strpollset_t *pollwakeups) 1697 { 1698 union T_primitives *tpr; 1699 struct sonode *so; 1700 1701 so = VTOSO(vp); 1702 1703 dprintso(so, 1, ("strsock_proto(%p, %p)\n", vp, mp)); 1704 1705 /* Set default return values */ 1706 *firstmsgsigs = *wakeups = *allmsgsigs = *pollwakeups = 0; 1707 1708 ASSERT(DB_TYPE(mp) == M_PROTO || 1709 DB_TYPE(mp) == M_PCPROTO); 1710 1711 if (MBLKL(mp) < sizeof (tpr->type)) { 1712 /* The message is too short to even contain the primitive */ 1713 zcmn_err(getzoneid(), CE_WARN, 1714 "sockfs: Too short TPI message received. Len = %ld\n", 1715 (ptrdiff_t)(MBLKL(mp))); 1716 freemsg(mp); 1717 return (NULL); 1718 } 1719 if (!__TPI_PRIM_ISALIGNED(mp->b_rptr)) { 1720 /* The read pointer is not aligned correctly for TPI */ 1721 zcmn_err(getzoneid(), CE_WARN, 1722 "sockfs: Unaligned TPI message received. 
rptr = %p\n", 1723 (void *)mp->b_rptr); 1724 freemsg(mp); 1725 return (NULL); 1726 } 1727 tpr = (union T_primitives *)mp->b_rptr; 1728 dprintso(so, 1, ("strsock_proto: primitive %d\n", tpr->type)); 1729 1730 switch (tpr->type) { 1731 1732 case T_DATA_IND: 1733 if (MBLKL(mp) < sizeof (struct T_data_ind)) { 1734 zcmn_err(getzoneid(), CE_WARN, 1735 "sockfs: Too short T_DATA_IND. Len = %ld\n", 1736 (ptrdiff_t)(MBLKL(mp))); 1737 freemsg(mp); 1738 return (NULL); 1739 } 1740 /* 1741 * Ignore zero-length T_DATA_IND messages. These might be 1742 * generated by some transports. 1743 * This is needed to prevent read (which skips the M_PROTO 1744 * part) to unexpectedly return 0 (or return EWOULDBLOCK 1745 * on a non-blocking socket after select/poll has indicated 1746 * that data is available). 1747 */ 1748 if (msgdsize(mp->b_cont) == 0) { 1749 dprintso(so, 0, 1750 ("strsock_proto: zero length T_DATA_IND\n")); 1751 freemsg(mp); 1752 return (NULL); 1753 } 1754 *allmsgsigs = S_INPUT | S_RDNORM; 1755 *pollwakeups = POLLIN | POLLRDNORM; 1756 *wakeups = RSLEEP; 1757 return (mp); 1758 1759 case T_UNITDATA_IND: { 1760 struct T_unitdata_ind *tudi = &tpr->unitdata_ind; 1761 void *addr; 1762 t_uscalar_t addrlen; 1763 1764 if (MBLKL(mp) < sizeof (struct T_unitdata_ind)) { 1765 zcmn_err(getzoneid(), CE_WARN, 1766 "sockfs: Too short T_UNITDATA_IND. Len = %ld\n", 1767 (ptrdiff_t)(MBLKL(mp))); 1768 freemsg(mp); 1769 return (NULL); 1770 } 1771 1772 /* Is this is not a connected datagram socket? */ 1773 if ((so->so_mode & SM_CONNREQUIRED) || 1774 !(so->so_state & SS_ISCONNECTED)) { 1775 /* 1776 * Not a connected datagram socket. Look for 1777 * the SO_UNIX_CLOSE option. If such an option is found 1778 * discard the message (since it has no meaning 1779 * unless connected). 
1780 */ 1781 if (so->so_family == AF_UNIX && msgdsize(mp) == 0 && 1782 tudi->OPT_length != 0) { 1783 void *opt; 1784 t_uscalar_t optlen = tudi->OPT_length; 1785 1786 opt = sogetoff(mp, tudi->OPT_offset, 1787 optlen, __TPI_ALIGN_SIZE); 1788 if (opt == NULL) { 1789 /* The len/off falls outside mp */ 1790 freemsg(mp); 1791 mutex_enter(&so->so_lock); 1792 soseterror(so, EPROTO); 1793 mutex_exit(&so->so_lock); 1794 zcmn_err(getzoneid(), CE_WARN, 1795 "sockfs: T_unidata_ind with " 1796 "invalid optlen/offset %u/%d\n", 1797 optlen, tudi->OPT_offset); 1798 return (NULL); 1799 } 1800 if (so_getopt_unix_close(opt, optlen)) { 1801 freemsg(mp); 1802 return (NULL); 1803 } 1804 } 1805 *allmsgsigs = S_INPUT | S_RDNORM; 1806 *pollwakeups = POLLIN | POLLRDNORM; 1807 *wakeups = RSLEEP; 1808 if (audit_active) 1809 audit_sock(T_UNITDATA_IND, strvp2wq(vp), 1810 mp, 0); 1811 return (mp); 1812 } 1813 1814 /* 1815 * A connect datagram socket. For AF_INET{,6} we verify that 1816 * the source address matches the "connected to" address. 1817 * The semantics of AF_UNIX sockets is to not verify 1818 * the source address. 1819 * Note that this source address verification is transport 1820 * specific. Thus the real fix would be to extent TPI 1821 * to allow T_CONN_REQ messages to be send to connectionless 1822 * transport providers and always let the transport provider 1823 * do whatever filtering is needed. 1824 * 1825 * The verification/filtering semantics for transports 1826 * other than AF_INET and AF_UNIX are unknown. The choice 1827 * would be to either filter using bcmp or let all messages 1828 * get through. This code does not filter other address 1829 * families since this at least allows the application to 1830 * work around any missing filtering. 1831 * 1832 * XXX Should we move filtering to UDP/ICMP??? 1833 * That would require passing e.g. a T_DISCON_REQ to UDP 1834 * when the socket becomes unconnected. 
1835 */ 1836 addrlen = tudi->SRC_length; 1837 /* 1838 * The alignment restriction is really to strict but 1839 * we want enough alignment to inspect the fields of 1840 * a sockaddr_in. 1841 */ 1842 addr = sogetoff(mp, tudi->SRC_offset, addrlen, 1843 __TPI_ALIGN_SIZE); 1844 if (addr == NULL) { 1845 freemsg(mp); 1846 mutex_enter(&so->so_lock); 1847 soseterror(so, EPROTO); 1848 mutex_exit(&so->so_lock); 1849 zcmn_err(getzoneid(), CE_WARN, 1850 "sockfs: T_unidata_ind with invalid " 1851 "addrlen/offset %u/%d\n", 1852 addrlen, tudi->SRC_offset); 1853 return (NULL); 1854 } 1855 1856 if (so->so_family == AF_INET) { 1857 /* 1858 * For AF_INET we allow wildcarding both sin_addr 1859 * and sin_port. 1860 */ 1861 struct sockaddr_in *faddr, *sin; 1862 1863 /* Prevent so_faddr_sa from changing while accessed */ 1864 mutex_enter(&so->so_lock); 1865 ASSERT(so->so_faddr_len == 1866 (socklen_t)sizeof (struct sockaddr_in)); 1867 faddr = (struct sockaddr_in *)so->so_faddr_sa; 1868 sin = (struct sockaddr_in *)addr; 1869 if (addrlen != 1870 (t_uscalar_t)sizeof (struct sockaddr_in) || 1871 (sin->sin_addr.s_addr != faddr->sin_addr.s_addr && 1872 faddr->sin_addr.s_addr != INADDR_ANY) || 1873 (so->so_type != SOCK_RAW && 1874 sin->sin_port != faddr->sin_port && 1875 faddr->sin_port != 0)) { 1876 #ifdef DEBUG 1877 dprintso(so, 0, 1878 ("sockfs: T_UNITDATA_IND mismatch: %s", 1879 pr_addr(so->so_family, 1880 (struct sockaddr *)addr, 1881 addrlen))); 1882 dprintso(so, 0, (" - %s\n", 1883 pr_addr(so->so_family, so->so_faddr_sa, 1884 (t_uscalar_t)so->so_faddr_len))); 1885 #endif /* DEBUG */ 1886 mutex_exit(&so->so_lock); 1887 freemsg(mp); 1888 return (NULL); 1889 } 1890 mutex_exit(&so->so_lock); 1891 } else if (so->so_family == AF_INET6) { 1892 /* 1893 * For AF_INET6 we allow wildcarding both sin6_addr 1894 * and sin6_port. 
1895 */ 1896 struct sockaddr_in6 *faddr6, *sin6; 1897 static struct in6_addr zeroes; /* inits to all zeros */ 1898 1899 /* Prevent so_faddr_sa from changing while accessed */ 1900 mutex_enter(&so->so_lock); 1901 ASSERT(so->so_faddr_len == 1902 (socklen_t)sizeof (struct sockaddr_in6)); 1903 faddr6 = (struct sockaddr_in6 *)so->so_faddr_sa; 1904 sin6 = (struct sockaddr_in6 *)addr; 1905 /* XXX could we get a mapped address ::ffff:0.0.0.0 ? */ 1906 if (addrlen != 1907 (t_uscalar_t)sizeof (struct sockaddr_in6) || 1908 (!IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, 1909 &faddr6->sin6_addr) && 1910 !IN6_ARE_ADDR_EQUAL(&faddr6->sin6_addr, &zeroes)) || 1911 (so->so_type != SOCK_RAW && 1912 sin6->sin6_port != faddr6->sin6_port && 1913 faddr6->sin6_port != 0)) { 1914 #ifdef DEBUG 1915 dprintso(so, 0, 1916 ("sockfs: T_UNITDATA_IND mismatch: %s", 1917 pr_addr(so->so_family, 1918 (struct sockaddr *)addr, 1919 addrlen))); 1920 dprintso(so, 0, (" - %s\n", 1921 pr_addr(so->so_family, so->so_faddr_sa, 1922 (t_uscalar_t)so->so_faddr_len))); 1923 #endif /* DEBUG */ 1924 mutex_exit(&so->so_lock); 1925 freemsg(mp); 1926 return (NULL); 1927 } 1928 mutex_exit(&so->so_lock); 1929 } else if (so->so_family == AF_UNIX && 1930 msgdsize(mp->b_cont) == 0 && 1931 tudi->OPT_length != 0) { 1932 /* 1933 * Attempt to extract AF_UNIX 1934 * SO_UNIX_CLOSE indication from options. 1935 */ 1936 void *opt; 1937 t_uscalar_t optlen = tudi->OPT_length; 1938 1939 opt = sogetoff(mp, tudi->OPT_offset, 1940 optlen, __TPI_ALIGN_SIZE); 1941 if (opt == NULL) { 1942 /* The len/off falls outside mp */ 1943 freemsg(mp); 1944 mutex_enter(&so->so_lock); 1945 soseterror(so, EPROTO); 1946 mutex_exit(&so->so_lock); 1947 zcmn_err(getzoneid(), CE_WARN, 1948 "sockfs: T_unidata_ind with invalid " 1949 "optlen/offset %u/%d\n", 1950 optlen, tudi->OPT_offset); 1951 return (NULL); 1952 } 1953 /* 1954 * If we received a unix close indication mark the 1955 * socket and discard this message. 
1956 */ 1957 if (so_getopt_unix_close(opt, optlen)) { 1958 mutex_enter(&so->so_lock); 1959 sobreakconn(so, ECONNRESET); 1960 mutex_exit(&so->so_lock); 1961 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 1962 freemsg(mp); 1963 *pollwakeups = POLLIN | POLLRDNORM; 1964 *allmsgsigs = S_INPUT | S_RDNORM; 1965 *wakeups = RSLEEP; 1966 return (NULL); 1967 } 1968 } 1969 *allmsgsigs = S_INPUT | S_RDNORM; 1970 *pollwakeups = POLLIN | POLLRDNORM; 1971 *wakeups = RSLEEP; 1972 return (mp); 1973 } 1974 1975 case T_OPTDATA_IND: { 1976 struct T_optdata_ind *tdi = &tpr->optdata_ind; 1977 1978 if (MBLKL(mp) < sizeof (struct T_optdata_ind)) { 1979 zcmn_err(getzoneid(), CE_WARN, 1980 "sockfs: Too short T_OPTDATA_IND. Len = %ld\n", 1981 (ptrdiff_t)(MBLKL(mp))); 1982 freemsg(mp); 1983 return (NULL); 1984 } 1985 /* 1986 * Allow zero-length messages carrying options. 1987 * This is used when carrying the SO_UNIX_CLOSE option. 1988 */ 1989 if (so->so_family == AF_UNIX && msgdsize(mp->b_cont) == 0 && 1990 tdi->OPT_length != 0) { 1991 /* 1992 * Attempt to extract AF_UNIX close indication 1993 * from the options. Ignore any other options - 1994 * those are handled once the message is removed 1995 * from the queue. 1996 * The close indication message should not carry data. 1997 */ 1998 void *opt; 1999 t_uscalar_t optlen = tdi->OPT_length; 2000 2001 opt = sogetoff(mp, tdi->OPT_offset, 2002 optlen, __TPI_ALIGN_SIZE); 2003 if (opt == NULL) { 2004 /* The len/off falls outside mp */ 2005 freemsg(mp); 2006 mutex_enter(&so->so_lock); 2007 soseterror(so, EPROTO); 2008 mutex_exit(&so->so_lock); 2009 zcmn_err(getzoneid(), CE_WARN, 2010 "sockfs: T_optdata_ind with invalid " 2011 "optlen/offset %u/%d\n", 2012 optlen, tdi->OPT_offset); 2013 return (NULL); 2014 } 2015 /* 2016 * If we received a close indication mark the 2017 * socket and discard this message. 
2018 */ 2019 if (so_getopt_unix_close(opt, optlen)) { 2020 mutex_enter(&so->so_lock); 2021 socantsendmore(so); 2022 mutex_exit(&so->so_lock); 2023 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2024 freemsg(mp); 2025 return (NULL); 2026 } 2027 } 2028 *allmsgsigs = S_INPUT | S_RDNORM; 2029 *pollwakeups = POLLIN | POLLRDNORM; 2030 *wakeups = RSLEEP; 2031 return (mp); 2032 } 2033 2034 case T_EXDATA_IND: { 2035 mblk_t *mctl, *mdata; 2036 mblk_t *lbp; 2037 union T_primitives *tprp; 2038 struct stdata *stp; 2039 queue_t *qp; 2040 2041 if (MBLKL(mp) < sizeof (struct T_exdata_ind)) { 2042 zcmn_err(getzoneid(), CE_WARN, 2043 "sockfs: Too short T_EXDATA_IND. Len = %ld\n", 2044 (ptrdiff_t)(MBLKL(mp))); 2045 freemsg(mp); 2046 return (NULL); 2047 } 2048 /* 2049 * Ignore zero-length T_EXDATA_IND messages. These might be 2050 * generated by some transports. 2051 * 2052 * This is needed to prevent read (which skips the M_PROTO 2053 * part) to unexpectedly return 0 (or return EWOULDBLOCK 2054 * on a non-blocking socket after select/poll has indicated 2055 * that data is available). 2056 */ 2057 dprintso(so, 1, 2058 ("T_EXDATA_IND(%p): counts %d/%d state %s\n", 2059 vp, so->so_oobsigcnt, so->so_oobcnt, 2060 pr_state(so->so_state, so->so_mode))); 2061 2062 if (msgdsize(mp->b_cont) == 0) { 2063 dprintso(so, 0, 2064 ("strsock_proto: zero length T_EXDATA_IND\n")); 2065 freemsg(mp); 2066 return (NULL); 2067 } 2068 2069 /* 2070 * Split into the T_EXDATA_IND and the M_DATA part. 
2071 * We process these three pieces separately: 2072 * signal generation 2073 * handling T_EXDATA_IND 2074 * handling M_DATA component 2075 */ 2076 mctl = mp; 2077 mdata = mctl->b_cont; 2078 mctl->b_cont = NULL; 2079 mutex_enter(&so->so_lock); 2080 so_oob_sig(so, 0, allmsgsigs, pollwakeups); 2081 mctl = so_oob_exdata(so, mctl, allmsgsigs, pollwakeups); 2082 mdata = so_oob_data(so, mdata, allmsgsigs, pollwakeups); 2083 2084 stp = vp->v_stream; 2085 ASSERT(stp != NULL); 2086 qp = _RD(stp->sd_wrq); 2087 2088 mutex_enter(QLOCK(qp)); 2089 lbp = qp->q_last; 2090 2091 /* 2092 * We want to avoid queueing up a string of T_EXDATA_IND 2093 * messages with no intervening data messages at the stream 2094 * head. These messages contribute to the total message 2095 * count. Eventually this can lead to STREAMS flow contol 2096 * and also cause TCP to advertise a zero window condition 2097 * to the peer. This can happen in the degenerate case where 2098 * the sender and receiver exchange only OOB data. The sender 2099 * only sends messages with MSG_OOB flag and the receiver 2100 * receives only MSG_OOB messages and does not use SO_OOBINLINE. 2101 * An example of this scenario has been reported in applications 2102 * that use OOB data to exchange heart beats. Flow control 2103 * relief will never happen if the application only reads OOB 2104 * data which is done directly by sorecvoob() and the 2105 * T_EXDATA_IND messages at the streamhead won't be consumed. 2106 * Note that there is no correctness issue in compressing the 2107 * string of T_EXDATA_IND messages into a single T_EXDATA_IND 2108 * message. A single read that does not specify MSG_OOB will 2109 * read across all the marks in a loop in sotpi_recvmsg(). 2110 * Each mark is individually distinguishable only if the 2111 * T_EXDATA_IND messages are separated by data messages. 
2112 */ 2113 if ((qp->q_first != NULL) && (DB_TYPE(lbp) == M_PROTO)) { 2114 tprp = (union T_primitives *)lbp->b_rptr; 2115 if ((tprp->type == T_EXDATA_IND) && 2116 !(so->so_options & SO_OOBINLINE)) { 2117 2118 /* 2119 * free the new M_PROTO message 2120 */ 2121 freemsg(mctl); 2122 2123 /* 2124 * adjust the OOB count and OOB signal count 2125 * just incremented for the new OOB data. 2126 */ 2127 so->so_oobcnt--; 2128 so->so_oobsigcnt--; 2129 mutex_exit(QLOCK(qp)); 2130 mutex_exit(&so->so_lock); 2131 return (NULL); 2132 } 2133 } 2134 mutex_exit(QLOCK(qp)); 2135 2136 /* 2137 * Pass the T_EXDATA_IND and the M_DATA back separately 2138 * by using b_next linkage. (The stream head will queue any 2139 * b_next linked messages separately.) This is needed 2140 * since MSGMARK applies to the last by of the message 2141 * hence we can not have any M_DATA component attached 2142 * to the marked T_EXDATA_IND. Note that the stream head 2143 * will not consolidate M_DATA messages onto an MSGMARK'ed 2144 * message in order to preserve the constraint that 2145 * the T_EXDATA_IND always is a separate message. 
2146 */ 2147 ASSERT(mctl != NULL); 2148 mctl->b_next = mdata; 2149 mp = mctl; 2150 #ifdef DEBUG 2151 if (mdata == NULL) { 2152 dprintso(so, 1, 2153 ("after outofline T_EXDATA_IND(%p): " 2154 "counts %d/%d poll 0x%x sig 0x%x state %s\n", 2155 vp, so->so_oobsigcnt, 2156 so->so_oobcnt, *pollwakeups, *allmsgsigs, 2157 pr_state(so->so_state, so->so_mode))); 2158 } else { 2159 dprintso(so, 1, 2160 ("after inline T_EXDATA_IND(%p): " 2161 "counts %d/%d poll 0x%x sig 0x%x state %s\n", 2162 vp, so->so_oobsigcnt, 2163 so->so_oobcnt, *pollwakeups, *allmsgsigs, 2164 pr_state(so->so_state, so->so_mode))); 2165 } 2166 #endif /* DEBUG */ 2167 mutex_exit(&so->so_lock); 2168 *wakeups = RSLEEP; 2169 return (mp); 2170 } 2171 2172 case T_CONN_CON: { 2173 struct T_conn_con *conn_con; 2174 void *addr; 2175 t_uscalar_t addrlen; 2176 2177 /* 2178 * Verify the state, update the state to ISCONNECTED, 2179 * record the potentially new address in the message, 2180 * and drop the message. 2181 */ 2182 if (MBLKL(mp) < sizeof (struct T_conn_con)) { 2183 zcmn_err(getzoneid(), CE_WARN, 2184 "sockfs: Too short T_CONN_CON. Len = %ld\n", 2185 (ptrdiff_t)(MBLKL(mp))); 2186 freemsg(mp); 2187 return (NULL); 2188 } 2189 2190 mutex_enter(&so->so_lock); 2191 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 2192 SS_ISCONNECTING) { 2193 mutex_exit(&so->so_lock); 2194 dprintso(so, 1, 2195 ("T_CONN_CON: state %x\n", so->so_state)); 2196 freemsg(mp); 2197 return (NULL); 2198 } 2199 2200 conn_con = &tpr->conn_con; 2201 addrlen = conn_con->RES_length; 2202 /* 2203 * Allow the address to be of different size than sent down 2204 * in the T_CONN_REQ as long as it doesn't exceed the maxlen. 2205 * For AF_UNIX require the identical length. 2206 */ 2207 if (so->so_family == AF_UNIX ? 
2208 addrlen != (t_uscalar_t)sizeof (so->so_ux_laddr) : 2209 addrlen > (t_uscalar_t)so->so_faddr_maxlen) { 2210 zcmn_err(getzoneid(), CE_WARN, 2211 "sockfs: T_conn_con with different " 2212 "length %u/%d\n", 2213 addrlen, conn_con->RES_length); 2214 soisdisconnected(so, EPROTO); 2215 mutex_exit(&so->so_lock); 2216 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 2217 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2218 strseteof(SOTOV(so), 1); 2219 freemsg(mp); 2220 /* 2221 * strseteof takes care of read side wakeups, 2222 * pollwakeups, and signals. 2223 */ 2224 *wakeups = WSLEEP; 2225 *allmsgsigs = S_OUTPUT; 2226 *pollwakeups = POLLOUT; 2227 return (NULL); 2228 } 2229 addr = sogetoff(mp, conn_con->RES_offset, addrlen, 1); 2230 if (addr == NULL) { 2231 zcmn_err(getzoneid(), CE_WARN, 2232 "sockfs: T_conn_con with invalid " 2233 "addrlen/offset %u/%d\n", 2234 addrlen, conn_con->RES_offset); 2235 mutex_exit(&so->so_lock); 2236 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 2237 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2238 strseteof(SOTOV(so), 1); 2239 freemsg(mp); 2240 /* 2241 * strseteof takes care of read side wakeups, 2242 * pollwakeups, and signals. 2243 */ 2244 *wakeups = WSLEEP; 2245 *allmsgsigs = S_OUTPUT; 2246 *pollwakeups = POLLOUT; 2247 return (NULL); 2248 } 2249 2250 /* 2251 * Save for getpeername. 2252 */ 2253 if (so->so_family != AF_UNIX) { 2254 so->so_faddr_len = (socklen_t)addrlen; 2255 ASSERT(so->so_faddr_len <= so->so_faddr_maxlen); 2256 bcopy(addr, so->so_faddr_sa, addrlen); 2257 so->so_state |= SS_FADDR_VALID; 2258 } 2259 2260 if (so->so_peercred != NULL) 2261 crfree(so->so_peercred); 2262 so->so_peercred = DB_CRED(mp); 2263 so->so_cpid = DB_CPID(mp); 2264 if (so->so_peercred != NULL) 2265 crhold(so->so_peercred); 2266 2267 /* Wakeup anybody sleeping in sowaitconnected */ 2268 soisconnected(so); 2269 mutex_exit(&so->so_lock); 2270 2271 /* 2272 * The socket is now available for sending data. 
2273 */ 2274 *wakeups = WSLEEP; 2275 *allmsgsigs = S_OUTPUT; 2276 *pollwakeups = POLLOUT; 2277 freemsg(mp); 2278 return (NULL); 2279 } 2280 2281 /* 2282 * Extra processing in case of an SSL proxy, before queuing or 2283 * forwarding to the fallback endpoint 2284 */ 2285 case T_SSL_PROXY_CONN_IND: 2286 case T_CONN_IND: 2287 /* 2288 * Verify the min size and queue the message on 2289 * the so_conn_ind_head/tail list. 2290 */ 2291 if (MBLKL(mp) < sizeof (struct T_conn_ind)) { 2292 zcmn_err(getzoneid(), CE_WARN, 2293 "sockfs: Too short T_CONN_IND. Len = %ld\n", 2294 (ptrdiff_t)(MBLKL(mp))); 2295 freemsg(mp); 2296 return (NULL); 2297 } 2298 2299 if (audit_active) 2300 audit_sock(T_CONN_IND, strvp2wq(vp), mp, 0); 2301 if (!(so->so_state & SS_ACCEPTCONN)) { 2302 zcmn_err(getzoneid(), CE_WARN, 2303 "sockfs: T_conn_ind on non-listening socket\n"); 2304 freemsg(mp); 2305 return (NULL); 2306 } 2307 2308 if (tpr->type == T_SSL_PROXY_CONN_IND && mp->b_cont == NULL) { 2309 /* No context: need to fall back */ 2310 struct sonode *fbso; 2311 stdata_t *fbstp; 2312 2313 tpr->type = T_CONN_IND; 2314 2315 fbso = kssl_find_fallback(so->so_kssl_ent); 2316 2317 /* 2318 * No fallback: the remote will timeout and 2319 * disconnect. 2320 */ 2321 if (fbso == NULL) { 2322 freemsg(mp); 2323 return (NULL); 2324 } 2325 fbstp = SOTOV(fbso)->v_stream; 2326 qreply(fbstp->sd_wrq->q_next, mp); 2327 return (NULL); 2328 } 2329 soqueueconnind(so, mp); 2330 *allmsgsigs = S_INPUT | S_RDNORM; 2331 *pollwakeups = POLLIN | POLLRDNORM; 2332 *wakeups = RSLEEP; 2333 return (NULL); 2334 2335 case T_ORDREL_IND: 2336 if (MBLKL(mp) < sizeof (struct T_ordrel_ind)) { 2337 zcmn_err(getzoneid(), CE_WARN, 2338 "sockfs: Too short T_ORDREL_IND. Len = %ld\n", 2339 (ptrdiff_t)(MBLKL(mp))); 2340 freemsg(mp); 2341 return (NULL); 2342 } 2343 2344 /* 2345 * Some providers send this when not fully connected. 2346 * SunLink X.25 needs to retrieve disconnect reason after 2347 * disconnect for compatibility. 
It uses T_ORDREL_IND 2348 * instead of T_DISCON_IND so that it may use the 2349 * endpoint after a connect failure to retrieve the 2350 * reason using an ioctl. Thus we explicitly clear 2351 * SS_ISCONNECTING here for SunLink X.25. 2352 * This is a needed TPI violation. 2353 */ 2354 mutex_enter(&so->so_lock); 2355 so->so_state &= ~SS_ISCONNECTING; 2356 socantrcvmore(so); 2357 mutex_exit(&so->so_lock); 2358 strseteof(SOTOV(so), 1); 2359 /* 2360 * strseteof takes care of read side wakeups, 2361 * pollwakeups, and signals. 2362 */ 2363 freemsg(mp); 2364 return (NULL); 2365 2366 case T_DISCON_IND: 2367 if (MBLKL(mp) < sizeof (struct T_discon_ind)) { 2368 zcmn_err(getzoneid(), CE_WARN, 2369 "sockfs: Too short T_DISCON_IND. Len = %ld\n", 2370 (ptrdiff_t)(MBLKL(mp))); 2371 freemsg(mp); 2372 return (NULL); 2373 } 2374 if (so->so_state & SS_ACCEPTCONN) { 2375 /* 2376 * This is a listener. Look for a queued T_CONN_IND 2377 * with a matching sequence number and remove it 2378 * from the list. 2379 * It is normal to not find the sequence number since 2380 * the soaccept might have already dequeued it 2381 * (in which case the T_CONN_RES will fail with 2382 * TBADSEQ). 2383 */ 2384 (void) soflushconnind(so, tpr->discon_ind.SEQ_number); 2385 freemsg(mp); 2386 return (0); 2387 } 2388 2389 /* 2390 * Not a listener 2391 * 2392 * If SS_CANTRCVMORE for AF_UNIX ignore the discon_reason. 2393 * Such a discon_ind appears when the peer has first done 2394 * a shutdown() followed by a close() in which case we just 2395 * want to record socantsendmore. 2396 * In this case sockfs first receives a T_ORDREL_IND followed 2397 * by a T_DISCON_IND. 2398 * Note that for other transports (e.g. TCP) we need to handle 2399 * the discon_ind in this case since it signals an error. 
2400 */ 2401 mutex_enter(&so->so_lock); 2402 if ((so->so_state & SS_CANTRCVMORE) && 2403 (so->so_family == AF_UNIX)) { 2404 socantsendmore(so); 2405 mutex_exit(&so->so_lock); 2406 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2407 dprintso(so, 1, 2408 ("T_DISCON_IND: error %d\n", so->so_error)); 2409 freemsg(mp); 2410 /* 2411 * Set these variables for caller to process them. 2412 * For the else part where T_DISCON_IND is processed, 2413 * this will be done in the function being called 2414 * (strsock_discon_ind()) 2415 */ 2416 *wakeups = WSLEEP; 2417 *allmsgsigs = S_OUTPUT; 2418 *pollwakeups = POLLOUT; 2419 } else if (so->so_flag & (SOASYNC_UNBIND | SOLOCKED)) { 2420 /* 2421 * Deferred processing of T_DISCON_IND 2422 */ 2423 so_save_discon_ind(so, mp, strsock_discon_ind); 2424 mutex_exit(&so->so_lock); 2425 } else { 2426 /* 2427 * Process T_DISCON_IND now 2428 */ 2429 (void) strsock_discon_ind(so, mp); 2430 mutex_exit(&so->so_lock); 2431 } 2432 return (NULL); 2433 2434 case T_UDERROR_IND: { 2435 struct T_uderror_ind *tudi = &tpr->uderror_ind; 2436 void *addr; 2437 t_uscalar_t addrlen; 2438 int error; 2439 2440 dprintso(so, 0, 2441 ("T_UDERROR_IND: error %d\n", tudi->ERROR_type)); 2442 2443 if (MBLKL(mp) < sizeof (struct T_uderror_ind)) { 2444 zcmn_err(getzoneid(), CE_WARN, 2445 "sockfs: Too short T_UDERROR_IND. 
Len = %ld\n", 2446 (ptrdiff_t)(MBLKL(mp))); 2447 freemsg(mp); 2448 return (NULL); 2449 } 2450 /* Ignore on connection-oriented transports */ 2451 if (so->so_mode & SM_CONNREQUIRED) { 2452 freemsg(mp); 2453 eprintsoline(so, 0); 2454 zcmn_err(getzoneid(), CE_WARN, 2455 "sockfs: T_uderror_ind on connection-oriented " 2456 "transport\n"); 2457 return (NULL); 2458 } 2459 addrlen = tudi->DEST_length; 2460 addr = sogetoff(mp, tudi->DEST_offset, addrlen, 1); 2461 if (addr == NULL) { 2462 zcmn_err(getzoneid(), CE_WARN, 2463 "sockfs: T_uderror_ind with invalid " 2464 "addrlen/offset %u/%d\n", 2465 addrlen, tudi->DEST_offset); 2466 freemsg(mp); 2467 return (NULL); 2468 } 2469 2470 /* Verify source address for connected socket. */ 2471 mutex_enter(&so->so_lock); 2472 if (so->so_state & SS_ISCONNECTED) { 2473 void *faddr; 2474 t_uscalar_t faddr_len; 2475 boolean_t match = B_FALSE; 2476 2477 switch (so->so_family) { 2478 case AF_INET: { 2479 /* Compare just IP address and port */ 2480 struct sockaddr_in *sin1, *sin2; 2481 2482 sin1 = (struct sockaddr_in *)so->so_faddr_sa; 2483 sin2 = (struct sockaddr_in *)addr; 2484 if (addrlen == sizeof (struct sockaddr_in) && 2485 sin1->sin_port == sin2->sin_port && 2486 sin1->sin_addr.s_addr == 2487 sin2->sin_addr.s_addr) 2488 match = B_TRUE; 2489 break; 2490 } 2491 case AF_INET6: { 2492 /* Compare just IP address and port. 
Not flow */ 2493 struct sockaddr_in6 *sin1, *sin2; 2494 2495 sin1 = (struct sockaddr_in6 *)so->so_faddr_sa; 2496 sin2 = (struct sockaddr_in6 *)addr; 2497 if (addrlen == sizeof (struct sockaddr_in6) && 2498 sin1->sin6_port == sin2->sin6_port && 2499 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 2500 &sin2->sin6_addr)) 2501 match = B_TRUE; 2502 break; 2503 } 2504 case AF_UNIX: 2505 faddr = &so->so_ux_faddr; 2506 faddr_len = 2507 (t_uscalar_t)sizeof (so->so_ux_faddr); 2508 if (faddr_len == addrlen && 2509 bcmp(addr, faddr, addrlen) == 0) 2510 match = B_TRUE; 2511 break; 2512 default: 2513 faddr = so->so_faddr_sa; 2514 faddr_len = (t_uscalar_t)so->so_faddr_len; 2515 if (faddr_len == addrlen && 2516 bcmp(addr, faddr, addrlen) == 0) 2517 match = B_TRUE; 2518 break; 2519 } 2520 2521 if (!match) { 2522 #ifdef DEBUG 2523 dprintso(so, 0, 2524 ("sockfs: T_UDERR_IND mismatch: %s - ", 2525 pr_addr(so->so_family, 2526 (struct sockaddr *)addr, 2527 addrlen))); 2528 dprintso(so, 0, ("%s\n", 2529 pr_addr(so->so_family, so->so_faddr_sa, 2530 so->so_faddr_len))); 2531 #endif /* DEBUG */ 2532 mutex_exit(&so->so_lock); 2533 freemsg(mp); 2534 return (NULL); 2535 } 2536 /* 2537 * Make the write error nonpersistent. If the error 2538 * is zero we use ECONNRESET. 2539 * This assumes that the name space for ERROR_type 2540 * is the errno name space. 2541 */ 2542 if (tudi->ERROR_type != 0) 2543 error = tudi->ERROR_type; 2544 else 2545 error = ECONNRESET; 2546 2547 soseterror(so, error); 2548 mutex_exit(&so->so_lock); 2549 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 2550 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2551 *wakeups = RSLEEP | WSLEEP; 2552 *allmsgsigs = S_INPUT | S_RDNORM | S_OUTPUT; 2553 *pollwakeups = POLLIN | POLLRDNORM | POLLOUT; 2554 freemsg(mp); 2555 return (NULL); 2556 } 2557 /* 2558 * If the application asked for delayed errors 2559 * record the T_UDERROR_IND so_eaddr_mp and the reason in 2560 * so_delayed_error for delayed error posting. If the reason 2561 * is zero use ECONNRESET. 
2562 * Note that delayed error indications do not make sense for 2563 * AF_UNIX sockets since sendto checks that the destination 2564 * address is valid at the time of the sendto. 2565 */ 2566 if (!(so->so_options & SO_DGRAM_ERRIND)) { 2567 mutex_exit(&so->so_lock); 2568 freemsg(mp); 2569 return (NULL); 2570 } 2571 if (so->so_eaddr_mp != NULL) 2572 freemsg(so->so_eaddr_mp); 2573 2574 so->so_eaddr_mp = mp; 2575 if (tudi->ERROR_type != 0) 2576 error = tudi->ERROR_type; 2577 else 2578 error = ECONNRESET; 2579 so->so_delayed_error = (ushort_t)error; 2580 mutex_exit(&so->so_lock); 2581 return (NULL); 2582 } 2583 2584 case T_ERROR_ACK: 2585 dprintso(so, 0, 2586 ("strsock_proto: T_ERROR_ACK for %d, error %d/%d\n", 2587 tpr->error_ack.ERROR_prim, 2588 tpr->error_ack.TLI_error, 2589 tpr->error_ack.UNIX_error)); 2590 2591 if (MBLKL(mp) < sizeof (struct T_error_ack)) { 2592 zcmn_err(getzoneid(), CE_WARN, 2593 "sockfs: Too short T_ERROR_ACK. Len = %ld\n", 2594 (ptrdiff_t)(MBLKL(mp))); 2595 freemsg(mp); 2596 return (NULL); 2597 } 2598 /* 2599 * Check if we were waiting for the async message 2600 */ 2601 mutex_enter(&so->so_lock); 2602 if ((so->so_flag & SOASYNC_UNBIND) && 2603 tpr->error_ack.ERROR_prim == T_UNBIND_REQ) { 2604 so_unlock_single(so, SOASYNC_UNBIND); 2605 mutex_exit(&so->so_lock); 2606 freemsg(mp); 2607 return (NULL); 2608 } 2609 mutex_exit(&so->so_lock); 2610 soqueueack(so, mp); 2611 return (NULL); 2612 2613 case T_OK_ACK: 2614 if (MBLKL(mp) < sizeof (struct T_ok_ack)) { 2615 zcmn_err(getzoneid(), CE_WARN, 2616 "sockfs: Too short T_OK_ACK. 
Len = %ld\n", 2617 (ptrdiff_t)(MBLKL(mp))); 2618 freemsg(mp); 2619 return (NULL); 2620 } 2621 /* 2622 * Check if we were waiting for the async message 2623 */ 2624 mutex_enter(&so->so_lock); 2625 if ((so->so_flag & SOASYNC_UNBIND) && 2626 tpr->ok_ack.CORRECT_prim == T_UNBIND_REQ) { 2627 dprintso(so, 1, 2628 ("strsock_proto: T_OK_ACK async unbind\n")); 2629 so_unlock_single(so, SOASYNC_UNBIND); 2630 mutex_exit(&so->so_lock); 2631 freemsg(mp); 2632 return (NULL); 2633 } 2634 mutex_exit(&so->so_lock); 2635 soqueueack(so, mp); 2636 return (NULL); 2637 2638 case T_INFO_ACK: 2639 if (MBLKL(mp) < sizeof (struct T_info_ack)) { 2640 zcmn_err(getzoneid(), CE_WARN, 2641 "sockfs: Too short T_INFO_ACK. Len = %ld\n", 2642 (ptrdiff_t)(MBLKL(mp))); 2643 freemsg(mp); 2644 return (NULL); 2645 } 2646 soqueueack(so, mp); 2647 return (NULL); 2648 2649 case T_CAPABILITY_ACK: 2650 /* 2651 * A T_capability_ack need only be large enough to hold 2652 * the PRIM_type and CAP_bits1 fields; checking for anything 2653 * larger might reject a correct response from an older 2654 * provider. 2655 */ 2656 if (MBLKL(mp) < 2 * sizeof (t_uscalar_t)) { 2657 zcmn_err(getzoneid(), CE_WARN, 2658 "sockfs: Too short T_CAPABILITY_ACK. Len = %ld\n", 2659 (ptrdiff_t)(MBLKL(mp))); 2660 freemsg(mp); 2661 return (NULL); 2662 } 2663 soqueueack(so, mp); 2664 return (NULL); 2665 2666 case T_BIND_ACK: 2667 if (MBLKL(mp) < sizeof (struct T_bind_ack)) { 2668 zcmn_err(getzoneid(), CE_WARN, 2669 "sockfs: Too short T_BIND_ACK. Len = %ld\n", 2670 (ptrdiff_t)(MBLKL(mp))); 2671 freemsg(mp); 2672 return (NULL); 2673 } 2674 soqueueack(so, mp); 2675 return (NULL); 2676 2677 case T_OPTMGMT_ACK: 2678 if (MBLKL(mp) < sizeof (struct T_optmgmt_ack)) { 2679 zcmn_err(getzoneid(), CE_WARN, 2680 "sockfs: Too short T_OPTMGMT_ACK. 
Len = %ld\n", 2681 (ptrdiff_t)(MBLKL(mp))); 2682 freemsg(mp); 2683 return (NULL); 2684 } 2685 soqueueack(so, mp); 2686 return (NULL); 2687 default: 2688 #ifdef DEBUG 2689 zcmn_err(getzoneid(), CE_WARN, 2690 "sockfs: unknown TPI primitive %d received\n", 2691 tpr->type); 2692 #endif /* DEBUG */ 2693 freemsg(mp); 2694 return (NULL); 2695 } 2696 } 2697 2698 /* 2699 * This routine is registered with the stream head to receive other 2700 * (non-data, and non-proto) messages. 2701 * 2702 * Returns NULL if the message was consumed. 2703 * Returns an mblk to make that mblk be processed by the stream head. 2704 * 2705 * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and 2706 * *pollwakeups) for the stream head to take action on. 2707 */ 2708 static mblk_t * 2709 strsock_misc(vnode_t *vp, mblk_t *mp, 2710 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 2711 strsigset_t *allmsgsigs, strpollset_t *pollwakeups) 2712 { 2713 struct sonode *so; 2714 2715 so = VTOSO(vp); 2716 2717 dprintso(so, 1, ("strsock_misc(%p, %p, 0x%x)\n", 2718 vp, mp, DB_TYPE(mp))); 2719 2720 /* Set default return values */ 2721 *wakeups = *allmsgsigs = *firstmsgsigs = *pollwakeups = 0; 2722 2723 switch (DB_TYPE(mp)) { 2724 case M_PCSIG: 2725 /* 2726 * This assumes that an M_PCSIG for the urgent data arrives 2727 * before the corresponding T_EXDATA_IND. 2728 * 2729 * Note: Just like in SunOS 4.X and 4.4BSD a poll will be 2730 * awoken before the urgent data shows up. 2731 * For OOBINLINE this can result in select returning 2732 * only exceptions as opposed to except|read. 
2733 */ 2734 if (*mp->b_rptr == SIGURG) { 2735 mutex_enter(&so->so_lock); 2736 dprintso(so, 1, 2737 ("SIGURG(%p): counts %d/%d state %s\n", 2738 vp, so->so_oobsigcnt, 2739 so->so_oobcnt, 2740 pr_state(so->so_state, so->so_mode))); 2741 so_oob_sig(so, 1, allmsgsigs, pollwakeups); 2742 dprintso(so, 1, 2743 ("after SIGURG(%p): counts %d/%d " 2744 " poll 0x%x sig 0x%x state %s\n", 2745 vp, so->so_oobsigcnt, 2746 so->so_oobcnt, *pollwakeups, *allmsgsigs, 2747 pr_state(so->so_state, so->so_mode))); 2748 mutex_exit(&so->so_lock); 2749 } 2750 freemsg(mp); 2751 return (NULL); 2752 2753 case M_SIG: 2754 case M_HANGUP: 2755 case M_UNHANGUP: 2756 case M_ERROR: 2757 /* M_ERRORs etc are ignored */ 2758 freemsg(mp); 2759 return (NULL); 2760 2761 case M_FLUSH: 2762 /* 2763 * Do not flush read queue. If the M_FLUSH 2764 * arrives because of an impending T_discon_ind 2765 * we still have to keep any queued data - this is part of 2766 * socket semantics. 2767 */ 2768 if (*mp->b_rptr & FLUSHW) { 2769 *mp->b_rptr &= ~FLUSHR; 2770 return (mp); 2771 } 2772 freemsg(mp); 2773 return (NULL); 2774 2775 default: 2776 return (mp); 2777 } 2778 } 2779 2780 2781 /* Register to receive signals for certain events */ 2782 int 2783 so_set_asyncsigs(vnode_t *vp, pid_t pgrp, int events, int mode, cred_t *cr) 2784 { 2785 struct strsigset ss; 2786 int32_t rval; 2787 2788 /* 2789 * Note that SOLOCKED will be set except for the call from soaccept(). 2790 */ 2791 ASSERT(!mutex_owned(&VTOSO(vp)->so_lock)); 2792 ss.ss_pid = pgrp; 2793 ss.ss_events = events; 2794 return (strioctl(vp, I_ESETSIG, (intptr_t)&ss, mode, K_TO_K, cr, 2795 &rval)); 2796 } 2797 2798 2799 /* Register for events matching the SS_ASYNC flag */ 2800 int 2801 so_set_events(struct sonode *so, vnode_t *vp, cred_t *cr) 2802 { 2803 int events = so->so_state & SS_ASYNC ? 
2804 S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT : 2805 S_RDBAND | S_BANDURG; 2806 2807 return (so_set_asyncsigs(vp, so->so_pgrp, events, 0, cr)); 2808 } 2809 2810 2811 /* Change the SS_ASYNC flag, and update signal delivery if needed */ 2812 int 2813 so_flip_async(struct sonode *so, vnode_t *vp, int mode, cred_t *cr) 2814 { 2815 ASSERT(mutex_owned(&so->so_lock)); 2816 if (so->so_pgrp != 0) { 2817 int error; 2818 int events = so->so_state & SS_ASYNC ? /* Old flag */ 2819 S_RDBAND | S_BANDURG : /* New sigs */ 2820 S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT; 2821 2822 so_lock_single(so); 2823 mutex_exit(&so->so_lock); 2824 2825 error = so_set_asyncsigs(vp, so->so_pgrp, events, mode, cr); 2826 2827 mutex_enter(&so->so_lock); 2828 so_unlock_single(so, SOLOCKED); 2829 if (error) 2830 return (error); 2831 } 2832 so->so_state ^= SS_ASYNC; 2833 return (0); 2834 } 2835 2836 /* 2837 * Set new pid/pgrp for SIGPOLL (or SIGIO for FIOASYNC mode), replacing 2838 * any existing one. If passed zero, just clear the existing one. 2839 */ 2840 int 2841 so_set_siggrp(struct sonode *so, vnode_t *vp, pid_t pgrp, int mode, cred_t *cr) 2842 { 2843 int events = so->so_state & SS_ASYNC ? 2844 S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT : 2845 S_RDBAND | S_BANDURG; 2846 int error; 2847 2848 ASSERT(mutex_owned(&so->so_lock)); 2849 2850 /* 2851 * Change socket process (group). 2852 * 2853 * strioctl (via so_set_asyncsigs) will perform permission check and 2854 * also keep a PID_HOLD to prevent the pid from being reused. 
2855 */ 2856 so_lock_single(so); 2857 mutex_exit(&so->so_lock); 2858 2859 if (pgrp != 0) { 2860 dprintso(so, 1, ("setown: adding pgrp %d ev 0x%x\n", 2861 pgrp, events)); 2862 error = so_set_asyncsigs(vp, pgrp, events, mode, cr); 2863 if (error != 0) { 2864 eprintsoline(so, error); 2865 goto bad; 2866 } 2867 } 2868 /* Remove the previously registered process/group */ 2869 if (so->so_pgrp != 0) { 2870 dprintso(so, 1, ("setown: removing pgrp %d\n", so->so_pgrp)); 2871 error = so_set_asyncsigs(vp, so->so_pgrp, 0, mode, cr); 2872 if (error != 0) { 2873 eprintsoline(so, error); 2874 error = 0; 2875 } 2876 } 2877 mutex_enter(&so->so_lock); 2878 so_unlock_single(so, SOLOCKED); 2879 so->so_pgrp = pgrp; 2880 return (0); 2881 bad: 2882 mutex_enter(&so->so_lock); 2883 so_unlock_single(so, SOLOCKED); 2884 return (error); 2885 } 2886 2887 2888 2889 /* 2890 * Translate a TLI(/XTI) error into a system error as best we can. 2891 */ 2892 static const int tli_errs[] = { 2893 0, /* no error */ 2894 EADDRNOTAVAIL, /* TBADADDR */ 2895 ENOPROTOOPT, /* TBADOPT */ 2896 EACCES, /* TACCES */ 2897 EBADF, /* TBADF */ 2898 EADDRNOTAVAIL, /* TNOADDR */ 2899 EPROTO, /* TOUTSTATE */ 2900 ECONNABORTED, /* TBADSEQ */ 2901 0, /* TSYSERR - will never get */ 2902 EPROTO, /* TLOOK - should never be sent by transport */ 2903 EMSGSIZE, /* TBADDATA */ 2904 EMSGSIZE, /* TBUFOVFLW */ 2905 EPROTO, /* TFLOW */ 2906 EWOULDBLOCK, /* TNODATA */ 2907 EPROTO, /* TNODIS */ 2908 EPROTO, /* TNOUDERR */ 2909 EINVAL, /* TBADFLAG */ 2910 EPROTO, /* TNOREL */ 2911 EOPNOTSUPP, /* TNOTSUPPORT */ 2912 EPROTO, /* TSTATECHNG */ 2913 /* following represent error namespace expansion with XTI */ 2914 EPROTO, /* TNOSTRUCTYPE - never sent by transport */ 2915 EPROTO, /* TBADNAME - never sent by transport */ 2916 EPROTO, /* TBADQLEN - never sent by transport */ 2917 EADDRINUSE, /* TADDRBUSY */ 2918 EBADF, /* TINDOUT */ 2919 EBADF, /* TPROVMISMATCH */ 2920 EBADF, /* TRESQLEN */ 2921 EBADF, /* TRESADDR */ 2922 EPROTO, /* TQFULL - 
never sent by transport */ 2923 EPROTO, /* TPROTO */ 2924 }; 2925 2926 static int 2927 tlitosyserr(int terr) 2928 { 2929 ASSERT(terr != TSYSERR); 2930 if (terr >= (sizeof (tli_errs) / sizeof (tli_errs[0]))) 2931 return (EPROTO); 2932 else 2933 return (tli_errs[terr]); 2934 } 2935 2936 /* 2937 * Sockfs sodirect STREAMS read put procedure. Called from sodirect enable 2938 * transport driver/module with an mblk_t chain. 2939 * 2940 * Note, we in-line putq() for the fast-path cases of q is empty, q_last and 2941 * bp are of type M_DATA. All other cases we call putq(). 2942 * 2943 * On success a zero will be return, else an errno will be returned. 2944 */ 2945 int 2946 sodput(sodirect_t *sodp, mblk_t *bp) 2947 { 2948 queue_t *q = sodp->sod_q; 2949 struct stdata *stp = (struct stdata *)q->q_ptr; 2950 mblk_t *nbp; 2951 int ret; 2952 mblk_t *last = q->q_last; 2953 int bytecnt = 0; 2954 int mblkcnt = 0; 2955 2956 2957 ASSERT(MUTEX_HELD(sodp->sod_lock)); 2958 2959 if (stp->sd_flag == STREOF) { 2960 ret = 0; 2961 goto error; 2962 } 2963 2964 if (q->q_first == NULL) { 2965 /* Q empty, really fast fast-path */ 2966 bp->b_prev = NULL; 2967 bp->b_next = NULL; 2968 q->q_first = bp; 2969 q->q_last = bp; 2970 2971 } else if (last->b_datap->db_type == M_DATA && 2972 bp->b_datap->db_type == M_DATA) { 2973 /* 2974 * Last mblk_t chain and bp are both type M_DATA so 2975 * in-line putq() here, if the DBLK_UIOA state match 2976 * add bp to the end of the current last chain, else 2977 * start a new last chain with bp. 2978 */ 2979 if ((last->b_datap->db_flags & DBLK_UIOA) == 2980 (bp->b_datap->db_flags & DBLK_UIOA)) { 2981 /* Added to end */ 2982 while ((nbp = last->b_cont) != NULL) 2983 last = nbp; 2984 last->b_cont = bp; 2985 } else { 2986 /* New last */ 2987 last->b_next = bp; 2988 bp->b_next = NULL; 2989 bp->b_prev = last; 2990 q->q_last = bp; 2991 } 2992 } else { 2993 /* 2994 * Can't use q_last so just call putq(). 
2995 */ 2996 (void) putq(q, bp); 2997 return (0); 2998 } 2999 3000 /* Count bytes and mblk_t's */ 3001 do { 3002 bytecnt += MBLKL(bp); 3003 mblkcnt++; 3004 } while ((bp = bp->b_cont) != NULL); 3005 q->q_count += bytecnt; 3006 q->q_mblkcnt += mblkcnt; 3007 3008 /* Check for QFULL */ 3009 if (q->q_count >= q->q_hiwat + sodp->sod_want || 3010 q->q_mblkcnt >= q->q_hiwat) { 3011 q->q_flag |= QFULL; 3012 } 3013 3014 return (0); 3015 3016 error: 3017 do { 3018 if ((nbp = bp->b_next) != NULL) 3019 bp->b_next = NULL; 3020 freemsg(bp); 3021 } while ((bp = nbp) != NULL); 3022 3023 return (ret); 3024 } 3025 3026 /* 3027 * Sockfs sodirect read wakeup. Called from a sodirect enabled transport 3028 * driver/module to indicate that read-side data is available. 3029 * 3030 * On return the sodirect_t.lock mutex will be exited so this must be the 3031 * last sodirect_t call to guarantee atomic access of *sodp. 3032 */ 3033 void 3034 sodwakeup(sodirect_t *sodp) 3035 { 3036 queue_t *q = sodp->sod_q; 3037 struct stdata *stp = (struct stdata *)q->q_ptr; 3038 3039 ASSERT(MUTEX_HELD(sodp->sod_lock)); 3040 3041 if (stp->sd_flag & RSLEEP) { 3042 stp->sd_flag &= ~RSLEEP; 3043 cv_broadcast(&q->q_wait); 3044 } 3045 3046 if (stp->sd_rput_opt & SR_POLLIN) { 3047 stp->sd_rput_opt &= ~SR_POLLIN; 3048 mutex_exit(sodp->sod_lock); 3049 pollwakeup(&stp->sd_pollist, POLLIN | POLLRDNORM); 3050 } else 3051 mutex_exit(sodp->sod_lock); 3052 } 3053