/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/sysmacros.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/file.h>
#include <sys/user.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/esunddi.h>
#include <sys/flock.h>
#include <sys/modctl.h>
#include <sys/vtrace.h>
#include <sys/strsun.h>
#include <sys/cmn_err.h>
#include <sys/proc.h>
#include <sys/ddi.h>
#include <sys/kmem_impl.h>

#include <sys/suntpi.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/socketvar.h>
#include <netinet/in.h>

#include <sys/tiuser.h>
#define	_SUN_TPI_VERSION 2
#include <sys/tihdr.h>

#include <inet/kssl/ksslapi.h>

#include <c2/audit.h>

#include <sys/dcopy.h>

/* Default socket version used for newly created sockets. */
int so_default_version = SOV_SOCKSTREAM;

#ifdef DEBUG
/* Set sockdebug to print debug messages when SO_DEBUG is set */
int sockdebug = 0;

/* Set sockprinterr to print error messages when SO_DEBUG is set */
int sockprinterr = 0;

/*
 * Set so_default_options to SO_DEBUG if all sockets should be created
 * with SO_DEBUG set. This is needed to get debug printouts from the
 * socket() call itself.
 */
int so_default_options = 0;
#endif /* DEBUG */

#ifdef SOCK_TEST
/*
 * Set to number of ticks to limit cv_waits for code coverage testing.
 * Set to 1000 when SO_DEBUG is set to 2.
 */
clock_t sock_test_timelimit = 0;
#endif /* SOCK_TEST */

/*
 * For concurrency testing of e.g. opening /dev/ip which does not
 * handle T_INFO_REQ messages.
 */
int so_no_tinfo = 0;

/*
 * Timeout for getting a T_CAPABILITY_ACK - it is possible for a provider
 * to simply ignore the T_CAPABILITY_REQ.
 */
clock_t sock_capability_timeout = 2;	/* seconds */

/* TPI capability negotiation and stream-head hook removal helpers. */
static int do_tcapability(struct sonode *so, t_uscalar_t cap_bits1);
static void so_removehooks(struct sonode *so);

/* Stream head read-side hooks: protocol and misc message interception. */
static mblk_t *strsock_proto(vnode_t *vp, mblk_t *mp,
		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
		strsigset_t *allmsgsigs, strpollset_t *pollwakeups);
static mblk_t *strsock_misc(vnode_t *vp, mblk_t *mp,
		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
		strsigset_t *allmsgsigs, strpollset_t *pollwakeups);

static int tlitosyserr(int terr);

/*
 * Sodirect kmem_cache and put/wakeup functions.
 */
struct kmem_cache *socktpi_sod_cache;
static int sodput(sodirect_t *, mblk_t *);
static void sodwakeup(sodirect_t *);

/*
 * Called by sockinit() when sockfs is loaded.
 */
int
sostr_init()
{
	/* Allocate sodirect_t kmem_cache */
	socktpi_sod_cache = kmem_cache_create("socktpi_sod_cache",
	    sizeof (sodirect_t), 0, NULL, NULL, NULL, NULL, NULL, 0);

	return (0);
}

/*
 * Convert a socket to a stream. Invoked when the illusory sockmod
 * is popped from the stream.
 * Change the stream head back to default operation without losing
 * any messages (T_conn_ind's are moved to the stream head queue).
 */
int
so_sock2stream(struct sonode *so)
{
	struct vnode *vp = SOTOV(so);
	queue_t *rq;
	mblk_t *mp;
	int error = 0;

	ASSERT(MUTEX_HELD(&so->so_plumb_lock));

	mutex_enter(&so->so_lock);
	so_lock_single(so);

	ASSERT(so->so_version != SOV_STREAM);

	if (so->so_state & SS_DIRECT) {
		mblk_t **mpp;
		int rval;

		/*
		 * Tell the transport below that sockmod is being popped.
		 * so_lock must be dropped across strioctl() since it can
		 * block; so_lock_single() keeps the socket single-threaded.
		 */
		mutex_exit(&so->so_lock);
		error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K, CRED(),
		    &rval);
		mutex_enter(&so->so_lock);
		if (error != 0) {
			dprintso(so, 0, ("so_sock2stream(%p): "
			    "_SIOCSOCKFALLBACK failed\n", so));
			goto exit;
		}
		so->so_state &= ~SS_DIRECT;

		for (mpp = &so->so_conn_ind_head; (mp = *mpp) != NULL;
		    mpp = &mp->b_next) {
			struct T_conn_ind *conn_ind;

			/*
			 * strsock_proto() has already verified the length of
			 * this message block.
			 */
			ASSERT(MBLKL(mp) >= sizeof (struct T_conn_ind));

			conn_ind = (struct T_conn_ind *)mp->b_rptr;
			if (conn_ind->OPT_length == 0 &&
			    conn_ind->OPT_offset == 0)
				continue;

			if (DB_REF(mp) > 1) {
				mblk_t *newmp;
				size_t length;
				cred_t *cr;

				/*
				 * Copy the message block because it is used
				 * elsewhere, too.
				 */
				length = MBLKL(mp);
				newmp = soallocproto(length, _ALLOC_INTR);
				if (newmp == NULL) {
					error = EINTR;
					goto exit;
				}
				bcopy(mp->b_rptr, newmp->b_wptr, length);
				newmp->b_wptr += length;
				newmp->b_next = mp->b_next;
				cr = DB_CRED(mp);
				if (cr != NULL)
					mblk_setcred(newmp, cr);
				DB_CPID(newmp) = DB_CPID(mp);

				/*
				 * Link the new message block into the queue
				 * and free the old one.
				 */
				*mpp = newmp;
				mp->b_next = NULL;
				freemsg(mp);

				mp = newmp;
				conn_ind = (struct T_conn_ind *)mp->b_rptr;
			}

			/*
			 * Remove options added by TCP for accept fast-path.
			 */
			conn_ind->OPT_length = 0;
			conn_ind->OPT_offset = 0;
		}
	}

	so->so_version = SOV_STREAM;
	so->so_priv = NULL;

	/*
	 * Remove the hooks in the stream head to avoid queuing more
	 * packets in sockfs.
	 */
	mutex_exit(&so->so_lock);
	so_removehooks(so);
	mutex_enter(&so->so_lock);

	/*
	 * Clear any state related to urgent data. Leave any T_EXDATA_IND
	 * on the queue - the behavior of urgent data after a switch is
	 * left undefined.
	 */
	so->so_error = so->so_delayed_error = 0;
	freemsg(so->so_oobmsg);
	so->so_oobmsg = NULL;
	so->so_oobsigcnt = so->so_oobcnt = 0;

	so->so_state &= ~(SS_RCVATMARK|SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA|
	    SS_HASCONNIND|SS_SAVEDEOR);
	ASSERT(so_verify_oobstate(so));

	freemsg(so->so_ack_mp);
	so->so_ack_mp = NULL;

	/*
	 * Flush the T_DISCON_IND on so_discon_ind_mp.
	 */
	so_flush_discon_ind(so);

	/*
	 * Move any queued T_CONN_IND messages to stream head queue.
	 */
	rq = RD(strvp2wq(vp));
	while ((mp = so->so_conn_ind_head) != NULL) {
		so->so_conn_ind_head = mp->b_next;
		mp->b_next = NULL;
		if (so->so_conn_ind_head == NULL) {
			ASSERT(so->so_conn_ind_tail == mp);
			so->so_conn_ind_tail = NULL;
		}
		dprintso(so, 0,
		    ("so_sock2stream(%p): moving T_CONN_IND\n",
		    so));

		/* Drop lock across put() */
		mutex_exit(&so->so_lock);
		put(rq, mp);
		mutex_enter(&so->so_lock);
	}

exit:
	ASSERT(MUTEX_HELD(&so->so_lock));
	so_unlock_single(so, SOLOCKED);
	mutex_exit(&so->so_lock);
	return (error);
}

/*
 * Convert a stream back to a socket. This is invoked when the illusory
 * sockmod is pushed on a stream (where the stream was "created" by
 * popping the illusory sockmod).
 * This routine can not recreate the socket state (certain aspects of
 * it like urgent data state and the bound/connected addresses for AF_UNIX
 * sockets can not be recreated by asking the transport for information).
 * Thus this routine implicitly assumes that the socket is in an initial
 * state (as if it was just created). It flushes any messages queued on the
 * read queue to avoid dealing with e.g. TPI acks or T_exdata_ind messages.
 */
void
so_stream2sock(struct sonode *so)
{
	struct vnode *vp = SOTOV(so);

	ASSERT(MUTEX_HELD(&so->so_plumb_lock));

	mutex_enter(&so->so_lock);
	so_lock_single(so);
	ASSERT(so->so_version == SOV_STREAM);
	so->so_version = SOV_SOCKSTREAM;
	so->so_pushcnt = 0;
	mutex_exit(&so->so_lock);

	/*
	 * Set a permanent error to force any thread in sorecvmsg to
	 * return (and drop SOREADLOCKED). Clear the error once
	 * we have SOREADLOCKED.
	 * This makes a read sleeping during the I_PUSH of sockmod return
	 * EIO.
	 */
	strsetrerror(SOTOV(so), EIO, 1, NULL);

	/*
	 * Get the read lock before flushing data to avoid
	 * problems with the T_EXDATA_IND MSG_PEEK code in sorecvmsg.
	 */
	mutex_enter(&so->so_lock);
	(void) so_lock_read(so, 0);	/* Set SOREADLOCKED */
	mutex_exit(&so->so_lock);

	strsetrerror(SOTOV(so), 0, 0, NULL);
	so_installhooks(so);

	/*
	 * Flush everything on the read queue.
	 * This ensures that no T_CONN_IND remain and that no T_EXDATA_IND
	 * remain; those types of messages would confuse sockfs.
	 */
	strflushrq(vp, FLUSHALL);
	mutex_enter(&so->so_lock);

	/*
	 * Flush the T_DISCON_IND on so_discon_ind_mp.
	 */
	so_flush_discon_ind(so);
	so_unlock_read(so);	/* Clear SOREADLOCKED */

	so_unlock_single(so, SOLOCKED);
	mutex_exit(&so->so_lock);
}

/*
 * Install the hooks in the stream head.
 */
void
so_installhooks(struct sonode *so)
{
	struct vnode *vp = SOTOV(so);

	strsetrputhooks(vp, SH_SIGALLDATA | SH_IGN_ZEROLEN | SH_CONSOL_DATA,
	    strsock_proto, strsock_misc);
	strsetwputhooks(vp, SH_SIGPIPE | SH_RECHECK_ERR, 0);
}

/*
 * Remove the hooks in the stream head.
 */
static void
so_removehooks(struct sonode *so)
{
	struct vnode *vp = SOTOV(so);

	strsetrputhooks(vp, 0, NULL, NULL);
	strsetwputhooks(vp, 0, STRTIMOUT);
	/*
	 * Leave read behavior as it would have been for a normal
	 * stream i.e. a read of an M_PROTO will fail.
	 */
}

/*
 * Initialize the streams side of a socket including
 * T_info_req/ack processing. If tso is not NULL its values are used thereby
 * avoiding the T_INFO_REQ.
 */
int
so_strinit(struct sonode *so, struct sonode *tso)
{
	struct vnode *vp = SOTOV(so);
	struct stdata *stp;
	mblk_t *mp;
	int error;

	dprintso(so, 1, ("so_strinit(%p)\n", so));

	/* Preallocate an unbind_req message */
	mp = soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP);
	mutex_enter(&so->so_lock);
	so->so_unbind_mp = mp;
#ifdef DEBUG
	so->so_options = so_default_options;
#endif /* DEBUG */
	mutex_exit(&so->so_lock);

	so_installhooks(so);

	/*
	 * The T_CAPABILITY_REQ should be the first message sent down because
	 * at least TCP has a fast-path for this which avoids timeouts while
	 * waiting for the T_CAPABILITY_ACK under high system load.
	 */
	if (tso == NULL) {
		error = do_tcapability(so, TC1_ACCEPTOR_ID | TC1_INFO);
		if (error)
			return (error);
	} else {
		/* Clone the transport info from tso instead of asking. */
		mutex_enter(&so->so_lock);
		so->so_tsdu_size = tso->so_tsdu_size;
		so->so_etsdu_size = tso->so_etsdu_size;
		so->so_addr_size = tso->so_addr_size;
		so->so_opt_size = tso->so_opt_size;
		so->so_tidu_size = tso->so_tidu_size;
		so->so_serv_type = tso->so_serv_type;
		so->so_mode = tso->so_mode & ~SM_ACCEPTOR_ID;
		mutex_exit(&so->so_lock);

		/* the following do_tcapability may update so->so_mode */
		if ((tso->so_serv_type != T_CLTS) &&
		    !(tso->so_state & SS_DIRECT)) {
			error = do_tcapability(so, TC1_ACCEPTOR_ID);
			if (error)
				return (error);
		}
	}
	/*
	 * If the addr_size is 0 we treat it as already bound
	 * and connected. This is used by the routing socket.
	 * We set the addr_size to something to allocate the address
	 * structures.
	 */
	if (so->so_addr_size == 0) {
		so->so_state |= SS_ISBOUND | SS_ISCONNECTED;
		/* Address size can vary with address families. */
		if (so->so_family == AF_INET6)
			so->so_addr_size =
			    (t_scalar_t)sizeof (struct sockaddr_in6);
		else
			so->so_addr_size =
			    (t_scalar_t)sizeof (struct sockaddr_in);
		ASSERT(so->so_unbind_mp);
	}
	/*
	 * Allocate the addresses. Local and foreign addresses share a
	 * single allocation; so_faddr_sa points into its second half.
	 */
	ASSERT(so->so_laddr_sa == NULL && so->so_faddr_sa == NULL);
	ASSERT(so->so_laddr_len == 0 && so->so_faddr_len == 0);
	so->so_laddr_maxlen = so->so_faddr_maxlen =
	    P2ROUNDUP(so->so_addr_size, KMEM_ALIGN);
	so->so_laddr_sa = kmem_alloc(so->so_laddr_maxlen * 2, KM_SLEEP);
	so->so_faddr_sa = (struct sockaddr *)((caddr_t)so->so_laddr_sa
	    + so->so_laddr_maxlen);

	if (so->so_family == AF_UNIX) {
		/*
		 * Initialize AF_UNIX related fields.
		 */
		bzero(&so->so_ux_laddr, sizeof (so->so_ux_laddr));
		bzero(&so->so_ux_faddr, sizeof (so->so_ux_faddr));
	}

	stp = vp->v_stream;
	/*
	 * Have to keep minpsz at zero in order to allow write/send of zero
	 * bytes.
	 */
	mutex_enter(&stp->sd_lock);
	if (stp->sd_qn_minpsz == 1)
		stp->sd_qn_minpsz = 0;
	mutex_exit(&stp->sd_lock);

	/*
	 * If sodirect capable allocate and initialize sodirect_t.
	 * Note, SS_SODIRECT is set in socktpi_open().
	 */
	if (so->so_state & SS_SODIRECT) {
		sodirect_t *sodp;

		ASSERT(so->so_direct == NULL);

		sodp = kmem_cache_alloc(socktpi_sod_cache, KM_SLEEP);
		sodp->sod_state = SOD_ENABLED | SOD_WAKE_NOT;
		sodp->sod_want = 0;
		sodp->sod_q = RD(stp->sd_wrq);
		sodp->sod_enqueue = sodput;
		sodp->sod_wakeup = sodwakeup;
		sodp->sod_uioafh = NULL;
		sodp->sod_uioaft = NULL;
		sodp->sod_lock = &stp->sd_lock;
		/*
		 * Remainder of the sod_uioa members are left uninitialized
		 * but will be initialized later by uioainit() before uioa
		 * is enabled.
		 */
		sodp->sod_uioa.uioa_state = UIOA_ALLOC;
		so->so_direct = sodp;
		stp->sd_sodirect = sodp;
	}

	return (0);
}

/*
 * Record the interesting fields of a T_info_ack in the sonode and
 * derive the socket mode flags from them.
 * Caller holds so_lock.
 */
static void
copy_tinfo(struct sonode *so, struct T_info_ack *tia)
{
	so->so_tsdu_size = tia->TSDU_size;
	so->so_etsdu_size = tia->ETSDU_size;
	so->so_addr_size = tia->ADDR_size;
	so->so_opt_size = tia->OPT_size;
	so->so_tidu_size = tia->TIDU_size;
	so->so_serv_type = tia->SERV_type;
	switch (tia->CURRENT_state) {
	case TS_UNBND:
		break;
	case TS_IDLE:
		so->so_state |= SS_ISBOUND;
		so->so_laddr_len = 0;
		so->so_state &= ~SS_LADDR_VALID;
		break;
	case TS_DATA_XFER:
		so->so_state |= SS_ISBOUND|SS_ISCONNECTED;
		so->so_laddr_len = 0;
		so->so_faddr_len = 0;
		so->so_state &= ~(SS_LADDR_VALID | SS_FADDR_VALID);
		break;
	}

	/*
	 * Heuristics for determining the socket mode flags
	 * (SM_ATOMIC, SM_CONNREQUIRED, SM_ADDR, SM_FDPASSING,
	 * and SM_EXDATA, SM_OPTDATA, and SM_BYTESTREAM)
	 * from the info ack.
	 */
	if (so->so_serv_type == T_CLTS) {
		so->so_mode |= SM_ATOMIC | SM_ADDR;
	} else {
		so->so_mode |= SM_CONNREQUIRED;
		if (so->so_etsdu_size != 0 && so->so_etsdu_size != -2)
			so->so_mode |= SM_EXDATA;
	}
	if (so->so_type == SOCK_SEQPACKET || so->so_type == SOCK_RAW) {
		/* Semantics are to discard tail end of messages */
		so->so_mode |= SM_ATOMIC;
	}
	if (so->so_family == AF_UNIX) {
		so->so_mode |= SM_FDPASSING | SM_OPTDATA;
		if (so->so_addr_size == -1) {
			/* MAXPATHLEN + soun_family + nul termination */
			so->so_addr_size = (t_scalar_t)(MAXPATHLEN +
			    sizeof (short) + 1);
		}
		if (so->so_type == SOCK_STREAM) {
			/*
			 * Make it into a byte-stream transport.
			 * SOCK_SEQPACKET sockets are unchanged.
			 */
			so->so_tsdu_size = 0;
		}
	} else if (so->so_addr_size == -1) {
		/*
		 * Logic extracted from sockmod - have to pick some max address
		 * length in order to preallocate the addresses.
		 */
		so->so_addr_size = SOA_DEFSIZE;
	}
	if (so->so_tsdu_size == 0)
		so->so_mode |= SM_BYTESTREAM;
}

/*
 * Sanity-check the T_info_ack values recorded in the sonode against the
 * socket type and family. Returns 0 or an errno.
 */
static int
check_tinfo(struct sonode *so)
{
	/* Consistency checks */
	if (so->so_type == SOCK_DGRAM && so->so_serv_type != T_CLTS) {
		eprintso(so, ("service type and socket type mismatch\n"));
		eprintsoline(so, EPROTO);
		return (EPROTO);
	}
	if (so->so_type == SOCK_STREAM && so->so_serv_type == T_CLTS) {
		eprintso(so, ("service type and socket type mismatch\n"));
		eprintsoline(so, EPROTO);
		return (EPROTO);
	}
	if (so->so_type == SOCK_SEQPACKET && so->so_serv_type == T_CLTS) {
		eprintso(so, ("service type and socket type mismatch\n"));
		eprintsoline(so, EPROTO);
		return (EPROTO);
	}
	if (so->so_family == AF_INET &&
	    so->so_addr_size != (t_scalar_t)sizeof (struct sockaddr_in)) {
		eprintso(so,
		    ("AF_INET must have sockaddr_in address length. Got %d\n",
		    so->so_addr_size));
		eprintsoline(so, EMSGSIZE);
		return (EMSGSIZE);
	}
	if (so->so_family == AF_INET6 &&
	    so->so_addr_size != (t_scalar_t)sizeof (struct sockaddr_in6)) {
		eprintso(so,
		    ("AF_INET6 must have sockaddr_in6 address length. Got %d\n",
		    so->so_addr_size));
		eprintsoline(so, EMSGSIZE);
		return (EMSGSIZE);
	}

	dprintso(so, 1, (
	    "tinfo: serv %d tsdu %d, etsdu %d, addr %d, opt %d, tidu %d\n",
	    so->so_serv_type, so->so_tsdu_size, so->so_etsdu_size,
	    so->so_addr_size, so->so_opt_size,
	    so->so_tidu_size));
	dprintso(so, 1, ("tinfo: so_state %s\n",
	    pr_state(so->so_state, so->so_mode)));
	return (0);
}

/*
 * Send down T_info_req and wait for the ack.
 * Record interesting T_info_ack values in the sonode.
 */
static int
do_tinfo(struct sonode *so)
{
	struct T_info_req tir;
	mblk_t *mp;
	int error;

	ASSERT(MUTEX_NOT_HELD(&so->so_lock));

	if (so_no_tinfo) {
		so->so_addr_size = 0;
		return (0);
	}

	dprintso(so, 1, ("do_tinfo(%p)\n", so));

	/* Send T_INFO_REQ */
	tir.PRIM_type = T_INFO_REQ;
	mp = soallocproto1(&tir, sizeof (tir),
	    sizeof (struct T_info_req) + sizeof (struct T_info_ack),
	    _ALLOC_INTR);
	if (mp == NULL) {
		eprintsoline(so, ENOBUFS);
		return (ENOBUFS);
	}
	/* T_INFO_REQ has to be M_PCPROTO */
	DB_TYPE(mp) = M_PCPROTO;

	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
	if (error) {
		eprintsoline(so, error);
		return (error);
	}
	mutex_enter(&so->so_lock);
	/* Wait for T_INFO_ACK */
	if ((error = sowaitprim(so, T_INFO_REQ, T_INFO_ACK,
	    (t_uscalar_t)sizeof (struct T_info_ack), &mp, 0))) {
		mutex_exit(&so->so_lock);
		eprintsoline(so, error);
		return (error);
	}

	ASSERT(mp);
	copy_tinfo(so, (struct T_info_ack *)mp->b_rptr);
	mutex_exit(&so->so_lock);
	freemsg(mp);
	return (check_tinfo(so));
}

/*
 * Send down T_capability_req and wait for the ack.
 * Record interesting T_capability_ack values in the sonode.
 * Falls back to do_tinfo() for providers known not to support
 * T_CAPABILITY_REQ, and caches that knowledge in the provinfo.
 */
static int
do_tcapability(struct sonode *so, t_uscalar_t cap_bits1)
{
	struct T_capability_req tcr;
	struct T_capability_ack *tca;
	mblk_t *mp;
	int error;

	ASSERT(cap_bits1 != 0);
	ASSERT((cap_bits1 & ~(TC1_ACCEPTOR_ID | TC1_INFO)) == 0);
	ASSERT(MUTEX_NOT_HELD(&so->so_lock));

	if (so->so_provinfo->tpi_capability == PI_NO)
		return (do_tinfo(so));

	if (so_no_tinfo) {
		so->so_addr_size = 0;
		if ((cap_bits1 &= ~TC1_INFO) == 0)
			return (0);
	}

	dprintso(so, 1, ("do_tcapability(%p)\n", so));

	/* Send T_CAPABILITY_REQ */
	tcr.PRIM_type = T_CAPABILITY_REQ;
	tcr.CAP_bits1 = cap_bits1;
	mp = soallocproto1(&tcr, sizeof (tcr),
	    sizeof (struct T_capability_req) + sizeof (struct T_capability_ack),
	    _ALLOC_INTR);
	if (mp == NULL) {
		eprintsoline(so, ENOBUFS);
		return (ENOBUFS);
	}
	/* T_CAPABILITY_REQ should be M_PCPROTO here */
	DB_TYPE(mp) = M_PCPROTO;

	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
	if (error) {
		eprintsoline(so, error);
		return (error);
	}
	mutex_enter(&so->so_lock);
	/* Wait for T_CAPABILITY_ACK */
	if ((error = sowaitprim(so, T_CAPABILITY_REQ, T_CAPABILITY_ACK,
	    (t_uscalar_t)sizeof (*tca), &mp, sock_capability_timeout * hz))) {
		mutex_exit(&so->so_lock);
		/*
		 * No ack arrived - remember that the provider does not
		 * support T_CAPABILITY_REQ (unless we already know better).
		 */
		PI_PROVLOCK(so->so_provinfo);
		if (so->so_provinfo->tpi_capability == PI_DONTKNOW)
			so->so_provinfo->tpi_capability = PI_NO;
		PI_PROVUNLOCK(so->so_provinfo);
		ASSERT((so->so_mode & SM_ACCEPTOR_ID) == 0);
		if (cap_bits1 & TC1_INFO) {
			/*
			 * If the T_CAPABILITY_REQ timed out and then a
			 * T_INFO_REQ gets a protocol error, most likely
			 * the capability was slow (vs. unsupported). Return
			 * ENOSR for this case as a best guess.
			 */
			if (error == ETIME) {
				return ((error = do_tinfo(so)) == EPROTO ?
				    ENOSR : error);
			}
			return (do_tinfo(so));
		}
		return (0);
	}

	if (so->so_provinfo->tpi_capability == PI_DONTKNOW) {
		PI_PROVLOCK(so->so_provinfo);
		so->so_provinfo->tpi_capability = PI_YES;
		PI_PROVUNLOCK(so->so_provinfo);
	}

	ASSERT(mp);
	tca = (struct T_capability_ack *)mp->b_rptr;

	ASSERT((cap_bits1 & TC1_INFO) == (tca->CAP_bits1 & TC1_INFO));

	cap_bits1 = tca->CAP_bits1;

	if (cap_bits1 & TC1_ACCEPTOR_ID) {
		so->so_acceptor_id = tca->ACCEPTOR_id;
		so->so_mode |= SM_ACCEPTOR_ID;
	}

	if (cap_bits1 & TC1_INFO)
		copy_tinfo(so, &tca->INFO_ack);

	mutex_exit(&so->so_lock);
	freemsg(mp);

	if (cap_bits1 & TC1_INFO)
		return (check_tinfo(so));

	return (0);
}

/*
 * Retrieve and clear the socket error.
 */
int
sogeterr(struct sonode *so)
{
	int error;

	ASSERT(MUTEX_HELD(&so->so_lock));

	error = so->so_error;
	so->so_error = 0;

	return (error);
}

/*
 * This routine is registered with the stream head to retrieve read
 * side errors.
 * It does not clear the socket error for a peeking read side operation.
 * If the error is to be cleared it sets *clearerr.
 */
int
sogetrderr(vnode_t *vp, int ispeek, int *clearerr)
{
	struct sonode *so = VTOSO(vp);
	int error;

	mutex_enter(&so->so_lock);
	if (ispeek) {
		error = so->so_error;
		*clearerr = 0;
	} else {
		error = so->so_error;
		so->so_error = 0;
		*clearerr = 1;
	}
	mutex_exit(&so->so_lock);
	return (error);
}

/*
 * This routine is registered with the stream head to retrieve write
 * side errors.
 * It does not clear the socket error for a peeking read side operation.
 * If the error is to be cleared it sets *clearerr.
839 */ 840 int 841 sogetwrerr(vnode_t *vp, int ispeek, int *clearerr) 842 { 843 struct sonode *so = VTOSO(vp); 844 int error; 845 846 mutex_enter(&so->so_lock); 847 if (so->so_state & SS_CANTSENDMORE) { 848 error = EPIPE; 849 *clearerr = 0; 850 } else { 851 error = so->so_error; 852 if (ispeek) { 853 *clearerr = 0; 854 } else { 855 so->so_error = 0; 856 *clearerr = 1; 857 } 858 } 859 mutex_exit(&so->so_lock); 860 return (error); 861 } 862 863 /* 864 * Set a nonpersistent read and write error on the socket. 865 * Used when there is a T_uderror_ind for a connected socket. 866 * The caller also needs to call strsetrerror and strsetwerror 867 * after dropping the lock. 868 */ 869 void 870 soseterror(struct sonode *so, int error) 871 { 872 ASSERT(error != 0); 873 874 ASSERT(MUTEX_HELD(&so->so_lock)); 875 so->so_error = (ushort_t)error; 876 } 877 878 void 879 soisconnecting(struct sonode *so) 880 { 881 ASSERT(MUTEX_HELD(&so->so_lock)); 882 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); 883 so->so_state |= SS_ISCONNECTING; 884 cv_broadcast(&so->so_state_cv); 885 } 886 887 void 888 soisconnected(struct sonode *so) 889 { 890 ASSERT(MUTEX_HELD(&so->so_lock)); 891 so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING); 892 so->so_state |= SS_ISCONNECTED; 893 cv_broadcast(&so->so_state_cv); 894 } 895 896 /* 897 * The caller also needs to call strsetrerror, strsetwerror and strseteof. 898 */ 899 void 900 soisdisconnected(struct sonode *so, int error) 901 { 902 ASSERT(MUTEX_HELD(&so->so_lock)); 903 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING| 904 SS_LADDR_VALID|SS_FADDR_VALID); 905 so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE); 906 so->so_error = (ushort_t)error; 907 if (so->so_peercred != NULL) { 908 crfree(so->so_peercred); 909 so->so_peercred = NULL; 910 } 911 cv_broadcast(&so->so_state_cv); 912 } 913 914 /* 915 * For connected AF_UNIX SOCK_DGRAM sockets when the peer closes. 916 * Does not affect write side. 
917 * The caller also has to call strsetrerror. 918 */ 919 static void 920 sobreakconn(struct sonode *so, int error) 921 { 922 ASSERT(MUTEX_HELD(&so->so_lock)); 923 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); 924 so->so_error = (ushort_t)error; 925 cv_broadcast(&so->so_state_cv); 926 } 927 928 /* 929 * Can no longer send. 930 * Caller must also call strsetwerror. 931 * 932 * We mark the peer address as no longer valid for getpeername, but 933 * leave it around for so_unix_close to notify the peer (that 934 * transport has no addressing held at that layer). 935 */ 936 void 937 socantsendmore(struct sonode *so) 938 { 939 ASSERT(MUTEX_HELD(&so->so_lock)); 940 so->so_state = so->so_state & ~SS_FADDR_VALID | SS_CANTSENDMORE; 941 cv_broadcast(&so->so_state_cv); 942 } 943 944 /* 945 * The caller must call strseteof(,1) as well as this routine 946 * to change the socket state. 947 */ 948 void 949 socantrcvmore(struct sonode *so) 950 { 951 ASSERT(MUTEX_HELD(&so->so_lock)); 952 so->so_state |= SS_CANTRCVMORE; 953 cv_broadcast(&so->so_state_cv); 954 } 955 956 /* 957 * The caller has sent down a "request_prim" primitive and wants to wait for 958 * an ack ("ack_prim") or an T_ERROR_ACK for it. 959 * The specified "ack_prim" can be a T_OK_ACK. 960 * 961 * Assumes that all the TPI acks are M_PCPROTO messages. 962 * 963 * Note that the socket is single-threaded (using so_lock_single) 964 * for all operations that generate TPI ack messages. Since 965 * only TPI ack messages are M_PCPROTO we should never receive 966 * anything except either the ack we are expecting or a T_ERROR_ACK 967 * for the same primitive. 
 */
int
sowaitprim(struct sonode *so, t_scalar_t request_prim, t_scalar_t ack_prim,
    t_uscalar_t min_size, mblk_t **mpp, clock_t wait)
{
	mblk_t *mp;
	union T_primitives *tpr;
	int error;

	dprintso(so, 1, ("sowaitprim(%p, %d, %d, %d, %p, %lu)\n",
	    so, request_prim, ack_prim, min_size, mpp, wait));

	ASSERT(MUTEX_HELD(&so->so_lock));

	error = sowaitack(so, &mp, wait);
	if (error)
		return (error);

	dprintso(so, 1, ("got msg %p\n", mp));
	if (DB_TYPE(mp) != M_PCPROTO ||
	    MBLKL(mp) < sizeof (tpr->type)) {
		freemsg(mp);
		eprintsoline(so, EPROTO);
		return (EPROTO);
	}
	tpr = (union T_primitives *)mp->b_rptr;
	/*
	 * Did we get the primitive that we were asking for?
	 * For T_OK_ACK we also check that it matches the request primitive.
	 */
	if (tpr->type == ack_prim &&
	    (ack_prim != T_OK_ACK ||
	    tpr->ok_ack.CORRECT_prim == request_prim)) {
		if (MBLKL(mp) >= (ssize_t)min_size) {
			/* Found what we are looking for */
			*mpp = mp;
			return (0);
		}
		/* Too short */
		freemsg(mp);
		eprintsoline(so, EPROTO);
		return (EPROTO);
	}

	if (tpr->type == T_ERROR_ACK &&
	    tpr->error_ack.ERROR_prim == request_prim) {
		/* Error to the primitive we were looking for */
		if (tpr->error_ack.TLI_error == TSYSERR) {
			error = tpr->error_ack.UNIX_error;
		} else {
			error = tlitosyserr(tpr->error_ack.TLI_error);
		}
		dprintso(so, 0, ("error_ack for %d: %d/%d ->%d\n",
		    tpr->error_ack.ERROR_prim,
		    tpr->error_ack.TLI_error,
		    tpr->error_ack.UNIX_error,
		    error));
		freemsg(mp);
		return (error);
	}
	/*
	 * Wrong primitive or T_ERROR_ACK for the wrong primitive
	 */
#ifdef DEBUG
	if (tpr->type == T_ERROR_ACK) {
		dprintso(so, 0, ("error_ack for %d: %d/%d\n",
		    tpr->error_ack.ERROR_prim,
		    tpr->error_ack.TLI_error,
		    tpr->error_ack.UNIX_error));
	} else if (tpr->type == T_OK_ACK) {
		dprintso(so, 0, ("ok_ack for %d, expected %d for %d\n",
		    tpr->ok_ack.CORRECT_prim,
		    ack_prim, request_prim));
	} else {
		dprintso(so, 0,
		    ("unexpected primitive %d, expected %d for %d\n",
		    tpr->type, ack_prim, request_prim));
	}
#endif /* DEBUG */

	freemsg(mp);
	eprintsoline(so, EPROTO);
	return (EPROTO);
}

/*
 * Wait for a T_OK_ACK for the specified primitive.
 */
int
sowaitokack(struct sonode *so, t_scalar_t request_prim)
{
	mblk_t *mp;
	int error;

	error = sowaitprim(so, request_prim, T_OK_ACK,
	    (t_uscalar_t)sizeof (struct T_ok_ack), &mp, 0);
	if (error)
		return (error);
	freemsg(mp);
	return (0);
}

/*
 * Queue a received TPI ack message on so_ack_mp.
 * Any previously queued (unconsumed) ack is discarded first.
 */
void
soqueueack(struct sonode *so, mblk_t *mp)
{
	if (DB_TYPE(mp) != M_PCPROTO) {
		zcmn_err(getzoneid(), CE_WARN,
		    "sockfs: received unexpected M_PROTO TPI ack. Prim %d\n",
		    *(t_scalar_t *)mp->b_rptr);
		freemsg(mp);
		return;
	}

	mutex_enter(&so->so_lock);
	if (so->so_ack_mp != NULL) {
		dprintso(so, 1, ("so_ack_mp already set\n"));
		freemsg(so->so_ack_mp);
		so->so_ack_mp = NULL;
	}
	so->so_ack_mp = mp;
	cv_broadcast(&so->so_ack_cv);
	mutex_exit(&so->so_lock);
}

/*
 * Wait for a TPI ack ignoring signals and errors.
 * A non-zero "wait" bounds the wait to that many ticks and returns
 * ETIME on expiry.
 */
int
sowaitack(struct sonode *so, mblk_t **mpp, clock_t wait)
{
	ASSERT(MUTEX_HELD(&so->so_lock));

	while (so->so_ack_mp == NULL) {
#ifdef SOCK_TEST
		/* Force a bounded wait for code coverage testing. */
		if (wait == 0 && sock_test_timelimit != 0)
			wait = sock_test_timelimit;
#endif
		if (wait != 0) {
			/*
			 * Only wait for the time limit.
			 */
			clock_t now;

			time_to_wait(&now, wait);
			if (cv_timedwait(&so->so_ack_cv, &so->so_lock,
			    now) == -1) {
				eprintsoline(so, ETIME);
				return (ETIME);
			}
		} else
			cv_wait(&so->so_ack_cv, &so->so_lock);
	}
	*mpp = so->so_ack_mp;
#ifdef DEBUG
	{
		union T_primitives *tpr;
		mblk_t *mp = *mpp;

		tpr = (union T_primitives *)mp->b_rptr;
		ASSERT(DB_TYPE(mp) == M_PCPROTO);
		ASSERT(tpr->type == T_OK_ACK ||
		    tpr->type == T_ERROR_ACK ||
		    tpr->type == T_BIND_ACK ||
		    tpr->type == T_CAPABILITY_ACK ||
		    tpr->type == T_INFO_ACK ||
		    tpr->type == T_OPTMGMT_ACK);
	}
#endif /* DEBUG */
	so->so_ack_mp = NULL;
	return (0);
}

/*
 * Queue a received T_CONN_IND message on so_conn_ind_head/tail.
 */
void
soqueueconnind(struct sonode *so, mblk_t *mp)
{
	if (DB_TYPE(mp) != M_PROTO) {
		zcmn_err(getzoneid(), CE_WARN,
		    "sockfs: received unexpected M_PCPROTO T_CONN_IND\n");
		freemsg(mp);
		return;
	}

	mutex_enter(&so->so_lock);
	ASSERT(mp->b_next == NULL);
	if (so->so_conn_ind_head == NULL) {
		so->so_conn_ind_head = mp;
		so->so_state |= SS_HASCONNIND;
	} else {
		ASSERT(so->so_state & SS_HASCONNIND);
		ASSERT(so->so_conn_ind_tail->b_next == NULL);
		so->so_conn_ind_tail->b_next = mp;
	}
	so->so_conn_ind_tail = mp;
	/* Wakeup a single consumer of the T_CONN_IND */
	cv_signal(&so->so_connind_cv);
	mutex_exit(&so->so_lock);
}

/*
 * Wait for a T_CONN_IND.
 * Don't wait if nonblocking.
 * Accept signals and socket errors.
 */
int
sowaitconnind(struct sonode *so, int fmode, mblk_t **mpp)
{
	mblk_t *mp;
	int error = 0;

	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
	mutex_enter(&so->so_lock);
check_error:
	/* A pending socket error takes precedence over any queued ind. */
	if (so->so_error) {
		error = sogeterr(so);
		if (error) {
			mutex_exit(&so->so_lock);
			return (error);
		}
	}

	if (so->so_conn_ind_head == NULL) {
		if (fmode & (FNDELAY|FNONBLOCK)) {
			error = EWOULDBLOCK;
			goto done;
		}
		if (!cv_wait_sig_swap(&so->so_connind_cv, &so->so_lock)) {
			error = EINTR;
			goto done;
		}
		/* Woken up: recheck so_error before consuming an ind. */
		goto check_error;
	}
	/* Dequeue the first T_CONN_IND; ownership passes to the caller. */
	mp = so->so_conn_ind_head;
	so->so_conn_ind_head = mp->b_next;
	mp->b_next = NULL;
	if (so->so_conn_ind_head == NULL) {
		ASSERT(so->so_conn_ind_tail == mp);
		so->so_conn_ind_tail = NULL;
		so->so_state &= ~SS_HASCONNIND;
	}
	*mpp = mp;
done:
	mutex_exit(&so->so_lock);
	return (error);
}

/*
 * Flush a T_CONN_IND matching the sequence number from the list.
 * Return zero if found; non-zero otherwise.
 * This is called very infrequently thus it is ok to do a linear search.
 * On a match the indication is unlinked and freed, and so_error is set
 * to ECONNABORTED so a subsequent accept on this ind fails.
 */
int
soflushconnind(struct sonode *so, t_scalar_t seqno)
{
	mblk_t *prevmp, *mp;
	struct T_conn_ind *tci;

	mutex_enter(&so->so_lock);
	for (prevmp = NULL, mp = so->so_conn_ind_head; mp != NULL;
	    prevmp = mp, mp = mp->b_next) {
		tci = (struct T_conn_ind *)mp->b_rptr;
		if (tci->SEQ_number == seqno) {
			dprintso(so, 1,
			    ("t_discon_ind: found T_CONN_IND %d\n", seqno));
			/* Deleting last? */
			if (so->so_conn_ind_tail == mp) {
				so->so_conn_ind_tail = prevmp;
			}
			if (prevmp == NULL) {
				/* Deleting first */
				so->so_conn_ind_head = mp->b_next;
			} else {
				prevmp->b_next = mp->b_next;
			}
			mp->b_next = NULL;
			if (so->so_conn_ind_head == NULL) {
				ASSERT(so->so_conn_ind_tail == NULL);
				so->so_state &= ~SS_HASCONNIND;
			} else {
				ASSERT(so->so_conn_ind_tail != NULL);
			}
			so->so_error = ECONNABORTED;
			mutex_exit(&so->so_lock);

			/*
			 * T_SSL_PROXY_CONN_IND may carry a handle for
			 * an SSL context, and needs to be released.
			 */
			if ((tci->PRIM_type == T_SSL_PROXY_CONN_IND) &&
			    (mp->b_cont != NULL)) {
				kssl_ctx_t kssl_ctx;

				ASSERT(MBLKL(mp->b_cont) ==
				    sizeof (kssl_ctx_t));
				kssl_ctx = *((kssl_ctx_t *)mp->b_cont->b_rptr);
				kssl_release_ctx(kssl_ctx);
			}
			freemsg(mp);
			return (0);
		}
	}
	mutex_exit(&so->so_lock);
	dprintso(so, 1, ("t_discon_ind: NOT found T_CONN_IND %d\n", seqno));
	return (-1);
}

/*
 * Wait until the socket is connected or there is an error.
 * fmode should contain any nonblocking flags. nosig should be
 * set if the caller does not want the wait to be interrupted by a signal.
 * Caller must hold so_lock; it is held across the cv wait and on return.
 * Returns 0 once SS_ISCONNECTED is set, EINPROGRESS for nonblocking
 * callers, EINTR on signal, or the pending socket error.
 */
int
sowaitconnected(struct sonode *so, int fmode, int nosig)
{
	int error;

	ASSERT(MUTEX_HELD(&so->so_lock));

	while ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) ==
	    SS_ISCONNECTING && so->so_error == 0) {

		dprintso(so, 1, ("waiting for SS_ISCONNECTED on %p\n", so));
		if (fmode & (FNDELAY|FNONBLOCK))
			return (EINPROGRESS);

		if (nosig)
			cv_wait(&so->so_state_cv, &so->so_lock);
		else if (!cv_wait_sig_swap(&so->so_state_cv, &so->so_lock)) {
			/*
			 * Return EINTR and let the application use
			 * nonblocking techniques for detecting when
			 * the connection has been established.
			 */
			return (EINTR);
		}
		dprintso(so, 1, ("awoken on %p\n", so));
	}

	if (so->so_error != 0) {
		error = sogeterr(so);
		ASSERT(error != 0);
		dprintso(so, 1, ("sowaitconnected: error %d\n", error));
		return (error);
	}
	if (!(so->so_state & SS_ISCONNECTED)) {
		/*
		 * Could have received a T_ORDREL_IND or a T_DISCON_IND with
		 * zero errno. Or another thread could have consumed so_error
		 * e.g. by calling read.
		 */
		error = ECONNREFUSED;
		dprintso(so, 1, ("sowaitconnected: error %d\n", error));
		return (error);
	}
	return (0);
}


/*
 * Handle the signal generation aspect of urgent data.
 * 'extrasig' requests a signal/pollwakeup even when one was already
 * generated for this urgent event (TCP may update the urgent pointer).
 * Updates *signals and *pollwakeups for the stream head to act on.
 */
static void
so_oob_sig(struct sonode *so, int extrasig,
    strsigset_t *signals, strpollset_t *pollwakeups)
{
	ASSERT(MUTEX_HELD(&so->so_lock));

	ASSERT(so_verify_oobstate(so));
	ASSERT(so->so_oobsigcnt >= so->so_oobcnt);
	if (so->so_oobsigcnt > so->so_oobcnt) {
		/*
		 * Signal has already been generated once for this
		 * urgent "event". However, since TCP can receive updated
		 * urgent pointers we still generate a signal.
		 */
		ASSERT(so->so_state & SS_OOBPEND);
		if (extrasig) {
			*signals |= S_RDBAND;
			*pollwakeups |= POLLRDBAND;
		}
		return;
	}

	so->so_oobsigcnt++;
	ASSERT(so->so_oobsigcnt > 0);	/* Wraparound */
	ASSERT(so->so_oobsigcnt > so->so_oobcnt);

	/*
	 * Record (for select/poll) that urgent data is pending.
	 */
	so->so_state |= SS_OOBPEND;
	/*
	 * New urgent data on the way so forget about any old
	 * urgent data.
	 */
	so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);
	if (so->so_oobmsg != NULL) {
		dprintso(so, 1, ("sock: discarding old oob\n"));
		freemsg(so->so_oobmsg);
		so->so_oobmsg = NULL;
	}
	*signals |= S_RDBAND;
	*pollwakeups |= POLLRDBAND;
	ASSERT(so_verify_oobstate(so));
}

/*
 * Handle the processing of the T_EXDATA_IND with urgent data.
 * Returns the T_EXDATA_IND if it should be queued on the read queue.
 * Caller must hold so_lock; so_oob_sig() must already have accounted
 * for the signal (so_oobsigcnt > so_oobcnt on entry).
 */
/* ARGSUSED2 */
static mblk_t *
so_oob_exdata(struct sonode *so, mblk_t *mp,
    strsigset_t *signals, strpollset_t *pollwakeups)
{
	ASSERT(MUTEX_HELD(&so->so_lock));

	ASSERT(so_verify_oobstate(so));

	ASSERT(so->so_oobsigcnt > so->so_oobcnt);

	so->so_oobcnt++;
	ASSERT(so->so_oobcnt > 0);	/* wraparound? */
	ASSERT(so->so_oobsigcnt >= so->so_oobcnt);

	/*
	 * Set MSGMARK for SIOCATMARK.
	 */
	mp->b_flag |= MSGMARK;

	ASSERT(so_verify_oobstate(so));
	return (mp);
}

/*
 * Handle the processing of the actual urgent data.
 * Returns the data mblk if it should be queued on the read queue
 * (SO_OOBINLINE case); otherwise the mblk is stashed in so_oobmsg
 * for retrieval via MSG_OOB and NULL is returned.
 */
static mblk_t *
so_oob_data(struct sonode *so, mblk_t *mp,
    strsigset_t *signals, strpollset_t *pollwakeups)
{
	ASSERT(MUTEX_HELD(&so->so_lock));

	ASSERT(so_verify_oobstate(so));

	ASSERT(so->so_oobsigcnt >= so->so_oobcnt);
	ASSERT(mp != NULL);
	/*
	 * For OOBINLINE we keep the data in the T_EXDATA_IND.
	 * Otherwise we store it in so_oobmsg.
	 */
	ASSERT(so->so_oobmsg == NULL);
	if (so->so_options & SO_OOBINLINE) {
		*pollwakeups |= POLLIN | POLLRDNORM | POLLRDBAND;
		*signals |= S_INPUT | S_RDNORM;
	} else {
		*pollwakeups |= POLLRDBAND;
		so->so_state |= SS_HAVEOOBDATA;
		so->so_oobmsg = mp;
		mp = NULL;
	}
	ASSERT(so_verify_oobstate(so));
	return (mp);
}

/*
 * Caller must hold the mutex.
 * For delayed processing, save the T_DISCON_IND received
 * from below on so_discon_ind_mp.
 * When the message is processed the framework will call:
 *	(*func)(so, mp);
 * The callback pointer is stashed in the mblk's b_prev field.
 */
static void
so_save_discon_ind(struct sonode *so,
	mblk_t *mp,
	void (*func)(struct sonode *so, mblk_t *))
{
	ASSERT(MUTEX_HELD(&so->so_lock));

	/*
	 * Discard new T_DISCON_IND if we have already received another.
	 * Currently the earlier message can either be on so_discon_ind_mp
	 * or being processed.
	 */
	if (so->so_discon_ind_mp != NULL || (so->so_flag & SOASYNC_UNBIND)) {
		zcmn_err(getzoneid(), CE_WARN,
		    "sockfs: received unexpected additional T_DISCON_IND\n");
		freemsg(mp);
		return;
	}
	mp->b_prev = (mblk_t *)func;
	mp->b_next = NULL;
	so->so_discon_ind_mp = mp;
}

/*
 * Caller must hold the mutex and make sure that either SOLOCKED
 * or SOASYNC_UNBIND is set. Called from so_unlock_single().
 * Perform delayed processing of T_DISCON_IND message on so_discon_ind_mp.
 * Need to ensure that strsock_proto() will not end up sleeping for
 * SOASYNC_UNBIND, while executing this function.
 */
void
so_drain_discon_ind(struct sonode *so)
{
	mblk_t *bp;
	void (*func)(struct sonode *so, mblk_t *);

	ASSERT(MUTEX_HELD(&so->so_lock));
	ASSERT(so->so_flag & (SOLOCKED|SOASYNC_UNBIND));

	/* Process T_DISCON_IND on so_discon_ind_mp */
	if ((bp = so->so_discon_ind_mp) != NULL) {
		so->so_discon_ind_mp = NULL;
		/* Recover the callback stashed by so_save_discon_ind(). */
		func = (void (*)())bp->b_prev;
		bp->b_prev = NULL;

		/*
		 * This (*func) is supposed to generate a message downstream
		 * and we need to have a flag set until the corresponding
		 * upstream message reaches stream head.
		 * When processing T_DISCON_IND in strsock_discon_ind
		 * we hold SOASYNC_UNBIND when sending T_UNBIND_REQ down and
		 * drop the flag after we get the ACK in strsock_proto.
		 */
		(void) (*func)(so, bp);
	}
}

/*
 * Caller must hold the mutex.
 * Remove the T_DISCON_IND on so_discon_ind_mp.
 */
void
so_flush_discon_ind(struct sonode *so)
{
	mblk_t *bp;

	ASSERT(MUTEX_HELD(&so->so_lock));

	/*
	 * Remove T_DISCON_IND mblk at so_discon_ind_mp.
	 */
	if ((bp = so->so_discon_ind_mp) != NULL) {
		so->so_discon_ind_mp = NULL;
		bp->b_prev = NULL;
		freemsg(bp);
	}
}

/*
 * Caller must hold the mutex.
 *
 * This function is used to process the T_DISCON_IND message. It does
 * immediate processing when called from strsock_proto and delayed
 * processing of discon_ind saved on so_discon_ind_mp when called from
 * so_drain_discon_ind. When a T_DISCON_IND message is saved in
 * so_discon_ind_mp for delayed processing, this function is registered
 * as the callback function to process the message.
 *
 * SOASYNC_UNBIND should be held in this function, during the non-blocking
 * unbind operation, and should be released only after we receive the ACK
 * in strsock_proto, for the T_UNBIND_REQ sent here.
Since SOLOCKED is not set,
 * no TPI messages would be sent down at this time. This is to prevent M_FLUSH
 * sent from either this function or tcp_unbind(), flushing away any TPI
 * message that is being sent down and stays in a lower module's queue.
 *
 * This function drops so_lock and grabs it again.
 *
 * 'discon_mp' is the T_DISCON_IND (M_PROTO); it is consumed (freed)
 * here. On return so_lock is held again.
 */
static void
strsock_discon_ind(struct sonode *so, mblk_t *discon_mp)
{
	struct vnode *vp;
	struct stdata *stp;
	union T_primitives *tpr;
	struct T_unbind_req *ubr;
	mblk_t *mp;
	int error;

	ASSERT(MUTEX_HELD(&so->so_lock));
	ASSERT(discon_mp);
	ASSERT(discon_mp->b_rptr);

	tpr = (union T_primitives *)discon_mp->b_rptr;
	ASSERT(tpr->type == T_DISCON_IND);

	vp = SOTOV(so);
	stp = vp->v_stream;
	ASSERT(stp);

	/*
	 * Not a listener
	 */
	ASSERT((so->so_state & SS_ACCEPTCONN) == 0);

	/*
	 * This assumes that the name space for DISCON_reason
	 * is the errno name space.
	 */
	soisdisconnected(so, tpr->discon_ind.DISCON_reason);

	/*
	 * Unbind with the transport without blocking.
	 * If we've already received a T_DISCON_IND do not unbind.
	 *
	 * If there is no preallocated unbind message, we have already
	 * unbound with the transport
	 *
	 * If the socket is not bound, no need to unbind.
	 */
	mp = so->so_unbind_mp;
	if (mp == NULL) {
		ASSERT(!(so->so_state & SS_ISBOUND));
		mutex_exit(&so->so_lock);
	} else if (!(so->so_state & SS_ISBOUND)) {
		mutex_exit(&so->so_lock);
	} else {
		so->so_unbind_mp = NULL;

		/*
		 * Is another T_DISCON_IND being processed.
		 */
		ASSERT((so->so_flag & SOASYNC_UNBIND) == 0);

		/*
		 * Make strsock_proto ignore T_OK_ACK and T_ERROR_ACK for
		 * this unbind. Set SOASYNC_UNBIND. This should be cleared
		 * only after we receive the ACK in strsock_proto.
		 */
		so->so_flag |= SOASYNC_UNBIND;
		ASSERT(!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)));
		so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN|SS_LADDR_VALID);
		mutex_exit(&so->so_lock);

		/*
		 * Send down T_UNBIND_REQ ignoring flow control.
		 * XXX Assumes that MSG_IGNFLOW implies that this thread
		 * does not run service procedures.
		 */
		ASSERT(DB_TYPE(mp) == M_PROTO);
		ubr = (struct T_unbind_req *)mp->b_rptr;
		mp->b_wptr += sizeof (*ubr);
		ubr->PRIM_type = T_UNBIND_REQ;

		/*
		 * Flush the read and write side (except stream head read queue)
		 * and send down T_UNBIND_REQ.
		 */
		(void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW);
		error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
		    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
		/* LINTED - warning: statement has no consequent: if */
		if (error) {
			eprintsoline(so, error);
		}
	}

	/* Propagate the disconnect to readers/writers at the stream head. */
	if (tpr->discon_ind.DISCON_reason != 0)
		strsetrerror(SOTOV(so), 0, 0, sogetrderr);
	strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
	strseteof(SOTOV(so), 1);
	/*
	 * strseteof takes care of read side wakeups,
	 * pollwakeups, and signals.
	 */
	dprintso(so, 1, ("T_DISCON_IND: error %d\n", so->so_error));
	freemsg(discon_mp);


	/* Write-side wakeups must be done by hand. */
	pollwakeup(&stp->sd_pollist, POLLOUT);
	mutex_enter(&stp->sd_lock);

	/*
	 * Wake sleeping write
	 */
	if (stp->sd_flag & WSLEEP) {
		stp->sd_flag &= ~WSLEEP;
		cv_broadcast(&stp->sd_wrq->q_wait);
	}

	/*
	 * strsendsig can handle multiple signals with a
	 * single call. Send SIGPOLL for S_OUTPUT event.
	 */
	if (stp->sd_sigflags & S_OUTPUT)
		strsendsig(stp->sd_siglist, S_OUTPUT, 0, 0);

	mutex_exit(&stp->sd_lock);
	mutex_enter(&so->so_lock);
}

/*
 * This routine is registered with the stream head to receive M_PROTO
 * and M_PCPROTO messages.
1669 * 1670 * Returns NULL if the message was consumed. 1671 * Returns an mblk to make that mblk be processed (and queued) by the stream 1672 * head. 1673 * 1674 * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and 1675 * *pollwakeups) for the stream head to take action on. Note that since 1676 * sockets always deliver SIGIO for every new piece of data this routine 1677 * never sets *firstmsgsigs; any signals are returned in *allmsgsigs. 1678 * 1679 * This routine handles all data related TPI messages independent of 1680 * the type of the socket i.e. it doesn't care if T_UNITDATA_IND message 1681 * arrive on a SOCK_STREAM. 1682 */ 1683 static mblk_t * 1684 strsock_proto(vnode_t *vp, mblk_t *mp, 1685 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 1686 strsigset_t *allmsgsigs, strpollset_t *pollwakeups) 1687 { 1688 union T_primitives *tpr; 1689 struct sonode *so; 1690 1691 so = VTOSO(vp); 1692 1693 dprintso(so, 1, ("strsock_proto(%p, %p)\n", vp, mp)); 1694 1695 /* Set default return values */ 1696 *firstmsgsigs = *wakeups = *allmsgsigs = *pollwakeups = 0; 1697 1698 ASSERT(DB_TYPE(mp) == M_PROTO || 1699 DB_TYPE(mp) == M_PCPROTO); 1700 1701 if (MBLKL(mp) < sizeof (tpr->type)) { 1702 /* The message is too short to even contain the primitive */ 1703 zcmn_err(getzoneid(), CE_WARN, 1704 "sockfs: Too short TPI message received. Len = %ld\n", 1705 (ptrdiff_t)(MBLKL(mp))); 1706 freemsg(mp); 1707 return (NULL); 1708 } 1709 if (!__TPI_PRIM_ISALIGNED(mp->b_rptr)) { 1710 /* The read pointer is not aligned correctly for TPI */ 1711 zcmn_err(getzoneid(), CE_WARN, 1712 "sockfs: Unaligned TPI message received. 
rptr = %p\n", 1713 (void *)mp->b_rptr); 1714 freemsg(mp); 1715 return (NULL); 1716 } 1717 tpr = (union T_primitives *)mp->b_rptr; 1718 dprintso(so, 1, ("strsock_proto: primitive %d\n", tpr->type)); 1719 1720 switch (tpr->type) { 1721 1722 case T_DATA_IND: 1723 if (MBLKL(mp) < sizeof (struct T_data_ind)) { 1724 zcmn_err(getzoneid(), CE_WARN, 1725 "sockfs: Too short T_DATA_IND. Len = %ld\n", 1726 (ptrdiff_t)(MBLKL(mp))); 1727 freemsg(mp); 1728 return (NULL); 1729 } 1730 /* 1731 * Ignore zero-length T_DATA_IND messages. These might be 1732 * generated by some transports. 1733 * This is needed to prevent read (which skips the M_PROTO 1734 * part) to unexpectedly return 0 (or return EWOULDBLOCK 1735 * on a non-blocking socket after select/poll has indicated 1736 * that data is available). 1737 */ 1738 if (msgdsize(mp->b_cont) == 0) { 1739 dprintso(so, 0, 1740 ("strsock_proto: zero length T_DATA_IND\n")); 1741 freemsg(mp); 1742 return (NULL); 1743 } 1744 *allmsgsigs = S_INPUT | S_RDNORM; 1745 *pollwakeups = POLLIN | POLLRDNORM; 1746 *wakeups = RSLEEP; 1747 return (mp); 1748 1749 case T_UNITDATA_IND: { 1750 struct T_unitdata_ind *tudi = &tpr->unitdata_ind; 1751 void *addr; 1752 t_uscalar_t addrlen; 1753 1754 if (MBLKL(mp) < sizeof (struct T_unitdata_ind)) { 1755 zcmn_err(getzoneid(), CE_WARN, 1756 "sockfs: Too short T_UNITDATA_IND. Len = %ld\n", 1757 (ptrdiff_t)(MBLKL(mp))); 1758 freemsg(mp); 1759 return (NULL); 1760 } 1761 1762 /* Is this is not a connected datagram socket? */ 1763 if ((so->so_mode & SM_CONNREQUIRED) || 1764 !(so->so_state & SS_ISCONNECTED)) { 1765 /* 1766 * Not a connected datagram socket. Look for 1767 * the SO_UNIX_CLOSE option. If such an option is found 1768 * discard the message (since it has no meaning 1769 * unless connected). 
1770 */ 1771 if (so->so_family == AF_UNIX && msgdsize(mp) == 0 && 1772 tudi->OPT_length != 0) { 1773 void *opt; 1774 t_uscalar_t optlen = tudi->OPT_length; 1775 1776 opt = sogetoff(mp, tudi->OPT_offset, 1777 optlen, __TPI_ALIGN_SIZE); 1778 if (opt == NULL) { 1779 /* The len/off falls outside mp */ 1780 freemsg(mp); 1781 mutex_enter(&so->so_lock); 1782 soseterror(so, EPROTO); 1783 mutex_exit(&so->so_lock); 1784 zcmn_err(getzoneid(), CE_WARN, 1785 "sockfs: T_unidata_ind with " 1786 "invalid optlen/offset %u/%d\n", 1787 optlen, tudi->OPT_offset); 1788 return (NULL); 1789 } 1790 if (so_getopt_unix_close(opt, optlen)) { 1791 freemsg(mp); 1792 return (NULL); 1793 } 1794 } 1795 *allmsgsigs = S_INPUT | S_RDNORM; 1796 *pollwakeups = POLLIN | POLLRDNORM; 1797 *wakeups = RSLEEP; 1798 if (audit_active) 1799 audit_sock(T_UNITDATA_IND, strvp2wq(vp), 1800 mp, 0); 1801 return (mp); 1802 } 1803 1804 /* 1805 * A connect datagram socket. For AF_INET{,6} we verify that 1806 * the source address matches the "connected to" address. 1807 * The semantics of AF_UNIX sockets is to not verify 1808 * the source address. 1809 * Note that this source address verification is transport 1810 * specific. Thus the real fix would be to extent TPI 1811 * to allow T_CONN_REQ messages to be send to connectionless 1812 * transport providers and always let the transport provider 1813 * do whatever filtering is needed. 1814 * 1815 * The verification/filtering semantics for transports 1816 * other than AF_INET and AF_UNIX are unknown. The choice 1817 * would be to either filter using bcmp or let all messages 1818 * get through. This code does not filter other address 1819 * families since this at least allows the application to 1820 * work around any missing filtering. 1821 * 1822 * XXX Should we move filtering to UDP/ICMP??? 1823 * That would require passing e.g. a T_DISCON_REQ to UDP 1824 * when the socket becomes unconnected. 
1825 */ 1826 addrlen = tudi->SRC_length; 1827 /* 1828 * The alignment restriction is really to strict but 1829 * we want enough alignment to inspect the fields of 1830 * a sockaddr_in. 1831 */ 1832 addr = sogetoff(mp, tudi->SRC_offset, addrlen, 1833 __TPI_ALIGN_SIZE); 1834 if (addr == NULL) { 1835 freemsg(mp); 1836 mutex_enter(&so->so_lock); 1837 soseterror(so, EPROTO); 1838 mutex_exit(&so->so_lock); 1839 zcmn_err(getzoneid(), CE_WARN, 1840 "sockfs: T_unidata_ind with invalid " 1841 "addrlen/offset %u/%d\n", 1842 addrlen, tudi->SRC_offset); 1843 return (NULL); 1844 } 1845 1846 if (so->so_family == AF_INET) { 1847 /* 1848 * For AF_INET we allow wildcarding both sin_addr 1849 * and sin_port. 1850 */ 1851 struct sockaddr_in *faddr, *sin; 1852 1853 /* Prevent so_faddr_sa from changing while accessed */ 1854 mutex_enter(&so->so_lock); 1855 ASSERT(so->so_faddr_len == 1856 (socklen_t)sizeof (struct sockaddr_in)); 1857 faddr = (struct sockaddr_in *)so->so_faddr_sa; 1858 sin = (struct sockaddr_in *)addr; 1859 if (addrlen != 1860 (t_uscalar_t)sizeof (struct sockaddr_in) || 1861 (sin->sin_addr.s_addr != faddr->sin_addr.s_addr && 1862 faddr->sin_addr.s_addr != INADDR_ANY) || 1863 (so->so_type != SOCK_RAW && 1864 sin->sin_port != faddr->sin_port && 1865 faddr->sin_port != 0)) { 1866 #ifdef DEBUG 1867 dprintso(so, 0, 1868 ("sockfs: T_UNITDATA_IND mismatch: %s", 1869 pr_addr(so->so_family, 1870 (struct sockaddr *)addr, 1871 addrlen))); 1872 dprintso(so, 0, (" - %s\n", 1873 pr_addr(so->so_family, so->so_faddr_sa, 1874 (t_uscalar_t)so->so_faddr_len))); 1875 #endif /* DEBUG */ 1876 mutex_exit(&so->so_lock); 1877 freemsg(mp); 1878 return (NULL); 1879 } 1880 mutex_exit(&so->so_lock); 1881 } else if (so->so_family == AF_INET6) { 1882 /* 1883 * For AF_INET6 we allow wildcarding both sin6_addr 1884 * and sin6_port. 
1885 */ 1886 struct sockaddr_in6 *faddr6, *sin6; 1887 static struct in6_addr zeroes; /* inits to all zeros */ 1888 1889 /* Prevent so_faddr_sa from changing while accessed */ 1890 mutex_enter(&so->so_lock); 1891 ASSERT(so->so_faddr_len == 1892 (socklen_t)sizeof (struct sockaddr_in6)); 1893 faddr6 = (struct sockaddr_in6 *)so->so_faddr_sa; 1894 sin6 = (struct sockaddr_in6 *)addr; 1895 /* XXX could we get a mapped address ::ffff:0.0.0.0 ? */ 1896 if (addrlen != 1897 (t_uscalar_t)sizeof (struct sockaddr_in6) || 1898 (!IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, 1899 &faddr6->sin6_addr) && 1900 !IN6_ARE_ADDR_EQUAL(&faddr6->sin6_addr, &zeroes)) || 1901 (so->so_type != SOCK_RAW && 1902 sin6->sin6_port != faddr6->sin6_port && 1903 faddr6->sin6_port != 0)) { 1904 #ifdef DEBUG 1905 dprintso(so, 0, 1906 ("sockfs: T_UNITDATA_IND mismatch: %s", 1907 pr_addr(so->so_family, 1908 (struct sockaddr *)addr, 1909 addrlen))); 1910 dprintso(so, 0, (" - %s\n", 1911 pr_addr(so->so_family, so->so_faddr_sa, 1912 (t_uscalar_t)so->so_faddr_len))); 1913 #endif /* DEBUG */ 1914 mutex_exit(&so->so_lock); 1915 freemsg(mp); 1916 return (NULL); 1917 } 1918 mutex_exit(&so->so_lock); 1919 } else if (so->so_family == AF_UNIX && 1920 msgdsize(mp->b_cont) == 0 && 1921 tudi->OPT_length != 0) { 1922 /* 1923 * Attempt to extract AF_UNIX 1924 * SO_UNIX_CLOSE indication from options. 1925 */ 1926 void *opt; 1927 t_uscalar_t optlen = tudi->OPT_length; 1928 1929 opt = sogetoff(mp, tudi->OPT_offset, 1930 optlen, __TPI_ALIGN_SIZE); 1931 if (opt == NULL) { 1932 /* The len/off falls outside mp */ 1933 freemsg(mp); 1934 mutex_enter(&so->so_lock); 1935 soseterror(so, EPROTO); 1936 mutex_exit(&so->so_lock); 1937 zcmn_err(getzoneid(), CE_WARN, 1938 "sockfs: T_unidata_ind with invalid " 1939 "optlen/offset %u/%d\n", 1940 optlen, tudi->OPT_offset); 1941 return (NULL); 1942 } 1943 /* 1944 * If we received a unix close indication mark the 1945 * socket and discard this message. 
1946 */ 1947 if (so_getopt_unix_close(opt, optlen)) { 1948 mutex_enter(&so->so_lock); 1949 sobreakconn(so, ECONNRESET); 1950 mutex_exit(&so->so_lock); 1951 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 1952 freemsg(mp); 1953 *pollwakeups = POLLIN | POLLRDNORM; 1954 *allmsgsigs = S_INPUT | S_RDNORM; 1955 *wakeups = RSLEEP; 1956 return (NULL); 1957 } 1958 } 1959 *allmsgsigs = S_INPUT | S_RDNORM; 1960 *pollwakeups = POLLIN | POLLRDNORM; 1961 *wakeups = RSLEEP; 1962 return (mp); 1963 } 1964 1965 case T_OPTDATA_IND: { 1966 struct T_optdata_ind *tdi = &tpr->optdata_ind; 1967 1968 if (MBLKL(mp) < sizeof (struct T_optdata_ind)) { 1969 zcmn_err(getzoneid(), CE_WARN, 1970 "sockfs: Too short T_OPTDATA_IND. Len = %ld\n", 1971 (ptrdiff_t)(MBLKL(mp))); 1972 freemsg(mp); 1973 return (NULL); 1974 } 1975 /* 1976 * Allow zero-length messages carrying options. 1977 * This is used when carrying the SO_UNIX_CLOSE option. 1978 */ 1979 if (so->so_family == AF_UNIX && msgdsize(mp->b_cont) == 0 && 1980 tdi->OPT_length != 0) { 1981 /* 1982 * Attempt to extract AF_UNIX close indication 1983 * from the options. Ignore any other options - 1984 * those are handled once the message is removed 1985 * from the queue. 1986 * The close indication message should not carry data. 1987 */ 1988 void *opt; 1989 t_uscalar_t optlen = tdi->OPT_length; 1990 1991 opt = sogetoff(mp, tdi->OPT_offset, 1992 optlen, __TPI_ALIGN_SIZE); 1993 if (opt == NULL) { 1994 /* The len/off falls outside mp */ 1995 freemsg(mp); 1996 mutex_enter(&so->so_lock); 1997 soseterror(so, EPROTO); 1998 mutex_exit(&so->so_lock); 1999 zcmn_err(getzoneid(), CE_WARN, 2000 "sockfs: T_optdata_ind with invalid " 2001 "optlen/offset %u/%d\n", 2002 optlen, tdi->OPT_offset); 2003 return (NULL); 2004 } 2005 /* 2006 * If we received a close indication mark the 2007 * socket and discard this message. 
2008 */ 2009 if (so_getopt_unix_close(opt, optlen)) { 2010 mutex_enter(&so->so_lock); 2011 socantsendmore(so); 2012 mutex_exit(&so->so_lock); 2013 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2014 freemsg(mp); 2015 return (NULL); 2016 } 2017 } 2018 *allmsgsigs = S_INPUT | S_RDNORM; 2019 *pollwakeups = POLLIN | POLLRDNORM; 2020 *wakeups = RSLEEP; 2021 return (mp); 2022 } 2023 2024 case T_EXDATA_IND: { 2025 mblk_t *mctl, *mdata; 2026 mblk_t *lbp; 2027 union T_primitives *tprp; 2028 struct stdata *stp; 2029 queue_t *qp; 2030 2031 if (MBLKL(mp) < sizeof (struct T_exdata_ind)) { 2032 zcmn_err(getzoneid(), CE_WARN, 2033 "sockfs: Too short T_EXDATA_IND. Len = %ld\n", 2034 (ptrdiff_t)(MBLKL(mp))); 2035 freemsg(mp); 2036 return (NULL); 2037 } 2038 /* 2039 * Ignore zero-length T_EXDATA_IND messages. These might be 2040 * generated by some transports. 2041 * 2042 * This is needed to prevent read (which skips the M_PROTO 2043 * part) to unexpectedly return 0 (or return EWOULDBLOCK 2044 * on a non-blocking socket after select/poll has indicated 2045 * that data is available). 2046 */ 2047 dprintso(so, 1, 2048 ("T_EXDATA_IND(%p): counts %d/%d state %s\n", 2049 vp, so->so_oobsigcnt, so->so_oobcnt, 2050 pr_state(so->so_state, so->so_mode))); 2051 2052 if (msgdsize(mp->b_cont) == 0) { 2053 dprintso(so, 0, 2054 ("strsock_proto: zero length T_EXDATA_IND\n")); 2055 freemsg(mp); 2056 return (NULL); 2057 } 2058 2059 /* 2060 * Split into the T_EXDATA_IND and the M_DATA part. 
2061 * We process these three pieces separately: 2062 * signal generation 2063 * handling T_EXDATA_IND 2064 * handling M_DATA component 2065 */ 2066 mctl = mp; 2067 mdata = mctl->b_cont; 2068 mctl->b_cont = NULL; 2069 mutex_enter(&so->so_lock); 2070 so_oob_sig(so, 0, allmsgsigs, pollwakeups); 2071 mctl = so_oob_exdata(so, mctl, allmsgsigs, pollwakeups); 2072 mdata = so_oob_data(so, mdata, allmsgsigs, pollwakeups); 2073 2074 stp = vp->v_stream; 2075 ASSERT(stp != NULL); 2076 qp = _RD(stp->sd_wrq); 2077 2078 mutex_enter(QLOCK(qp)); 2079 lbp = qp->q_last; 2080 2081 /* 2082 * We want to avoid queueing up a string of T_EXDATA_IND 2083 * messages with no intervening data messages at the stream 2084 * head. These messages contribute to the total message 2085 * count. Eventually this can lead to STREAMS flow contol 2086 * and also cause TCP to advertise a zero window condition 2087 * to the peer. This can happen in the degenerate case where 2088 * the sender and receiver exchange only OOB data. The sender 2089 * only sends messages with MSG_OOB flag and the receiver 2090 * receives only MSG_OOB messages and does not use SO_OOBINLINE. 2091 * An example of this scenario has been reported in applications 2092 * that use OOB data to exchange heart beats. Flow control 2093 * relief will never happen if the application only reads OOB 2094 * data which is done directly by sorecvoob() and the 2095 * T_EXDATA_IND messages at the streamhead won't be consumed. 2096 * Note that there is no correctness issue in compressing the 2097 * string of T_EXDATA_IND messages into a single T_EXDATA_IND 2098 * message. A single read that does not specify MSG_OOB will 2099 * read across all the marks in a loop in sotpi_recvmsg(). 2100 * Each mark is individually distinguishable only if the 2101 * T_EXDATA_IND messages are separated by data messages. 
2102 */ 2103 if ((qp->q_first != NULL) && (DB_TYPE(lbp) == M_PROTO)) { 2104 tprp = (union T_primitives *)lbp->b_rptr; 2105 if ((tprp->type == T_EXDATA_IND) && 2106 !(so->so_options & SO_OOBINLINE)) { 2107 2108 /* 2109 * free the new M_PROTO message 2110 */ 2111 freemsg(mctl); 2112 2113 /* 2114 * adjust the OOB count and OOB signal count 2115 * just incremented for the new OOB data. 2116 */ 2117 so->so_oobcnt--; 2118 so->so_oobsigcnt--; 2119 mutex_exit(QLOCK(qp)); 2120 mutex_exit(&so->so_lock); 2121 return (NULL); 2122 } 2123 } 2124 mutex_exit(QLOCK(qp)); 2125 2126 /* 2127 * Pass the T_EXDATA_IND and the M_DATA back separately 2128 * by using b_next linkage. (The stream head will queue any 2129 * b_next linked messages separately.) This is needed 2130 * since MSGMARK applies to the last by of the message 2131 * hence we can not have any M_DATA component attached 2132 * to the marked T_EXDATA_IND. Note that the stream head 2133 * will not consolidate M_DATA messages onto an MSGMARK'ed 2134 * message in order to preserve the constraint that 2135 * the T_EXDATA_IND always is a separate message. 
2136 */ 2137 ASSERT(mctl != NULL); 2138 mctl->b_next = mdata; 2139 mp = mctl; 2140 #ifdef DEBUG 2141 if (mdata == NULL) { 2142 dprintso(so, 1, 2143 ("after outofline T_EXDATA_IND(%p): " 2144 "counts %d/%d poll 0x%x sig 0x%x state %s\n", 2145 vp, so->so_oobsigcnt, 2146 so->so_oobcnt, *pollwakeups, *allmsgsigs, 2147 pr_state(so->so_state, so->so_mode))); 2148 } else { 2149 dprintso(so, 1, 2150 ("after inline T_EXDATA_IND(%p): " 2151 "counts %d/%d poll 0x%x sig 0x%x state %s\n", 2152 vp, so->so_oobsigcnt, 2153 so->so_oobcnt, *pollwakeups, *allmsgsigs, 2154 pr_state(so->so_state, so->so_mode))); 2155 } 2156 #endif /* DEBUG */ 2157 mutex_exit(&so->so_lock); 2158 *wakeups = RSLEEP; 2159 return (mp); 2160 } 2161 2162 case T_CONN_CON: { 2163 struct T_conn_con *conn_con; 2164 void *addr; 2165 t_uscalar_t addrlen; 2166 2167 /* 2168 * Verify the state, update the state to ISCONNECTED, 2169 * record the potentially new address in the message, 2170 * and drop the message. 2171 */ 2172 if (MBLKL(mp) < sizeof (struct T_conn_con)) { 2173 zcmn_err(getzoneid(), CE_WARN, 2174 "sockfs: Too short T_CONN_CON. Len = %ld\n", 2175 (ptrdiff_t)(MBLKL(mp))); 2176 freemsg(mp); 2177 return (NULL); 2178 } 2179 2180 mutex_enter(&so->so_lock); 2181 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 2182 SS_ISCONNECTING) { 2183 mutex_exit(&so->so_lock); 2184 dprintso(so, 1, 2185 ("T_CONN_CON: state %x\n", so->so_state)); 2186 freemsg(mp); 2187 return (NULL); 2188 } 2189 2190 conn_con = &tpr->conn_con; 2191 addrlen = conn_con->RES_length; 2192 /* 2193 * Allow the address to be of different size than sent down 2194 * in the T_CONN_REQ as long as it doesn't exceed the maxlen. 2195 * For AF_UNIX require the identical length. 2196 */ 2197 if (so->so_family == AF_UNIX ? 
2198 addrlen != (t_uscalar_t)sizeof (so->so_ux_laddr) : 2199 addrlen > (t_uscalar_t)so->so_faddr_maxlen) { 2200 zcmn_err(getzoneid(), CE_WARN, 2201 "sockfs: T_conn_con with different " 2202 "length %u/%d\n", 2203 addrlen, conn_con->RES_length); 2204 soisdisconnected(so, EPROTO); 2205 mutex_exit(&so->so_lock); 2206 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 2207 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2208 strseteof(SOTOV(so), 1); 2209 freemsg(mp); 2210 /* 2211 * strseteof takes care of read side wakeups, 2212 * pollwakeups, and signals. 2213 */ 2214 *wakeups = WSLEEP; 2215 *allmsgsigs = S_OUTPUT; 2216 *pollwakeups = POLLOUT; 2217 return (NULL); 2218 } 2219 addr = sogetoff(mp, conn_con->RES_offset, addrlen, 1); 2220 if (addr == NULL) { 2221 zcmn_err(getzoneid(), CE_WARN, 2222 "sockfs: T_conn_con with invalid " 2223 "addrlen/offset %u/%d\n", 2224 addrlen, conn_con->RES_offset); 2225 mutex_exit(&so->so_lock); 2226 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 2227 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2228 strseteof(SOTOV(so), 1); 2229 freemsg(mp); 2230 /* 2231 * strseteof takes care of read side wakeups, 2232 * pollwakeups, and signals. 2233 */ 2234 *wakeups = WSLEEP; 2235 *allmsgsigs = S_OUTPUT; 2236 *pollwakeups = POLLOUT; 2237 return (NULL); 2238 } 2239 2240 /* 2241 * Save for getpeername. 2242 */ 2243 if (so->so_family != AF_UNIX) { 2244 so->so_faddr_len = (socklen_t)addrlen; 2245 ASSERT(so->so_faddr_len <= so->so_faddr_maxlen); 2246 bcopy(addr, so->so_faddr_sa, addrlen); 2247 so->so_state |= SS_FADDR_VALID; 2248 } 2249 2250 if (so->so_peercred != NULL) 2251 crfree(so->so_peercred); 2252 so->so_peercred = DB_CRED(mp); 2253 so->so_cpid = DB_CPID(mp); 2254 if (so->so_peercred != NULL) 2255 crhold(so->so_peercred); 2256 2257 /* Wakeup anybody sleeping in sowaitconnected */ 2258 soisconnected(so); 2259 mutex_exit(&so->so_lock); 2260 2261 /* 2262 * The socket is now available for sending data. 
2263 */ 2264 *wakeups = WSLEEP; 2265 *allmsgsigs = S_OUTPUT; 2266 *pollwakeups = POLLOUT; 2267 freemsg(mp); 2268 return (NULL); 2269 } 2270 2271 /* 2272 * Extra processing in case of an SSL proxy, before queuing or 2273 * forwarding to the fallback endpoint 2274 */ 2275 case T_SSL_PROXY_CONN_IND: 2276 case T_CONN_IND: 2277 /* 2278 * Verify the min size and queue the message on 2279 * the so_conn_ind_head/tail list. 2280 */ 2281 if (MBLKL(mp) < sizeof (struct T_conn_ind)) { 2282 zcmn_err(getzoneid(), CE_WARN, 2283 "sockfs: Too short T_CONN_IND. Len = %ld\n", 2284 (ptrdiff_t)(MBLKL(mp))); 2285 freemsg(mp); 2286 return (NULL); 2287 } 2288 2289 if (audit_active) 2290 audit_sock(T_CONN_IND, strvp2wq(vp), mp, 0); 2291 if (!(so->so_state & SS_ACCEPTCONN)) { 2292 zcmn_err(getzoneid(), CE_WARN, 2293 "sockfs: T_conn_ind on non-listening socket\n"); 2294 freemsg(mp); 2295 return (NULL); 2296 } 2297 2298 if (tpr->type == T_SSL_PROXY_CONN_IND && mp->b_cont == NULL) { 2299 /* No context: need to fall back */ 2300 struct sonode *fbso; 2301 stdata_t *fbstp; 2302 2303 tpr->type = T_CONN_IND; 2304 2305 fbso = kssl_find_fallback(so->so_kssl_ent); 2306 2307 /* 2308 * No fallback: the remote will timeout and 2309 * disconnect. 2310 */ 2311 if (fbso == NULL) { 2312 freemsg(mp); 2313 return (NULL); 2314 } 2315 fbstp = SOTOV(fbso)->v_stream; 2316 qreply(fbstp->sd_wrq->q_next, mp); 2317 return (NULL); 2318 } 2319 soqueueconnind(so, mp); 2320 *allmsgsigs = S_INPUT | S_RDNORM; 2321 *pollwakeups = POLLIN | POLLRDNORM; 2322 *wakeups = RSLEEP; 2323 return (NULL); 2324 2325 case T_ORDREL_IND: 2326 if (MBLKL(mp) < sizeof (struct T_ordrel_ind)) { 2327 zcmn_err(getzoneid(), CE_WARN, 2328 "sockfs: Too short T_ORDREL_IND. Len = %ld\n", 2329 (ptrdiff_t)(MBLKL(mp))); 2330 freemsg(mp); 2331 return (NULL); 2332 } 2333 2334 /* 2335 * Some providers send this when not fully connected. 2336 * SunLink X.25 needs to retrieve disconnect reason after 2337 * disconnect for compatibility. 
It uses T_ORDREL_IND 2338 * instead of T_DISCON_IND so that it may use the 2339 * endpoint after a connect failure to retrieve the 2340 * reason using an ioctl. Thus we explicitly clear 2341 * SS_ISCONNECTING here for SunLink X.25. 2342 * This is a needed TPI violation. 2343 */ 2344 mutex_enter(&so->so_lock); 2345 so->so_state &= ~SS_ISCONNECTING; 2346 socantrcvmore(so); 2347 mutex_exit(&so->so_lock); 2348 strseteof(SOTOV(so), 1); 2349 /* 2350 * strseteof takes care of read side wakeups, 2351 * pollwakeups, and signals. 2352 */ 2353 freemsg(mp); 2354 return (NULL); 2355 2356 case T_DISCON_IND: 2357 if (MBLKL(mp) < sizeof (struct T_discon_ind)) { 2358 zcmn_err(getzoneid(), CE_WARN, 2359 "sockfs: Too short T_DISCON_IND. Len = %ld\n", 2360 (ptrdiff_t)(MBLKL(mp))); 2361 freemsg(mp); 2362 return (NULL); 2363 } 2364 if (so->so_state & SS_ACCEPTCONN) { 2365 /* 2366 * This is a listener. Look for a queued T_CONN_IND 2367 * with a matching sequence number and remove it 2368 * from the list. 2369 * It is normal to not find the sequence number since 2370 * the soaccept might have already dequeued it 2371 * (in which case the T_CONN_RES will fail with 2372 * TBADSEQ). 2373 */ 2374 (void) soflushconnind(so, tpr->discon_ind.SEQ_number); 2375 freemsg(mp); 2376 return (0); 2377 } 2378 2379 /* 2380 * Not a listener 2381 * 2382 * If SS_CANTRCVMORE for AF_UNIX ignore the discon_reason. 2383 * Such a discon_ind appears when the peer has first done 2384 * a shutdown() followed by a close() in which case we just 2385 * want to record socantsendmore. 2386 * In this case sockfs first receives a T_ORDREL_IND followed 2387 * by a T_DISCON_IND. 2388 * Note that for other transports (e.g. TCP) we need to handle 2389 * the discon_ind in this case since it signals an error. 
2390 */ 2391 mutex_enter(&so->so_lock); 2392 if ((so->so_state & SS_CANTRCVMORE) && 2393 (so->so_family == AF_UNIX)) { 2394 socantsendmore(so); 2395 mutex_exit(&so->so_lock); 2396 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2397 dprintso(so, 1, 2398 ("T_DISCON_IND: error %d\n", so->so_error)); 2399 freemsg(mp); 2400 /* 2401 * Set these variables for caller to process them. 2402 * For the else part where T_DISCON_IND is processed, 2403 * this will be done in the function being called 2404 * (strsock_discon_ind()) 2405 */ 2406 *wakeups = WSLEEP; 2407 *allmsgsigs = S_OUTPUT; 2408 *pollwakeups = POLLOUT; 2409 } else if (so->so_flag & (SOASYNC_UNBIND | SOLOCKED)) { 2410 /* 2411 * Deferred processing of T_DISCON_IND 2412 */ 2413 so_save_discon_ind(so, mp, strsock_discon_ind); 2414 mutex_exit(&so->so_lock); 2415 } else { 2416 /* 2417 * Process T_DISCON_IND now 2418 */ 2419 (void) strsock_discon_ind(so, mp); 2420 mutex_exit(&so->so_lock); 2421 } 2422 return (NULL); 2423 2424 case T_UDERROR_IND: { 2425 struct T_uderror_ind *tudi = &tpr->uderror_ind; 2426 void *addr; 2427 t_uscalar_t addrlen; 2428 int error; 2429 2430 dprintso(so, 0, 2431 ("T_UDERROR_IND: error %d\n", tudi->ERROR_type)); 2432 2433 if (MBLKL(mp) < sizeof (struct T_uderror_ind)) { 2434 zcmn_err(getzoneid(), CE_WARN, 2435 "sockfs: Too short T_UDERROR_IND. 
Len = %ld\n", 2436 (ptrdiff_t)(MBLKL(mp))); 2437 freemsg(mp); 2438 return (NULL); 2439 } 2440 /* Ignore on connection-oriented transports */ 2441 if (so->so_mode & SM_CONNREQUIRED) { 2442 freemsg(mp); 2443 eprintsoline(so, 0); 2444 zcmn_err(getzoneid(), CE_WARN, 2445 "sockfs: T_uderror_ind on connection-oriented " 2446 "transport\n"); 2447 return (NULL); 2448 } 2449 addrlen = tudi->DEST_length; 2450 addr = sogetoff(mp, tudi->DEST_offset, addrlen, 1); 2451 if (addr == NULL) { 2452 zcmn_err(getzoneid(), CE_WARN, 2453 "sockfs: T_uderror_ind with invalid " 2454 "addrlen/offset %u/%d\n", 2455 addrlen, tudi->DEST_offset); 2456 freemsg(mp); 2457 return (NULL); 2458 } 2459 2460 /* Verify source address for connected socket. */ 2461 mutex_enter(&so->so_lock); 2462 if (so->so_state & SS_ISCONNECTED) { 2463 void *faddr; 2464 t_uscalar_t faddr_len; 2465 boolean_t match = B_FALSE; 2466 2467 switch (so->so_family) { 2468 case AF_INET: { 2469 /* Compare just IP address and port */ 2470 struct sockaddr_in *sin1, *sin2; 2471 2472 sin1 = (struct sockaddr_in *)so->so_faddr_sa; 2473 sin2 = (struct sockaddr_in *)addr; 2474 if (addrlen == sizeof (struct sockaddr_in) && 2475 sin1->sin_port == sin2->sin_port && 2476 sin1->sin_addr.s_addr == 2477 sin2->sin_addr.s_addr) 2478 match = B_TRUE; 2479 break; 2480 } 2481 case AF_INET6: { 2482 /* Compare just IP address and port. 
Not flow */ 2483 struct sockaddr_in6 *sin1, *sin2; 2484 2485 sin1 = (struct sockaddr_in6 *)so->so_faddr_sa; 2486 sin2 = (struct sockaddr_in6 *)addr; 2487 if (addrlen == sizeof (struct sockaddr_in6) && 2488 sin1->sin6_port == sin2->sin6_port && 2489 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 2490 &sin2->sin6_addr)) 2491 match = B_TRUE; 2492 break; 2493 } 2494 case AF_UNIX: 2495 faddr = &so->so_ux_faddr; 2496 faddr_len = 2497 (t_uscalar_t)sizeof (so->so_ux_faddr); 2498 if (faddr_len == addrlen && 2499 bcmp(addr, faddr, addrlen) == 0) 2500 match = B_TRUE; 2501 break; 2502 default: 2503 faddr = so->so_faddr_sa; 2504 faddr_len = (t_uscalar_t)so->so_faddr_len; 2505 if (faddr_len == addrlen && 2506 bcmp(addr, faddr, addrlen) == 0) 2507 match = B_TRUE; 2508 break; 2509 } 2510 2511 if (!match) { 2512 #ifdef DEBUG 2513 dprintso(so, 0, 2514 ("sockfs: T_UDERR_IND mismatch: %s - ", 2515 pr_addr(so->so_family, 2516 (struct sockaddr *)addr, 2517 addrlen))); 2518 dprintso(so, 0, ("%s\n", 2519 pr_addr(so->so_family, so->so_faddr_sa, 2520 so->so_faddr_len))); 2521 #endif /* DEBUG */ 2522 mutex_exit(&so->so_lock); 2523 freemsg(mp); 2524 return (NULL); 2525 } 2526 /* 2527 * Make the write error nonpersistent. If the error 2528 * is zero we use ECONNRESET. 2529 * This assumes that the name space for ERROR_type 2530 * is the errno name space. 2531 */ 2532 if (tudi->ERROR_type != 0) 2533 error = tudi->ERROR_type; 2534 else 2535 error = ECONNRESET; 2536 2537 soseterror(so, error); 2538 mutex_exit(&so->so_lock); 2539 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 2540 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2541 *wakeups = RSLEEP | WSLEEP; 2542 *allmsgsigs = S_INPUT | S_RDNORM | S_OUTPUT; 2543 *pollwakeups = POLLIN | POLLRDNORM | POLLOUT; 2544 freemsg(mp); 2545 return (NULL); 2546 } 2547 /* 2548 * If the application asked for delayed errors 2549 * record the T_UDERROR_IND so_eaddr_mp and the reason in 2550 * so_delayed_error for delayed error posting. If the reason 2551 * is zero use ECONNRESET. 
2552 * Note that delayed error indications do not make sense for 2553 * AF_UNIX sockets since sendto checks that the destination 2554 * address is valid at the time of the sendto. 2555 */ 2556 if (!(so->so_options & SO_DGRAM_ERRIND)) { 2557 mutex_exit(&so->so_lock); 2558 freemsg(mp); 2559 return (NULL); 2560 } 2561 if (so->so_eaddr_mp != NULL) 2562 freemsg(so->so_eaddr_mp); 2563 2564 so->so_eaddr_mp = mp; 2565 if (tudi->ERROR_type != 0) 2566 error = tudi->ERROR_type; 2567 else 2568 error = ECONNRESET; 2569 so->so_delayed_error = (ushort_t)error; 2570 mutex_exit(&so->so_lock); 2571 return (NULL); 2572 } 2573 2574 case T_ERROR_ACK: 2575 dprintso(so, 0, 2576 ("strsock_proto: T_ERROR_ACK for %d, error %d/%d\n", 2577 tpr->error_ack.ERROR_prim, 2578 tpr->error_ack.TLI_error, 2579 tpr->error_ack.UNIX_error)); 2580 2581 if (MBLKL(mp) < sizeof (struct T_error_ack)) { 2582 zcmn_err(getzoneid(), CE_WARN, 2583 "sockfs: Too short T_ERROR_ACK. Len = %ld\n", 2584 (ptrdiff_t)(MBLKL(mp))); 2585 freemsg(mp); 2586 return (NULL); 2587 } 2588 /* 2589 * Check if we were waiting for the async message 2590 */ 2591 mutex_enter(&so->so_lock); 2592 if ((so->so_flag & SOASYNC_UNBIND) && 2593 tpr->error_ack.ERROR_prim == T_UNBIND_REQ) { 2594 so_unlock_single(so, SOASYNC_UNBIND); 2595 mutex_exit(&so->so_lock); 2596 freemsg(mp); 2597 return (NULL); 2598 } 2599 mutex_exit(&so->so_lock); 2600 soqueueack(so, mp); 2601 return (NULL); 2602 2603 case T_OK_ACK: 2604 if (MBLKL(mp) < sizeof (struct T_ok_ack)) { 2605 zcmn_err(getzoneid(), CE_WARN, 2606 "sockfs: Too short T_OK_ACK. 
Len = %ld\n", 2607 (ptrdiff_t)(MBLKL(mp))); 2608 freemsg(mp); 2609 return (NULL); 2610 } 2611 /* 2612 * Check if we were waiting for the async message 2613 */ 2614 mutex_enter(&so->so_lock); 2615 if ((so->so_flag & SOASYNC_UNBIND) && 2616 tpr->ok_ack.CORRECT_prim == T_UNBIND_REQ) { 2617 dprintso(so, 1, 2618 ("strsock_proto: T_OK_ACK async unbind\n")); 2619 so_unlock_single(so, SOASYNC_UNBIND); 2620 mutex_exit(&so->so_lock); 2621 freemsg(mp); 2622 return (NULL); 2623 } 2624 mutex_exit(&so->so_lock); 2625 soqueueack(so, mp); 2626 return (NULL); 2627 2628 case T_INFO_ACK: 2629 if (MBLKL(mp) < sizeof (struct T_info_ack)) { 2630 zcmn_err(getzoneid(), CE_WARN, 2631 "sockfs: Too short T_INFO_ACK. Len = %ld\n", 2632 (ptrdiff_t)(MBLKL(mp))); 2633 freemsg(mp); 2634 return (NULL); 2635 } 2636 soqueueack(so, mp); 2637 return (NULL); 2638 2639 case T_CAPABILITY_ACK: 2640 /* 2641 * A T_capability_ack need only be large enough to hold 2642 * the PRIM_type and CAP_bits1 fields; checking for anything 2643 * larger might reject a correct response from an older 2644 * provider. 2645 */ 2646 if (MBLKL(mp) < 2 * sizeof (t_uscalar_t)) { 2647 zcmn_err(getzoneid(), CE_WARN, 2648 "sockfs: Too short T_CAPABILITY_ACK. Len = %ld\n", 2649 (ptrdiff_t)(MBLKL(mp))); 2650 freemsg(mp); 2651 return (NULL); 2652 } 2653 soqueueack(so, mp); 2654 return (NULL); 2655 2656 case T_BIND_ACK: 2657 if (MBLKL(mp) < sizeof (struct T_bind_ack)) { 2658 zcmn_err(getzoneid(), CE_WARN, 2659 "sockfs: Too short T_BIND_ACK. Len = %ld\n", 2660 (ptrdiff_t)(MBLKL(mp))); 2661 freemsg(mp); 2662 return (NULL); 2663 } 2664 soqueueack(so, mp); 2665 return (NULL); 2666 2667 case T_OPTMGMT_ACK: 2668 if (MBLKL(mp) < sizeof (struct T_optmgmt_ack)) { 2669 zcmn_err(getzoneid(), CE_WARN, 2670 "sockfs: Too short T_OPTMGMT_ACK. 
Len = %ld\n", 2671 (ptrdiff_t)(MBLKL(mp))); 2672 freemsg(mp); 2673 return (NULL); 2674 } 2675 soqueueack(so, mp); 2676 return (NULL); 2677 default: 2678 #ifdef DEBUG 2679 zcmn_err(getzoneid(), CE_WARN, 2680 "sockfs: unknown TPI primitive %d received\n", 2681 tpr->type); 2682 #endif /* DEBUG */ 2683 freemsg(mp); 2684 return (NULL); 2685 } 2686 } 2687 2688 /* 2689 * This routine is registered with the stream head to receive other 2690 * (non-data, and non-proto) messages. 2691 * 2692 * Returns NULL if the message was consumed. 2693 * Returns an mblk to make that mblk be processed by the stream head. 2694 * 2695 * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and 2696 * *pollwakeups) for the stream head to take action on. 2697 */ 2698 static mblk_t * 2699 strsock_misc(vnode_t *vp, mblk_t *mp, 2700 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 2701 strsigset_t *allmsgsigs, strpollset_t *pollwakeups) 2702 { 2703 struct sonode *so; 2704 2705 so = VTOSO(vp); 2706 2707 dprintso(so, 1, ("strsock_misc(%p, %p, 0x%x)\n", 2708 vp, mp, DB_TYPE(mp))); 2709 2710 /* Set default return values */ 2711 *wakeups = *allmsgsigs = *firstmsgsigs = *pollwakeups = 0; 2712 2713 switch (DB_TYPE(mp)) { 2714 case M_PCSIG: 2715 /* 2716 * This assumes that an M_PCSIG for the urgent data arrives 2717 * before the corresponding T_EXDATA_IND. 2718 * 2719 * Note: Just like in SunOS 4.X and 4.4BSD a poll will be 2720 * awoken before the urgent data shows up. 2721 * For OOBINLINE this can result in select returning 2722 * only exceptions as opposed to except|read. 
2723 */ 2724 if (*mp->b_rptr == SIGURG) { 2725 mutex_enter(&so->so_lock); 2726 dprintso(so, 1, 2727 ("SIGURG(%p): counts %d/%d state %s\n", 2728 vp, so->so_oobsigcnt, 2729 so->so_oobcnt, 2730 pr_state(so->so_state, so->so_mode))); 2731 so_oob_sig(so, 1, allmsgsigs, pollwakeups); 2732 dprintso(so, 1, 2733 ("after SIGURG(%p): counts %d/%d " 2734 " poll 0x%x sig 0x%x state %s\n", 2735 vp, so->so_oobsigcnt, 2736 so->so_oobcnt, *pollwakeups, *allmsgsigs, 2737 pr_state(so->so_state, so->so_mode))); 2738 mutex_exit(&so->so_lock); 2739 } 2740 freemsg(mp); 2741 return (NULL); 2742 2743 case M_SIG: 2744 case M_HANGUP: 2745 case M_UNHANGUP: 2746 case M_ERROR: 2747 /* M_ERRORs etc are ignored */ 2748 freemsg(mp); 2749 return (NULL); 2750 2751 case M_FLUSH: 2752 /* 2753 * Do not flush read queue. If the M_FLUSH 2754 * arrives because of an impending T_discon_ind 2755 * we still have to keep any queued data - this is part of 2756 * socket semantics. 2757 */ 2758 if (*mp->b_rptr & FLUSHW) { 2759 *mp->b_rptr &= ~FLUSHR; 2760 return (mp); 2761 } 2762 freemsg(mp); 2763 return (NULL); 2764 2765 default: 2766 return (mp); 2767 } 2768 } 2769 2770 2771 /* Register to receive signals for certain events */ 2772 int 2773 so_set_asyncsigs(vnode_t *vp, pid_t pgrp, int events, int mode, cred_t *cr) 2774 { 2775 struct strsigset ss; 2776 int32_t rval; 2777 2778 /* 2779 * Note that SOLOCKED will be set except for the call from soaccept(). 2780 */ 2781 ASSERT(!mutex_owned(&VTOSO(vp)->so_lock)); 2782 ss.ss_pid = pgrp; 2783 ss.ss_events = events; 2784 return (strioctl(vp, I_ESETSIG, (intptr_t)&ss, mode, K_TO_K, cr, 2785 &rval)); 2786 } 2787 2788 2789 /* Register for events matching the SS_ASYNC flag */ 2790 int 2791 so_set_events(struct sonode *so, vnode_t *vp, cred_t *cr) 2792 { 2793 int events = so->so_state & SS_ASYNC ? 
2794 S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT : 2795 S_RDBAND | S_BANDURG; 2796 2797 return (so_set_asyncsigs(vp, so->so_pgrp, events, 0, cr)); 2798 } 2799 2800 2801 /* Change the SS_ASYNC flag, and update signal delivery if needed */ 2802 int 2803 so_flip_async(struct sonode *so, vnode_t *vp, int mode, cred_t *cr) 2804 { 2805 ASSERT(mutex_owned(&so->so_lock)); 2806 if (so->so_pgrp != 0) { 2807 int error; 2808 int events = so->so_state & SS_ASYNC ? /* Old flag */ 2809 S_RDBAND | S_BANDURG : /* New sigs */ 2810 S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT; 2811 2812 so_lock_single(so); 2813 mutex_exit(&so->so_lock); 2814 2815 error = so_set_asyncsigs(vp, so->so_pgrp, events, mode, cr); 2816 2817 mutex_enter(&so->so_lock); 2818 so_unlock_single(so, SOLOCKED); 2819 if (error) 2820 return (error); 2821 } 2822 so->so_state ^= SS_ASYNC; 2823 return (0); 2824 } 2825 2826 /* 2827 * Set new pid/pgrp for SIGPOLL (or SIGIO for FIOASYNC mode), replacing 2828 * any existing one. If passed zero, just clear the existing one. 2829 */ 2830 int 2831 so_set_siggrp(struct sonode *so, vnode_t *vp, pid_t pgrp, int mode, cred_t *cr) 2832 { 2833 int events = so->so_state & SS_ASYNC ? 2834 S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT : 2835 S_RDBAND | S_BANDURG; 2836 int error; 2837 2838 ASSERT(mutex_owned(&so->so_lock)); 2839 2840 /* 2841 * Change socket process (group). 2842 * 2843 * strioctl (via so_set_asyncsigs) will perform permission check and 2844 * also keep a PID_HOLD to prevent the pid from being reused. 
2845 */ 2846 so_lock_single(so); 2847 mutex_exit(&so->so_lock); 2848 2849 if (pgrp != 0) { 2850 dprintso(so, 1, ("setown: adding pgrp %d ev 0x%x\n", 2851 pgrp, events)); 2852 error = so_set_asyncsigs(vp, pgrp, events, mode, cr); 2853 if (error != 0) { 2854 eprintsoline(so, error); 2855 goto bad; 2856 } 2857 } 2858 /* Remove the previously registered process/group */ 2859 if (so->so_pgrp != 0) { 2860 dprintso(so, 1, ("setown: removing pgrp %d\n", so->so_pgrp)); 2861 error = so_set_asyncsigs(vp, so->so_pgrp, 0, mode, cr); 2862 if (error != 0) { 2863 eprintsoline(so, error); 2864 error = 0; 2865 } 2866 } 2867 mutex_enter(&so->so_lock); 2868 so_unlock_single(so, SOLOCKED); 2869 so->so_pgrp = pgrp; 2870 return (0); 2871 bad: 2872 mutex_enter(&so->so_lock); 2873 so_unlock_single(so, SOLOCKED); 2874 return (error); 2875 } 2876 2877 2878 2879 /* 2880 * Translate a TLI(/XTI) error into a system error as best we can. 2881 */ 2882 static const int tli_errs[] = { 2883 0, /* no error */ 2884 EADDRNOTAVAIL, /* TBADADDR */ 2885 ENOPROTOOPT, /* TBADOPT */ 2886 EACCES, /* TACCES */ 2887 EBADF, /* TBADF */ 2888 EADDRNOTAVAIL, /* TNOADDR */ 2889 EPROTO, /* TOUTSTATE */ 2890 ECONNABORTED, /* TBADSEQ */ 2891 0, /* TSYSERR - will never get */ 2892 EPROTO, /* TLOOK - should never be sent by transport */ 2893 EMSGSIZE, /* TBADDATA */ 2894 EMSGSIZE, /* TBUFOVFLW */ 2895 EPROTO, /* TFLOW */ 2896 EWOULDBLOCK, /* TNODATA */ 2897 EPROTO, /* TNODIS */ 2898 EPROTO, /* TNOUDERR */ 2899 EINVAL, /* TBADFLAG */ 2900 EPROTO, /* TNOREL */ 2901 EOPNOTSUPP, /* TNOTSUPPORT */ 2902 EPROTO, /* TSTATECHNG */ 2903 /* following represent error namespace expansion with XTI */ 2904 EPROTO, /* TNOSTRUCTYPE - never sent by transport */ 2905 EPROTO, /* TBADNAME - never sent by transport */ 2906 EPROTO, /* TBADQLEN - never sent by transport */ 2907 EADDRINUSE, /* TADDRBUSY */ 2908 EBADF, /* TINDOUT */ 2909 EBADF, /* TPROVMISMATCH */ 2910 EBADF, /* TRESQLEN */ 2911 EBADF, /* TRESADDR */ 2912 EPROTO, /* TQFULL - 
never sent by transport */ 2913 EPROTO, /* TPROTO */ 2914 }; 2915 2916 static int 2917 tlitosyserr(int terr) 2918 { 2919 ASSERT(terr != TSYSERR); 2920 if (terr >= (sizeof (tli_errs) / sizeof (tli_errs[0]))) 2921 return (EPROTO); 2922 else 2923 return (tli_errs[terr]); 2924 } 2925 2926 /* 2927 * Sockfs sodirect STREAMS read put procedure. Called from sodirect enable 2928 * transport driver/module with an mblk_t chain. 2929 * 2930 * Note, we in-line putq() for the fast-path cases of q is empty, q_last and 2931 * bp are of type M_DATA. All other cases we call putq(). 2932 * 2933 * On success a zero will be return, else an errno will be returned. 2934 */ 2935 int 2936 sodput(sodirect_t *sodp, mblk_t *bp) 2937 { 2938 queue_t *q = sodp->sod_q; 2939 struct stdata *stp = (struct stdata *)q->q_ptr; 2940 mblk_t *nbp; 2941 int ret; 2942 mblk_t *last = q->q_last; 2943 int bytecnt = 0; 2944 int mblkcnt = 0; 2945 2946 2947 ASSERT(MUTEX_HELD(sodp->sod_lock)); 2948 2949 if (stp->sd_flag == STREOF) { 2950 ret = 0; 2951 goto error; 2952 } 2953 2954 if (q->q_first == NULL) { 2955 /* Q empty, really fast fast-path */ 2956 bp->b_prev = NULL; 2957 bp->b_next = NULL; 2958 q->q_first = bp; 2959 q->q_last = bp; 2960 2961 } else if (last->b_datap->db_type == M_DATA && 2962 bp->b_datap->db_type == M_DATA) { 2963 /* 2964 * Last mblk_t chain and bp are both type M_DATA so 2965 * in-line putq() here, if the DBLK_UIOA state match 2966 * add bp to the end of the current last chain, else 2967 * start a new last chain with bp. 2968 */ 2969 if ((last->b_datap->db_flags & DBLK_UIOA) == 2970 (bp->b_datap->db_flags & DBLK_UIOA)) { 2971 /* Added to end */ 2972 while ((nbp = last->b_cont) != NULL) 2973 last = nbp; 2974 last->b_cont = bp; 2975 } else { 2976 /* New last */ 2977 last->b_next = bp; 2978 bp->b_next = NULL; 2979 bp->b_prev = last; 2980 q->q_last = bp; 2981 } 2982 } else { 2983 /* 2984 * Can't use q_last so just call putq(). 
2985 */ 2986 (void) putq(q, bp); 2987 return (0); 2988 } 2989 2990 /* Count bytes and mblk_t's */ 2991 do { 2992 bytecnt += MBLKL(bp); 2993 mblkcnt++; 2994 } while ((bp = bp->b_cont) != NULL); 2995 q->q_count += bytecnt; 2996 q->q_mblkcnt += mblkcnt; 2997 2998 /* Check for QFULL */ 2999 if (q->q_count >= q->q_hiwat + sodp->sod_want || 3000 q->q_mblkcnt >= q->q_hiwat) { 3001 q->q_flag |= QFULL; 3002 } 3003 3004 return (0); 3005 3006 error: 3007 do { 3008 if ((nbp = bp->b_next) != NULL) 3009 bp->b_next = NULL; 3010 freemsg(bp); 3011 } while ((bp = nbp) != NULL); 3012 3013 return (ret); 3014 } 3015 3016 /* 3017 * Sockfs sodirect read wakeup. Called from a sodirect enabled transport 3018 * driver/module to indicate that read-side data is available. 3019 * 3020 * On return the sodirect_t.lock mutex will be exited so this must be the 3021 * last sodirect_t call to guarantee atomic access of *sodp. 3022 */ 3023 void 3024 sodwakeup(sodirect_t *sodp) 3025 { 3026 queue_t *q = sodp->sod_q; 3027 struct stdata *stp = (struct stdata *)q->q_ptr; 3028 3029 ASSERT(MUTEX_HELD(sodp->sod_lock)); 3030 3031 if (stp->sd_flag & RSLEEP) { 3032 stp->sd_flag &= ~RSLEEP; 3033 cv_broadcast(&q->q_wait); 3034 } 3035 3036 if (stp->sd_rput_opt & SR_POLLIN) { 3037 stp->sd_rput_opt &= ~SR_POLLIN; 3038 mutex_exit(sodp->sod_lock); 3039 pollwakeup(&stp->sd_pollist, POLLIN | POLLRDNORM); 3040 } else 3041 mutex_exit(sodp->sod_lock); 3042 } 3043