/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/sysmacros.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/file.h>
#include <sys/user.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/esunddi.h>
#include <sys/flock.h>
#include <sys/modctl.h>
#include <sys/vtrace.h>
#include <sys/strsun.h>
#include <sys/cmn_err.h>
#include <sys/proc.h>
#include <sys/ddi.h>
#include <sys/kmem_impl.h>

#include <sys/suntpi.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/socketvar.h>
#include <netinet/in.h>

#include <sys/tiuser.h>
#define	_SUN_TPI_VERSION 2
#include <sys/tihdr.h>

#include <inet/kssl/ksslapi.h>

#include <c2/audit.h>

#include <sys/dcopy.h>

/* Default sonode version for newly created stream sockets */
int so_default_version = SOV_SOCKSTREAM;

#ifdef DEBUG
/* Set sockdebug to print debug messages when SO_DEBUG is set */
int sockdebug = 0;

/* Set sockprinterr to print error messages when SO_DEBUG is set */
int sockprinterr = 0;

/*
 * Set so_default_options to SO_DEBUG if all sockets should be created
 * with SO_DEBUG set. This is needed to get debug printouts from the
 * socket() call itself.
 */
int so_default_options = 0;
#endif /* DEBUG */

#ifdef SOCK_TEST
/*
 * Set to number of ticks to limit cv_waits for code coverage testing.
 * Set to 1000 when SO_DEBUG is set to 2.
 */
clock_t sock_test_timelimit = 0;
#endif /* SOCK_TEST */

/*
 * For concurrency testing of e.g. opening /dev/ip which does not
 * handle T_INFO_REQ messages.
 */
int so_no_tinfo = 0;

/*
 * Timeout for getting a T_CAPABILITY_ACK - it is possible for a provider
 * to simply ignore the T_CAPABILITY_REQ.
 */
clock_t sock_capability_timeout = 2;	/* seconds */

static int	do_tcapability(struct sonode *so, t_uscalar_t cap_bits1);
static void	so_removehooks(struct sonode *so);

/* Stream-head read-side hooks: protocol (M_PROTO/M_PCPROTO) and misc msgs */
static mblk_t *strsock_proto(vnode_t *vp, mblk_t *mp,
		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
		strsigset_t *allmsgsigs, strpollset_t *pollwakeups);
static mblk_t *strsock_misc(vnode_t *vp, mblk_t *mp,
		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
		strsigset_t *allmsgsigs, strpollset_t *pollwakeups);

/* Map a TLI error number to an errno value */
static int tlitosyserr(int terr);

/*
 * Sodirect kmem_cache and put/wakeup functions.
 */
struct kmem_cache *socktpi_sod_cache;
static int sodput(sodirect_t *, mblk_t *);
static void sodwakeup(sodirect_t *);

/*
 * Called by sockinit() when sockfs is loaded.
 */
int
sostr_init()
{
	/*
	 * Allocate sodirect_t kmem_cache (no constructor/destructor;
	 * the fields are fully initialized at allocation in so_strinit()).
	 */
	socktpi_sod_cache = kmem_cache_create("socktpi_sod_cache",
	    sizeof (sodirect_t), 0, NULL, NULL, NULL, NULL, NULL, 0);

	return (0);
}

/*
 * Convert a socket to a stream. Invoked when the illusory sockmod
 * is popped from the stream.
 * Change the stream head back to default operation without losing
 * any messages (T_conn_ind's are moved to the stream head queue).
 *
 * Returns 0 on success; an errno on failure (EINTR if an interruptible
 * allocation fails, or the error from the _SIOCSOCKFALLBACK ioctl).
 * Caller must hold so_plumb_lock; so_lock is acquired/dropped internally.
 */
int
so_sock2stream(struct sonode *so)
{
	struct vnode *vp = SOTOV(so);
	queue_t *rq;
	mblk_t *mp;
	int error = 0;

	ASSERT(MUTEX_HELD(&so->so_plumb_lock));

	mutex_enter(&so->so_lock);
	so_lock_single(so);		/* single-thread TPI operations */

	ASSERT(so->so_version != SOV_STREAM);

	if (so->so_state & SS_DIRECT) {
		mblk_t **mpp;
		int rval;

		/*
		 * Tell the transport below that sockmod is being popped
		 */
		mutex_exit(&so->so_lock);
		error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K, CRED(),
		    &rval);
		mutex_enter(&so->so_lock);
		if (error != 0) {
			dprintso(so, 0, ("so_sock2stream(%p): "
			    "_SIOCSOCKFALLBACK failed\n", (void *)so));
			goto exit;
		}
		so->so_state &= ~SS_DIRECT;

		/*
		 * Strip the TCP accept fast-path options from any queued
		 * T_CONN_INDs, copying each message first if it is shared.
		 */
		for (mpp = &so->so_conn_ind_head; (mp = *mpp) != NULL;
		    mpp = &mp->b_next) {
			struct T_conn_ind *conn_ind;

			/*
			 * strsock_proto() has already verified the length of
			 * this message block.
			 */
			ASSERT(MBLKL(mp) >= sizeof (struct T_conn_ind));

			conn_ind = (struct T_conn_ind *)mp->b_rptr;
			if (conn_ind->OPT_length == 0 &&
			    conn_ind->OPT_offset == 0)
				continue;

			if (DB_REF(mp) > 1) {
				mblk_t *newmp;
				size_t length;
				cred_t *cr;

				/*
				 * Copy the message block because it is used
				 * elsewhere, too.
				 */
				length = MBLKL(mp);
				newmp = soallocproto(length, _ALLOC_INTR);
				if (newmp == NULL) {
					error = EINTR;
					goto exit;
				}
				bcopy(mp->b_rptr, newmp->b_wptr, length);
				newmp->b_wptr += length;
				newmp->b_next = mp->b_next;
				cr = DB_CRED(mp);
				if (cr != NULL)
					mblk_setcred(newmp, cr);
				DB_CPID(newmp) = DB_CPID(mp);

				/*
				 * Link the new message block into the queue
				 * and free the old one.
				 */
				*mpp = newmp;
				mp->b_next = NULL;
				freemsg(mp);

				mp = newmp;
				conn_ind = (struct T_conn_ind *)mp->b_rptr;
			}

			/*
			 * Remove options added by TCP for accept fast-path.
			 */
			conn_ind->OPT_length = 0;
			conn_ind->OPT_offset = 0;
		}
	}

	so->so_version = SOV_STREAM;
	so->so_priv = NULL;

	/*
	 * Remove the hooks in the stream head to avoid queuing more
	 * packets in sockfs.
	 */
	mutex_exit(&so->so_lock);
	so_removehooks(so);
	mutex_enter(&so->so_lock);

	/*
	 * Clear any state related to urgent data. Leave any T_EXDATA_IND
	 * on the queue - the behavior of urgent data after a switch is
	 * left undefined.
	 */
	so->so_error = so->so_delayed_error = 0;
	freemsg(so->so_oobmsg);
	so->so_oobmsg = NULL;
	so->so_oobsigcnt = so->so_oobcnt = 0;

	so->so_state &= ~(SS_RCVATMARK|SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA|
	    SS_HASCONNIND|SS_SAVEDEOR);
	ASSERT(so_verify_oobstate(so));

	freemsg(so->so_ack_mp);
	so->so_ack_mp = NULL;

	/*
	 * Flush the T_DISCON_IND on so_discon_ind_mp.
	 */
	so_flush_discon_ind(so);

	/*
	 * Move any queued T_CONN_IND messages to stream head queue.
	 */
	rq = RD(strvp2wq(vp));
	while ((mp = so->so_conn_ind_head) != NULL) {
		so->so_conn_ind_head = mp->b_next;
		mp->b_next = NULL;
		if (so->so_conn_ind_head == NULL) {
			ASSERT(so->so_conn_ind_tail == mp);
			so->so_conn_ind_tail = NULL;
		}
		dprintso(so, 0,
		    ("so_sock2stream(%p): moving T_CONN_IND\n",
		    (void *)so));

		/* Drop lock across put() */
		mutex_exit(&so->so_lock);
		put(rq, mp);
		mutex_enter(&so->so_lock);
	}

exit:
	ASSERT(MUTEX_HELD(&so->so_lock));
	so_unlock_single(so, SOLOCKED);
	mutex_exit(&so->so_lock);
	return (error);
}

/*
 * Convert a stream back to a socket. This is invoked when the illusory
 * sockmod is pushed on a stream (where the stream was "created" by
 * popping the illusory sockmod).
 * This routine can not recreate the socket state (certain aspects of
 * it like urgent data state and the bound/connected addresses for AF_UNIX
 * sockets can not be recreated by asking the transport for information).
 * Thus this routine implicitly assumes that the socket is in an initial
 * state (as if it was just created). It flushes any messages queued on the
 * read queue to avoid dealing with e.g. TPI acks or T_exdata_ind messages.
 *
 * Caller must hold so_plumb_lock.
 */
void
so_stream2sock(struct sonode *so)
{
	struct vnode *vp = SOTOV(so);

	ASSERT(MUTEX_HELD(&so->so_plumb_lock));

	mutex_enter(&so->so_lock);
	so_lock_single(so);
	ASSERT(so->so_version == SOV_STREAM);
	so->so_version = SOV_SOCKSTREAM;
	so->so_pushcnt = 0;
	mutex_exit(&so->so_lock);

	/*
	 * Set a permanent error to force any thread in sorecvmsg to
	 * return (and drop SOREADLOCKED). Clear the error once
	 * we have SOREADLOCKED.
	 * This makes a read sleeping during the I_PUSH of sockmod return
	 * EIO.
	 */
	strsetrerror(SOTOV(so), EIO, 1, NULL);

	/*
	 * Get the read lock before flushing data to avoid
	 * problems with the T_EXDATA_IND MSG_PEEK code in sorecvmsg.
	 */
	mutex_enter(&so->so_lock);
	(void) so_lock_read(so, 0);	/* Set SOREADLOCKED */
	mutex_exit(&so->so_lock);

	strsetrerror(SOTOV(so), 0, 0, NULL);
	so_installhooks(so);

	/*
	 * Flush everything on the read queue.
	 * This ensures that no T_CONN_IND remain and that no T_EXDATA_IND
	 * remain; those types of messages would confuse sockfs.
	 */
	strflushrq(vp, FLUSHALL);
	mutex_enter(&so->so_lock);

	/*
	 * Flush the T_DISCON_IND on so_discon_ind_mp.
	 */
	so_flush_discon_ind(so);
	so_unlock_read(so);	/* Clear SOREADLOCKED */

	so_unlock_single(so, SOLOCKED);
	mutex_exit(&so->so_lock);
}

/*
 * Install the hooks in the stream head.
 */
void
so_installhooks(struct sonode *so)
{
	struct vnode *vp = SOTOV(so);

	strsetrputhooks(vp, SH_SIGALLDATA | SH_IGN_ZEROLEN | SH_CONSOL_DATA,
	    strsock_proto, strsock_misc);
	strsetwputhooks(vp, SH_SIGPIPE | SH_RECHECK_ERR, 0);
}

/*
 * Remove the hooks in the stream head.
 */
static void
so_removehooks(struct sonode *so)
{
	struct vnode *vp = SOTOV(so);

	strsetrputhooks(vp, 0, NULL, NULL);
	strsetwputhooks(vp, 0, STRTIMOUT);
	/*
	 * Leave read behavior as it would have been for a normal
	 * stream i.e. a read of an M_PROTO will fail.
	 */
}

/*
 * Initialize the streams side of a socket including
 * T_info_req/ack processing. If tso is not NULL its values are used thereby
 * avoiding the T_INFO_REQ.
 */
int
so_strinit(struct sonode *so, struct sonode *tso)
{
	struct vnode *vp = SOTOV(so);
	struct stdata *stp;
	mblk_t *mp;
	int error;

	dprintso(so, 1, ("so_strinit(%p)\n", (void *)so));

	/* Preallocate an unbind_req message */
	mp = soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP);
	mutex_enter(&so->so_lock);
	so->so_unbind_mp = mp;
#ifdef DEBUG
	so->so_options = so_default_options;
#endif /* DEBUG */
	mutex_exit(&so->so_lock);

	so_installhooks(so);

	/*
	 * The T_CAPABILITY_REQ should be the first message sent down because
	 * at least TCP has a fast-path for this which avoids timeouts while
	 * waiting for the T_CAPABILITY_ACK under high system load.
	 */
	if (tso == NULL) {
		error = do_tcapability(so, TC1_ACCEPTOR_ID | TC1_INFO);
		if (error)
			return (error);
	} else {
		/* Copy the transport parameters from the template sonode. */
		mutex_enter(&so->so_lock);
		so->so_tsdu_size = tso->so_tsdu_size;
		so->so_etsdu_size = tso->so_etsdu_size;
		so->so_addr_size = tso->so_addr_size;
		so->so_opt_size = tso->so_opt_size;
		so->so_tidu_size = tso->so_tidu_size;
		so->so_serv_type = tso->so_serv_type;
		so->so_mode = tso->so_mode & ~SM_ACCEPTOR_ID;
		mutex_exit(&so->so_lock);

		/* the following do_tcapability may update so->so_mode */
		if ((tso->so_serv_type != T_CLTS) &&
		    !(tso->so_state & SS_DIRECT)) {
			error = do_tcapability(so, TC1_ACCEPTOR_ID);
			if (error)
				return (error);
		}
	}
	/*
	 * If the addr_size is 0 we treat it as already bound
	 * and connected. This is used by the routing socket.
	 * We set the addr_size to something to allocate the address
	 * structures.
	 */
	if (so->so_addr_size == 0) {
		so->so_state |= SS_ISBOUND | SS_ISCONNECTED;
		/* Address size can vary with address families. */
		if (so->so_family == AF_INET6)
			so->so_addr_size =
			    (t_scalar_t)sizeof (struct sockaddr_in6);
		else
			so->so_addr_size =
			    (t_scalar_t)sizeof (struct sockaddr_in);
		ASSERT(so->so_unbind_mp);
	}
	/*
	 * Allocate the addresses. One allocation covers both the local and
	 * the foreign address; so_faddr_sa points into the second half.
	 */
	ASSERT(so->so_laddr_sa == NULL && so->so_faddr_sa == NULL);
	ASSERT(so->so_laddr_len == 0 && so->so_faddr_len == 0);
	so->so_laddr_maxlen = so->so_faddr_maxlen =
	    P2ROUNDUP(so->so_addr_size, KMEM_ALIGN);
	so->so_laddr_sa = kmem_alloc(so->so_laddr_maxlen * 2, KM_SLEEP);
	so->so_faddr_sa = (struct sockaddr *)((caddr_t)so->so_laddr_sa
	    + so->so_laddr_maxlen);

	if (so->so_family == AF_UNIX) {
		/*
		 * Initialize AF_UNIX related fields.
		 */
		bzero(&so->so_ux_laddr, sizeof (so->so_ux_laddr));
		bzero(&so->so_ux_faddr, sizeof (so->so_ux_faddr));
	}

	stp = vp->v_stream;
	/*
	 * Have to keep minpsz at zero in order to allow write/send of zero
	 * bytes.
	 */
	mutex_enter(&stp->sd_lock);
	if (stp->sd_qn_minpsz == 1)
		stp->sd_qn_minpsz = 0;
	mutex_exit(&stp->sd_lock);

	/*
	 * If sodirect capable allocate and initialize sodirect_t.
	 * Note, SS_SODIRECT is set in socktpi_open().
	 */
	if (so->so_state & SS_SODIRECT) {
		sodirect_t *sodp;

		ASSERT(so->so_direct == NULL);

		sodp = kmem_cache_alloc(socktpi_sod_cache, KM_SLEEP);
		sodp->sod_state = SOD_ENABLED | SOD_WAKE_NOT;
		sodp->sod_want = 0;
		sodp->sod_q = RD(stp->sd_wrq);
		sodp->sod_enqueue = sodput;
		sodp->sod_wakeup = sodwakeup;
		sodp->sod_uioafh = NULL;
		sodp->sod_uioaft = NULL;
		sodp->sod_lockp = &stp->sd_lock;
		/*
		 * Remainder of the sod_uioa members are left uninitialized
		 * but will be initialized later by uioainit() before uioa
		 * is enabled.
		 */
		sodp->sod_uioa.uioa_state = UIOA_ALLOC;
		so->so_direct = sodp;
		stp->sd_sodirect = sodp;
	}

	return (0);
}

/*
 * Record the interesting fields of a T_INFO_ACK in the sonode and
 * derive the socket mode flags from them.
 * Caller holds so_lock.
 */
static void
copy_tinfo(struct sonode *so, struct T_info_ack *tia)
{
	so->so_tsdu_size = tia->TSDU_size;
	so->so_etsdu_size = tia->ETSDU_size;
	so->so_addr_size = tia->ADDR_size;
	so->so_opt_size = tia->OPT_size;
	so->so_tidu_size = tia->TIDU_size;
	so->so_serv_type = tia->SERV_type;
	switch (tia->CURRENT_state) {
	case TS_UNBND:
		break;
	case TS_IDLE:
		so->so_state |= SS_ISBOUND;
		so->so_laddr_len = 0;
		so->so_state &= ~SS_LADDR_VALID;
		break;
	case TS_DATA_XFER:
		so->so_state |= SS_ISBOUND|SS_ISCONNECTED;
		so->so_laddr_len = 0;
		so->so_faddr_len = 0;
		so->so_state &= ~(SS_LADDR_VALID | SS_FADDR_VALID);
		break;
	}

	/*
	 * Heuristics for determining the socket mode flags
	 * (SM_ATOMIC, SM_CONNREQUIRED, SM_ADDR, SM_FDPASSING,
	 * and SM_EXDATA, SM_OPTDATA, and SM_BYTESTREAM)
	 * from the info ack.
	 */
	if (so->so_serv_type == T_CLTS) {
		so->so_mode |= SM_ATOMIC | SM_ADDR;
	} else {
		so->so_mode |= SM_CONNREQUIRED;
		if (so->so_etsdu_size != 0 && so->so_etsdu_size != -2)
			so->so_mode |= SM_EXDATA;
	}
	if (so->so_type == SOCK_SEQPACKET || so->so_type == SOCK_RAW) {
		/* Semantics are to discard tail end of messages */
		so->so_mode |= SM_ATOMIC;
	}
	if (so->so_family == AF_UNIX) {
		so->so_mode |= SM_FDPASSING | SM_OPTDATA;
		if (so->so_addr_size == -1) {
			/* MAXPATHLEN + soun_family + nul termination */
			so->so_addr_size = (t_scalar_t)(MAXPATHLEN +
			    sizeof (short) + 1);
		}
		if (so->so_type == SOCK_STREAM) {
			/*
			 * Make it into a byte-stream transport.
			 * SOCK_SEQPACKET sockets are unchanged.
			 */
			so->so_tsdu_size = 0;
		}
	} else if (so->so_addr_size == -1) {
		/*
		 * Logic extracted from sockmod - have to pick some max address
		 * length in order to preallocate the addresses.
		 */
		so->so_addr_size = SOA_DEFSIZE;
	}
	if (so->so_tsdu_size == 0)
		so->so_mode |= SM_BYTESTREAM;
}

/*
 * Sanity-check the transport parameters recorded by copy_tinfo()
 * against the socket type and family. Returns 0 if consistent,
 * EPROTO or EMSGSIZE otherwise.
 */
static int
check_tinfo(struct sonode *so)
{
	/* Consistency checks */
	if (so->so_type == SOCK_DGRAM && so->so_serv_type != T_CLTS) {
		eprintso(so, ("service type and socket type mismatch\n"));
		eprintsoline(so, EPROTO);
		return (EPROTO);
	}
	if (so->so_type == SOCK_STREAM && so->so_serv_type == T_CLTS) {
		eprintso(so, ("service type and socket type mismatch\n"));
		eprintsoline(so, EPROTO);
		return (EPROTO);
	}
	if (so->so_type == SOCK_SEQPACKET && so->so_serv_type == T_CLTS) {
		eprintso(so, ("service type and socket type mismatch\n"));
		eprintsoline(so, EPROTO);
		return (EPROTO);
	}
	if (so->so_family == AF_INET &&
	    so->so_addr_size != (t_scalar_t)sizeof (struct sockaddr_in)) {
		eprintso(so,
		    ("AF_INET must have sockaddr_in address length. Got %d\n",
		    so->so_addr_size));
		eprintsoline(so, EMSGSIZE);
		return (EMSGSIZE);
	}
	if (so->so_family == AF_INET6 &&
	    so->so_addr_size != (t_scalar_t)sizeof (struct sockaddr_in6)) {
		eprintso(so,
		    ("AF_INET6 must have sockaddr_in6 address length. Got %d\n",
		    so->so_addr_size));
		eprintsoline(so, EMSGSIZE);
		return (EMSGSIZE);
	}

	dprintso(so, 1, (
	    "tinfo: serv %d tsdu %d, etsdu %d, addr %d, opt %d, tidu %d\n",
	    so->so_serv_type, so->so_tsdu_size, so->so_etsdu_size,
	    so->so_addr_size, so->so_opt_size,
	    so->so_tidu_size));
	dprintso(so, 1, ("tinfo: so_state %s\n",
	    pr_state(so->so_state, so->so_mode)));
	return (0);
}

/*
 * Send down T_info_req and wait for the ack.
 * Record interesting T_info_ack values in the sonode.
 */
static int
do_tinfo(struct sonode *so)
{
	struct T_info_req tir;
	mblk_t *mp;
	int error;

	ASSERT(MUTEX_NOT_HELD(&so->so_lock));

	/* Tunable escape hatch for providers that can't handle T_INFO_REQ */
	if (so_no_tinfo) {
		so->so_addr_size = 0;
		return (0);
	}

	dprintso(so, 1, ("do_tinfo(%p)\n", (void *)so));

	/* Send T_INFO_REQ */
	tir.PRIM_type = T_INFO_REQ;
	mp = soallocproto1(&tir, sizeof (tir),
	    sizeof (struct T_info_req) + sizeof (struct T_info_ack),
	    _ALLOC_INTR);
	if (mp == NULL) {
		eprintsoline(so, ENOBUFS);
		return (ENOBUFS);
	}
	/* T_INFO_REQ has to be M_PCPROTO */
	DB_TYPE(mp) = M_PCPROTO;

	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
	if (error) {
		eprintsoline(so, error);
		return (error);
	}
	mutex_enter(&so->so_lock);
	/* Wait for T_INFO_ACK (no timeout - wait indefinitely) */
	if ((error = sowaitprim(so, T_INFO_REQ, T_INFO_ACK,
	    (t_uscalar_t)sizeof (struct T_info_ack), &mp, 0))) {
		mutex_exit(&so->so_lock);
		eprintsoline(so, error);
		return (error);
	}

	ASSERT(mp);
	copy_tinfo(so, (struct T_info_ack *)mp->b_rptr);
	mutex_exit(&so->so_lock);
	freemsg(mp);
	return (check_tinfo(so));
}

/*
 * Send down T_capability_req and wait for the ack.
 * Record interesting T_capability_ack values in the sonode.
 */
static int
do_tcapability(struct sonode *so, t_uscalar_t cap_bits1)
{
	struct T_capability_req tcr;
	struct T_capability_ack *tca;
	mblk_t *mp;
	int error;

	ASSERT(cap_bits1 != 0);
	ASSERT((cap_bits1 & ~(TC1_ACCEPTOR_ID | TC1_INFO)) == 0);
	ASSERT(MUTEX_NOT_HELD(&so->so_lock));

	/*
	 * If this provider is already known not to support
	 * T_CAPABILITY_REQ, fall back to T_INFO_REQ immediately.
	 */
	if (so->so_provinfo->tpi_capability == PI_NO)
		return (do_tinfo(so));

	if (so_no_tinfo) {
		so->so_addr_size = 0;
		if ((cap_bits1 &= ~TC1_INFO) == 0)
			return (0);
	}

	dprintso(so, 1, ("do_tcapability(%p)\n", (void *)so));

	/* Send T_CAPABILITY_REQ */
	tcr.PRIM_type = T_CAPABILITY_REQ;
	tcr.CAP_bits1 = cap_bits1;
	mp = soallocproto1(&tcr, sizeof (tcr),
	    sizeof (struct T_capability_req) + sizeof (struct T_capability_ack),
	    _ALLOC_INTR);
	if (mp == NULL) {
		eprintsoline(so, ENOBUFS);
		return (ENOBUFS);
	}
	/* T_CAPABILITY_REQ should be M_PCPROTO here */
	DB_TYPE(mp) = M_PCPROTO;

	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
	if (error) {
		eprintsoline(so, error);
		return (error);
	}
	mutex_enter(&so->so_lock);
	/*
	 * Wait for T_CAPABILITY_ACK, but only for a bounded time since
	 * a provider may silently ignore the T_CAPABILITY_REQ.
	 */
	if ((error = sowaitprim(so, T_CAPABILITY_REQ, T_CAPABILITY_ACK,
	    (t_uscalar_t)sizeof (*tca), &mp, sock_capability_timeout * hz))) {
		mutex_exit(&so->so_lock);
		/* Remember that this provider does not answer the request. */
		PI_PROVLOCK(so->so_provinfo);
		if (so->so_provinfo->tpi_capability == PI_DONTKNOW)
			so->so_provinfo->tpi_capability = PI_NO;
		PI_PROVUNLOCK(so->so_provinfo);
		ASSERT((so->so_mode & SM_ACCEPTOR_ID) == 0);
		if (cap_bits1 & TC1_INFO) {
			/*
			 * If the T_CAPABILITY_REQ timed out and then a
			 * T_INFO_REQ gets a protocol error, most likely
			 * the capability was slow (vs. unsupported). Return
			 * ENOSR for this case as a best guess.
			 */
			if (error == ETIME) {
				return ((error = do_tinfo(so)) == EPROTO ?
				    ENOSR : error);
			}
			return (do_tinfo(so));
		}
		return (0);
	}

	if (so->so_provinfo->tpi_capability == PI_DONTKNOW) {
		PI_PROVLOCK(so->so_provinfo);
		so->so_provinfo->tpi_capability = PI_YES;
		PI_PROVUNLOCK(so->so_provinfo);
	}

	ASSERT(mp);
	tca = (struct T_capability_ack *)mp->b_rptr;

	ASSERT((cap_bits1 & TC1_INFO) == (tca->CAP_bits1 & TC1_INFO));

	cap_bits1 = tca->CAP_bits1;

	if (cap_bits1 & TC1_ACCEPTOR_ID) {
		so->so_acceptor_id = tca->ACCEPTOR_id;
		so->so_mode |= SM_ACCEPTOR_ID;
	}

	if (cap_bits1 & TC1_INFO)
		copy_tinfo(so, &tca->INFO_ack);

	mutex_exit(&so->so_lock);
	freemsg(mp);

	if (cap_bits1 & TC1_INFO)
		return (check_tinfo(so));

	return (0);
}

/*
 * Retrieve and clear the socket error.
 * Caller holds so_lock.
 */
int
sogeterr(struct sonode *so)
{
	int error;

	ASSERT(MUTEX_HELD(&so->so_lock));

	error = so->so_error;
	so->so_error = 0;

	return (error);
}

/*
 * This routine is registered with the stream head to retrieve read
 * side errors.
 * It does not clear the socket error for a peeking read side operation.
 * If the error is to be cleared it sets *clearerr.
 */
int
sogetrderr(vnode_t *vp, int ispeek, int *clearerr)
{
	struct sonode *so = VTOSO(vp);
	int error;

	mutex_enter(&so->so_lock);
	if (ispeek) {
		error = so->so_error;
		*clearerr = 0;
	} else {
		error = so->so_error;
		so->so_error = 0;
		*clearerr = 1;
	}
	mutex_exit(&so->so_lock);
	return (error);
}

/*
 * This routine is registered with the stream head to retrieve write
 * side errors.
 * It does not clear the socket error for a peeking read side operation.
 * If the error is to be cleared it sets *clearerr.
837 */ 838 int 839 sogetwrerr(vnode_t *vp, int ispeek, int *clearerr) 840 { 841 struct sonode *so = VTOSO(vp); 842 int error; 843 844 mutex_enter(&so->so_lock); 845 if (so->so_state & SS_CANTSENDMORE) { 846 error = EPIPE; 847 *clearerr = 0; 848 } else { 849 error = so->so_error; 850 if (ispeek) { 851 *clearerr = 0; 852 } else { 853 so->so_error = 0; 854 *clearerr = 1; 855 } 856 } 857 mutex_exit(&so->so_lock); 858 return (error); 859 } 860 861 /* 862 * Set a nonpersistent read and write error on the socket. 863 * Used when there is a T_uderror_ind for a connected socket. 864 * The caller also needs to call strsetrerror and strsetwerror 865 * after dropping the lock. 866 */ 867 void 868 soseterror(struct sonode *so, int error) 869 { 870 ASSERT(error != 0); 871 872 ASSERT(MUTEX_HELD(&so->so_lock)); 873 so->so_error = (ushort_t)error; 874 } 875 876 void 877 soisconnecting(struct sonode *so) 878 { 879 ASSERT(MUTEX_HELD(&so->so_lock)); 880 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); 881 so->so_state |= SS_ISCONNECTING; 882 cv_broadcast(&so->so_state_cv); 883 } 884 885 void 886 soisconnected(struct sonode *so) 887 { 888 ASSERT(MUTEX_HELD(&so->so_lock)); 889 so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING); 890 so->so_state |= SS_ISCONNECTED; 891 cv_broadcast(&so->so_state_cv); 892 } 893 894 /* 895 * The caller also needs to call strsetrerror, strsetwerror and strseteof. 896 */ 897 void 898 soisdisconnected(struct sonode *so, int error) 899 { 900 ASSERT(MUTEX_HELD(&so->so_lock)); 901 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING| 902 SS_LADDR_VALID|SS_FADDR_VALID); 903 so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE); 904 so->so_error = (ushort_t)error; 905 if (so->so_peercred != NULL) { 906 crfree(so->so_peercred); 907 so->so_peercred = NULL; 908 } 909 cv_broadcast(&so->so_state_cv); 910 } 911 912 /* 913 * For connected AF_UNIX SOCK_DGRAM sockets when the peer closes. 914 * Does not affect write side. 
915 * The caller also has to call strsetrerror. 916 */ 917 static void 918 sobreakconn(struct sonode *so, int error) 919 { 920 ASSERT(MUTEX_HELD(&so->so_lock)); 921 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); 922 so->so_error = (ushort_t)error; 923 cv_broadcast(&so->so_state_cv); 924 } 925 926 /* 927 * Can no longer send. 928 * Caller must also call strsetwerror. 929 * 930 * We mark the peer address as no longer valid for getpeername, but 931 * leave it around for so_unix_close to notify the peer (that 932 * transport has no addressing held at that layer). 933 */ 934 void 935 socantsendmore(struct sonode *so) 936 { 937 ASSERT(MUTEX_HELD(&so->so_lock)); 938 so->so_state = so->so_state & ~SS_FADDR_VALID | SS_CANTSENDMORE; 939 cv_broadcast(&so->so_state_cv); 940 } 941 942 /* 943 * The caller must call strseteof(,1) as well as this routine 944 * to change the socket state. 945 */ 946 void 947 socantrcvmore(struct sonode *so) 948 { 949 ASSERT(MUTEX_HELD(&so->so_lock)); 950 so->so_state |= SS_CANTRCVMORE; 951 cv_broadcast(&so->so_state_cv); 952 } 953 954 /* 955 * The caller has sent down a "request_prim" primitive and wants to wait for 956 * an ack ("ack_prim") or an T_ERROR_ACK for it. 957 * The specified "ack_prim" can be a T_OK_ACK. 958 * 959 * Assumes that all the TPI acks are M_PCPROTO messages. 960 * 961 * Note that the socket is single-threaded (using so_lock_single) 962 * for all operations that generate TPI ack messages. Since 963 * only TPI ack messages are M_PCPROTO we should never receive 964 * anything except either the ack we are expecting or a T_ERROR_ACK 965 * for the same primitive. 
 */
int
sowaitprim(struct sonode *so, t_scalar_t request_prim, t_scalar_t ack_prim,
    t_uscalar_t min_size, mblk_t **mpp, clock_t wait)
{
	mblk_t *mp;
	union T_primitives *tpr;
	int error;

	dprintso(so, 1, ("sowaitprim(%p, %d, %d, %d, %p, %lu)\n",
	    (void *)so, request_prim, ack_prim, min_size, (void *)mpp, wait));

	ASSERT(MUTEX_HELD(&so->so_lock));

	error = sowaitack(so, &mp, wait);
	if (error)
		return (error);

	dprintso(so, 1, ("got msg %p\n", (void *)mp));
	/* The ack must be M_PCPROTO and at least large enough to carry type */
	if (DB_TYPE(mp) != M_PCPROTO ||
	    MBLKL(mp) < sizeof (tpr->type)) {
		freemsg(mp);
		eprintsoline(so, EPROTO);
		return (EPROTO);
	}
	tpr = (union T_primitives *)mp->b_rptr;
	/*
	 * Did we get the primitive that we were asking for?
	 * For T_OK_ACK we also check that it matches the request primitive.
	 */
	if (tpr->type == ack_prim &&
	    (ack_prim != T_OK_ACK ||
	    tpr->ok_ack.CORRECT_prim == request_prim)) {
		if (MBLKL(mp) >= (ssize_t)min_size) {
			/* Found what we are looking for */
			*mpp = mp;
			return (0);
		}
		/* Too short */
		freemsg(mp);
		eprintsoline(so, EPROTO);
		return (EPROTO);
	}

	if (tpr->type == T_ERROR_ACK &&
	    tpr->error_ack.ERROR_prim == request_prim) {
		/* Error to the primitive we were looking for */
		if (tpr->error_ack.TLI_error == TSYSERR) {
			error = tpr->error_ack.UNIX_error;
		} else {
			error = tlitosyserr(tpr->error_ack.TLI_error);
		}
		dprintso(so, 0, ("error_ack for %d: %d/%d ->%d\n",
		    tpr->error_ack.ERROR_prim,
		    tpr->error_ack.TLI_error,
		    tpr->error_ack.UNIX_error,
		    error));
		freemsg(mp);
		return (error);
	}
	/*
	 * Wrong primitive or T_ERROR_ACK for the wrong primitive
	 */
#ifdef DEBUG
	if (tpr->type == T_ERROR_ACK) {
		dprintso(so, 0, ("error_ack for %d: %d/%d\n",
		    tpr->error_ack.ERROR_prim,
		    tpr->error_ack.TLI_error,
		    tpr->error_ack.UNIX_error));
	} else if (tpr->type == T_OK_ACK) {
		dprintso(so, 0, ("ok_ack for %d, expected %d for %d\n",
		    tpr->ok_ack.CORRECT_prim,
		    ack_prim, request_prim));
	} else {
		dprintso(so, 0,
		    ("unexpected primitive %d, expected %d for %d\n",
		    tpr->type, ack_prim, request_prim));
	}
#endif /* DEBUG */

	freemsg(mp);
	eprintsoline(so, EPROTO);
	return (EPROTO);
}

/*
 * Wait for a T_OK_ACK for the specified primitive.
 */
int
sowaitokack(struct sonode *so, t_scalar_t request_prim)
{
	mblk_t *mp;
	int error;

	error = sowaitprim(so, request_prim, T_OK_ACK,
	    (t_uscalar_t)sizeof (struct T_ok_ack), &mp, 0);
	if (error)
		return (error);
	freemsg(mp);
	return (0);
}

/*
 * Queue a received TPI ack message on so_ack_mp.
 */
void
soqueueack(struct sonode *so, mblk_t *mp)
{
	if (DB_TYPE(mp) != M_PCPROTO) {
		zcmn_err(getzoneid(), CE_WARN,
		    "sockfs: received unexpected M_PROTO TPI ack. Prim %d\n",
		    *(t_scalar_t *)mp->b_rptr);
		freemsg(mp);
		return;
	}

	mutex_enter(&so->so_lock);
	if (so->so_ack_mp != NULL) {
		/* Replace any stale, unconsumed ack */
		dprintso(so, 1, ("so_ack_mp already set\n"));
		freemsg(so->so_ack_mp);
		so->so_ack_mp = NULL;
	}
	so->so_ack_mp = mp;
	cv_broadcast(&so->so_ack_cv);
	mutex_exit(&so->so_lock);
}

/*
 * Wait for a TPI ack ignoring signals and errors.
 * If "wait" is non-zero it is used as a tick timeout; ETIME is returned
 * if it expires. Caller holds so_lock.
 */
int
sowaitack(struct sonode *so, mblk_t **mpp, clock_t wait)
{
	ASSERT(MUTEX_HELD(&so->so_lock));

	while (so->so_ack_mp == NULL) {
#ifdef SOCK_TEST
		if (wait == 0 && sock_test_timelimit != 0)
			wait = sock_test_timelimit;
#endif
		if (wait != 0) {
			/*
			 * Only wait for the time limit.
			 */
			clock_t now;

			time_to_wait(&now, wait);
			if (cv_timedwait(&so->so_ack_cv, &so->so_lock,
			    now) == -1) {
				eprintsoline(so, ETIME);
				return (ETIME);
			}
		} else
			cv_wait(&so->so_ack_cv, &so->so_lock);
	}
	*mpp = so->so_ack_mp;
#ifdef DEBUG
	{
		union T_primitives *tpr;
		mblk_t *mp = *mpp;

		tpr = (union T_primitives *)mp->b_rptr;
		ASSERT(DB_TYPE(mp) == M_PCPROTO);
		ASSERT(tpr->type == T_OK_ACK ||
		    tpr->type == T_ERROR_ACK ||
		    tpr->type == T_BIND_ACK ||
		    tpr->type == T_CAPABILITY_ACK ||
		    tpr->type == T_INFO_ACK ||
		    tpr->type == T_OPTMGMT_ACK);
	}
#endif /* DEBUG */
	so->so_ack_mp = NULL;
	return (0);
}

/*
 * Queue a received T_CONN_IND message on so_conn_ind_head/tail.
 */
void
soqueueconnind(struct sonode *so, mblk_t *mp)
{
	if (DB_TYPE(mp) != M_PROTO) {
		zcmn_err(getzoneid(), CE_WARN,
		    "sockfs: received unexpected M_PCPROTO T_CONN_IND\n");
		freemsg(mp);
		return;
	}

	mutex_enter(&so->so_lock);
	ASSERT(mp->b_next == NULL);
	if (so->so_conn_ind_head == NULL) {
		so->so_conn_ind_head = mp;
		so->so_state |= SS_HASCONNIND;
	} else {
		ASSERT(so->so_state & SS_HASCONNIND);
		ASSERT(so->so_conn_ind_tail->b_next == NULL);
		so->so_conn_ind_tail->b_next = mp;
	}
	so->so_conn_ind_tail = mp;
	/* Wakeup a single consumer of the T_CONN_IND */
	cv_signal(&so->so_connind_cv);
	mutex_exit(&so->so_lock);
}

/*
 * Wait for a T_CONN_IND.
 * Don't wait if nonblocking.
 * Accept signals and socket errors.
1175 */ 1176 int 1177 sowaitconnind(struct sonode *so, int fmode, mblk_t **mpp) 1178 { 1179 mblk_t *mp; 1180 int error = 0; 1181 1182 ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 1183 mutex_enter(&so->so_lock); 1184 check_error: 1185 if (so->so_error) { 1186 error = sogeterr(so); 1187 if (error) { 1188 mutex_exit(&so->so_lock); 1189 return (error); 1190 } 1191 } 1192 1193 if (so->so_conn_ind_head == NULL) { 1194 if (fmode & (FNDELAY|FNONBLOCK)) { 1195 error = EWOULDBLOCK; 1196 goto done; 1197 } 1198 if (!cv_wait_sig_swap(&so->so_connind_cv, &so->so_lock)) { 1199 error = EINTR; 1200 goto done; 1201 } 1202 goto check_error; 1203 } 1204 mp = so->so_conn_ind_head; 1205 so->so_conn_ind_head = mp->b_next; 1206 mp->b_next = NULL; 1207 if (so->so_conn_ind_head == NULL) { 1208 ASSERT(so->so_conn_ind_tail == mp); 1209 so->so_conn_ind_tail = NULL; 1210 so->so_state &= ~SS_HASCONNIND; 1211 } 1212 *mpp = mp; 1213 done: 1214 mutex_exit(&so->so_lock); 1215 return (error); 1216 } 1217 1218 /* 1219 * Flush a T_CONN_IND matching the sequence number from the list. 1220 * Return zero if found; non-zero otherwise. 1221 * This is called very infrequently thus it is ok to do a linear search. 1222 */ 1223 int 1224 soflushconnind(struct sonode *so, t_scalar_t seqno) 1225 { 1226 mblk_t *prevmp, *mp; 1227 struct T_conn_ind *tci; 1228 1229 mutex_enter(&so->so_lock); 1230 for (prevmp = NULL, mp = so->so_conn_ind_head; mp != NULL; 1231 prevmp = mp, mp = mp->b_next) { 1232 tci = (struct T_conn_ind *)mp->b_rptr; 1233 if (tci->SEQ_number == seqno) { 1234 dprintso(so, 1, 1235 ("t_discon_ind: found T_CONN_IND %d\n", seqno)); 1236 /* Deleting last? 
*/ 1237 if (so->so_conn_ind_tail == mp) { 1238 so->so_conn_ind_tail = prevmp; 1239 } 1240 if (prevmp == NULL) { 1241 /* Deleting first */ 1242 so->so_conn_ind_head = mp->b_next; 1243 } else { 1244 prevmp->b_next = mp->b_next; 1245 } 1246 mp->b_next = NULL; 1247 if (so->so_conn_ind_head == NULL) { 1248 ASSERT(so->so_conn_ind_tail == NULL); 1249 so->so_state &= ~SS_HASCONNIND; 1250 } else { 1251 ASSERT(so->so_conn_ind_tail != NULL); 1252 } 1253 so->so_error = ECONNABORTED; 1254 mutex_exit(&so->so_lock); 1255 1256 /* 1257 * T_KSSL_PROXY_CONN_IND may carry a handle for 1258 * an SSL context, and needs to be released. 1259 */ 1260 if ((tci->PRIM_type == T_SSL_PROXY_CONN_IND) && 1261 (mp->b_cont != NULL)) { 1262 kssl_ctx_t kssl_ctx; 1263 1264 ASSERT(MBLKL(mp->b_cont) == 1265 sizeof (kssl_ctx_t)); 1266 kssl_ctx = *((kssl_ctx_t *)mp->b_cont->b_rptr); 1267 kssl_release_ctx(kssl_ctx); 1268 } 1269 freemsg(mp); 1270 return (0); 1271 } 1272 } 1273 mutex_exit(&so->so_lock); 1274 dprintso(so, 1, ("t_discon_ind: NOT found T_CONN_IND %d\n", seqno)); 1275 return (-1); 1276 } 1277 1278 /* 1279 * Wait until the socket is connected or there is an error. 1280 * fmode should contain any nonblocking flags. nosig should be 1281 * set if the caller does not want the wait to be interrupted by a signal. 1282 */ 1283 int 1284 sowaitconnected(struct sonode *so, int fmode, int nosig) 1285 { 1286 int error; 1287 1288 ASSERT(MUTEX_HELD(&so->so_lock)); 1289 1290 while ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 1291 SS_ISCONNECTING && so->so_error == 0) { 1292 1293 dprintso(so, 1, ("waiting for SS_ISCONNECTED on %p\n", 1294 (void *)so)); 1295 if (fmode & (FNDELAY|FNONBLOCK)) 1296 return (EINPROGRESS); 1297 1298 if (nosig) 1299 cv_wait(&so->so_state_cv, &so->so_lock); 1300 else if (!cv_wait_sig_swap(&so->so_state_cv, &so->so_lock)) { 1301 /* 1302 * Return EINTR and let the application use 1303 * nonblocking techniques for detecting when 1304 * the connection has been established. 
1305 */ 1306 return (EINTR); 1307 } 1308 dprintso(so, 1, ("awoken on %p\n", (void *)so)); 1309 } 1310 1311 if (so->so_error != 0) { 1312 error = sogeterr(so); 1313 ASSERT(error != 0); 1314 dprintso(so, 1, ("sowaitconnected: error %d\n", error)); 1315 return (error); 1316 } 1317 if (!(so->so_state & SS_ISCONNECTED)) { 1318 /* 1319 * Could have received a T_ORDREL_IND or a T_DISCON_IND with 1320 * zero errno. Or another thread could have consumed so_error 1321 * e.g. by calling read. 1322 */ 1323 error = ECONNREFUSED; 1324 dprintso(so, 1, ("sowaitconnected: error %d\n", error)); 1325 return (error); 1326 } 1327 return (0); 1328 } 1329 1330 1331 /* 1332 * Handle the signal generation aspect of urgent data. 1333 */ 1334 static void 1335 so_oob_sig(struct sonode *so, int extrasig, 1336 strsigset_t *signals, strpollset_t *pollwakeups) 1337 { 1338 ASSERT(MUTEX_HELD(&so->so_lock)); 1339 1340 ASSERT(so_verify_oobstate(so)); 1341 ASSERT(so->so_oobsigcnt >= so->so_oobcnt); 1342 if (so->so_oobsigcnt > so->so_oobcnt) { 1343 /* 1344 * Signal has already been generated once for this 1345 * urgent "event". However, since TCP can receive updated 1346 * urgent pointers we still generate a signal. 1347 */ 1348 ASSERT(so->so_state & SS_OOBPEND); 1349 if (extrasig) { 1350 *signals |= S_RDBAND; 1351 *pollwakeups |= POLLRDBAND; 1352 } 1353 return; 1354 } 1355 1356 so->so_oobsigcnt++; 1357 ASSERT(so->so_oobsigcnt > 0); /* Wraparound */ 1358 ASSERT(so->so_oobsigcnt > so->so_oobcnt); 1359 1360 /* 1361 * Record (for select/poll) that urgent data is pending. 1362 */ 1363 so->so_state |= SS_OOBPEND; 1364 /* 1365 * New urgent data on the way so forget about any old 1366 * urgent data. 
1367 */ 1368 so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA); 1369 if (so->so_oobmsg != NULL) { 1370 dprintso(so, 1, ("sock: discarding old oob\n")); 1371 freemsg(so->so_oobmsg); 1372 so->so_oobmsg = NULL; 1373 } 1374 *signals |= S_RDBAND; 1375 *pollwakeups |= POLLRDBAND; 1376 ASSERT(so_verify_oobstate(so)); 1377 } 1378 1379 /* 1380 * Handle the processing of the T_EXDATA_IND with urgent data. 1381 * Returns the T_EXDATA_IND if it should be queued on the read queue. 1382 */ 1383 /* ARGSUSED2 */ 1384 static mblk_t * 1385 so_oob_exdata(struct sonode *so, mblk_t *mp, 1386 strsigset_t *signals, strpollset_t *pollwakeups) 1387 { 1388 ASSERT(MUTEX_HELD(&so->so_lock)); 1389 1390 ASSERT(so_verify_oobstate(so)); 1391 1392 ASSERT(so->so_oobsigcnt > so->so_oobcnt); 1393 1394 so->so_oobcnt++; 1395 ASSERT(so->so_oobcnt > 0); /* wraparound? */ 1396 ASSERT(so->so_oobsigcnt >= so->so_oobcnt); 1397 1398 /* 1399 * Set MSGMARK for SIOCATMARK. 1400 */ 1401 mp->b_flag |= MSGMARK; 1402 1403 ASSERT(so_verify_oobstate(so)); 1404 return (mp); 1405 } 1406 1407 /* 1408 * Handle the processing of the actual urgent data. 1409 * Returns the data mblk if it should be queued on the read queue. 1410 */ 1411 static mblk_t * 1412 so_oob_data(struct sonode *so, mblk_t *mp, 1413 strsigset_t *signals, strpollset_t *pollwakeups) 1414 { 1415 ASSERT(MUTEX_HELD(&so->so_lock)); 1416 1417 ASSERT(so_verify_oobstate(so)); 1418 1419 ASSERT(so->so_oobsigcnt >= so->so_oobcnt); 1420 ASSERT(mp != NULL); 1421 /* 1422 * For OOBINLINE we keep the data in the T_EXDATA_IND. 1423 * Otherwise we store it in so_oobmsg. 
1424 */ 1425 ASSERT(so->so_oobmsg == NULL); 1426 if (so->so_options & SO_OOBINLINE) { 1427 *pollwakeups |= POLLIN | POLLRDNORM | POLLRDBAND; 1428 *signals |= S_INPUT | S_RDNORM; 1429 } else { 1430 *pollwakeups |= POLLRDBAND; 1431 so->so_state |= SS_HAVEOOBDATA; 1432 so->so_oobmsg = mp; 1433 mp = NULL; 1434 } 1435 ASSERT(so_verify_oobstate(so)); 1436 return (mp); 1437 } 1438 1439 /* 1440 * Caller must hold the mutex. 1441 * For delayed processing, save the T_DISCON_IND received 1442 * from below on so_discon_ind_mp. 1443 * When the message is processed the framework will call: 1444 * (*func)(so, mp); 1445 */ 1446 static void 1447 so_save_discon_ind(struct sonode *so, 1448 mblk_t *mp, 1449 void (*func)(struct sonode *so, mblk_t *)) 1450 { 1451 ASSERT(MUTEX_HELD(&so->so_lock)); 1452 1453 /* 1454 * Discard new T_DISCON_IND if we have already received another. 1455 * Currently the earlier message can either be on so_discon_ind_mp 1456 * or being processed. 1457 */ 1458 if (so->so_discon_ind_mp != NULL || (so->so_flag & SOASYNC_UNBIND)) { 1459 zcmn_err(getzoneid(), CE_WARN, 1460 "sockfs: received unexpected additional T_DISCON_IND\n"); 1461 freemsg(mp); 1462 return; 1463 } 1464 mp->b_prev = (mblk_t *)func; 1465 mp->b_next = NULL; 1466 so->so_discon_ind_mp = mp; 1467 } 1468 1469 /* 1470 * Caller must hold the mutex and make sure that either SOLOCKED 1471 * or SOASYNC_UNBIND is set. Called from so_unlock_single(). 1472 * Perform delayed processing of T_DISCON_IND message on so_discon_ind_mp. 1473 * Need to ensure that strsock_proto() will not end up sleeping for 1474 * SOASYNC_UNBIND, while executing this function. 
1475 */ 1476 void 1477 so_drain_discon_ind(struct sonode *so) 1478 { 1479 mblk_t *bp; 1480 void (*func)(struct sonode *so, mblk_t *); 1481 1482 ASSERT(MUTEX_HELD(&so->so_lock)); 1483 ASSERT(so->so_flag & (SOLOCKED|SOASYNC_UNBIND)); 1484 1485 /* Process T_DISCON_IND on so_discon_ind_mp */ 1486 if ((bp = so->so_discon_ind_mp) != NULL) { 1487 so->so_discon_ind_mp = NULL; 1488 func = (void (*)())bp->b_prev; 1489 bp->b_prev = NULL; 1490 1491 /* 1492 * This (*func) is supposed to generate a message downstream 1493 * and we need to have a flag set until the corresponding 1494 * upstream message reaches stream head. 1495 * When processing T_DISCON_IND in strsock_discon_ind 1496 * we hold SOASYN_UNBIND when sending T_UNBIND_REQ down and 1497 * drop the flag after we get the ACK in strsock_proto. 1498 */ 1499 (void) (*func)(so, bp); 1500 } 1501 } 1502 1503 /* 1504 * Caller must hold the mutex. 1505 * Remove the T_DISCON_IND on so_discon_ind_mp. 1506 */ 1507 void 1508 so_flush_discon_ind(struct sonode *so) 1509 { 1510 mblk_t *bp; 1511 1512 ASSERT(MUTEX_HELD(&so->so_lock)); 1513 1514 /* 1515 * Remove T_DISCON_IND mblk at so_discon_ind_mp. 1516 */ 1517 if ((bp = so->so_discon_ind_mp) != NULL) { 1518 so->so_discon_ind_mp = NULL; 1519 bp->b_prev = NULL; 1520 freemsg(bp); 1521 } 1522 } 1523 1524 /* 1525 * Caller must hold the mutex. 1526 * 1527 * This function is used to process the T_DISCON_IND message. It does 1528 * immediate processing when called from strsock_proto and delayed 1529 * processing of discon_ind saved on so_discon_ind_mp when called from 1530 * so_drain_discon_ind. When a T_DISCON_IND message is saved in 1531 * so_discon_ind_mp for delayed processing, this function is registered 1532 * as the callback function to process the message. 1533 * 1534 * SOASYNC_UNBIND should be held in this function, during the non-blocking 1535 * unbind operation, and should be released only after we receive the ACK 1536 * in strsock_proto, for the T_UNBIND_REQ sent here. 
Since SOLOCKED is not set, 1537 * no TPI messages would be sent down at this time. This is to prevent M_FLUSH 1538 * sent from either this function or tcp_unbind(), flushing away any TPI 1539 * message that is being sent down and stays in a lower module's queue. 1540 * 1541 * This function drops so_lock and grabs it again. 1542 */ 1543 static void 1544 strsock_discon_ind(struct sonode *so, mblk_t *discon_mp) 1545 { 1546 struct vnode *vp; 1547 struct stdata *stp; 1548 union T_primitives *tpr; 1549 struct T_unbind_req *ubr; 1550 mblk_t *mp; 1551 int error; 1552 1553 ASSERT(MUTEX_HELD(&so->so_lock)); 1554 ASSERT(discon_mp); 1555 ASSERT(discon_mp->b_rptr); 1556 1557 tpr = (union T_primitives *)discon_mp->b_rptr; 1558 ASSERT(tpr->type == T_DISCON_IND); 1559 1560 vp = SOTOV(so); 1561 stp = vp->v_stream; 1562 ASSERT(stp); 1563 1564 /* 1565 * Not a listener 1566 */ 1567 ASSERT((so->so_state & SS_ACCEPTCONN) == 0); 1568 1569 /* 1570 * This assumes that the name space for DISCON_reason 1571 * is the errno name space. 1572 */ 1573 soisdisconnected(so, tpr->discon_ind.DISCON_reason); 1574 1575 /* 1576 * Unbind with the transport without blocking. 1577 * If we've already received a T_DISCON_IND do not unbind. 1578 * 1579 * If there is no preallocated unbind message, we have already 1580 * unbound with the transport 1581 * 1582 * If the socket is not bound, no need to unbind. 1583 */ 1584 mp = so->so_unbind_mp; 1585 if (mp == NULL) { 1586 ASSERT(!(so->so_state & SS_ISBOUND)); 1587 mutex_exit(&so->so_lock); 1588 } else if (!(so->so_state & SS_ISBOUND)) { 1589 mutex_exit(&so->so_lock); 1590 } else { 1591 so->so_unbind_mp = NULL; 1592 1593 /* 1594 * Is another T_DISCON_IND being processed. 1595 */ 1596 ASSERT((so->so_flag & SOASYNC_UNBIND) == 0); 1597 1598 /* 1599 * Make strsock_proto ignore T_OK_ACK and T_ERROR_ACK for 1600 * this unbind. Set SOASYNC_UNBIND. This should be cleared 1601 * only after we receive the ACK in strsock_proto. 
1602 */ 1603 so->so_flag |= SOASYNC_UNBIND; 1604 ASSERT(!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING))); 1605 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN|SS_LADDR_VALID); 1606 mutex_exit(&so->so_lock); 1607 1608 /* 1609 * Send down T_UNBIND_REQ ignoring flow control. 1610 * XXX Assumes that MSG_IGNFLOW implies that this thread 1611 * does not run service procedures. 1612 */ 1613 ASSERT(DB_TYPE(mp) == M_PROTO); 1614 ubr = (struct T_unbind_req *)mp->b_rptr; 1615 mp->b_wptr += sizeof (*ubr); 1616 ubr->PRIM_type = T_UNBIND_REQ; 1617 1618 /* 1619 * Flush the read and write side (except stream head read queue) 1620 * and send down T_UNBIND_REQ. 1621 */ 1622 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW); 1623 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1624 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 1625 /* LINTED - warning: statement has no consequent: if */ 1626 if (error) { 1627 eprintsoline(so, error); 1628 } 1629 } 1630 1631 if (tpr->discon_ind.DISCON_reason != 0) 1632 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 1633 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 1634 strseteof(SOTOV(so), 1); 1635 /* 1636 * strseteof takes care of read side wakeups, 1637 * pollwakeups, and signals. 1638 */ 1639 dprintso(so, 1, ("T_DISCON_IND: error %d\n", so->so_error)); 1640 freemsg(discon_mp); 1641 1642 1643 pollwakeup(&stp->sd_pollist, POLLOUT); 1644 mutex_enter(&stp->sd_lock); 1645 1646 /* 1647 * Wake sleeping write 1648 */ 1649 if (stp->sd_flag & WSLEEP) { 1650 stp->sd_flag &= ~WSLEEP; 1651 cv_broadcast(&stp->sd_wrq->q_wait); 1652 } 1653 1654 /* 1655 * strsendsig can handle multiple signals with a 1656 * single call. Send SIGPOLL for S_OUTPUT event. 1657 */ 1658 if (stp->sd_sigflags & S_OUTPUT) 1659 strsendsig(stp->sd_siglist, S_OUTPUT, 0, 0); 1660 1661 mutex_exit(&stp->sd_lock); 1662 mutex_enter(&so->so_lock); 1663 } 1664 1665 /* 1666 * This routine is registered with the stream head to receive M_PROTO 1667 * and M_PCPROTO messages. 
1668 * 1669 * Returns NULL if the message was consumed. 1670 * Returns an mblk to make that mblk be processed (and queued) by the stream 1671 * head. 1672 * 1673 * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and 1674 * *pollwakeups) for the stream head to take action on. Note that since 1675 * sockets always deliver SIGIO for every new piece of data this routine 1676 * never sets *firstmsgsigs; any signals are returned in *allmsgsigs. 1677 * 1678 * This routine handles all data related TPI messages independent of 1679 * the type of the socket i.e. it doesn't care if T_UNITDATA_IND message 1680 * arrive on a SOCK_STREAM. 1681 */ 1682 static mblk_t * 1683 strsock_proto(vnode_t *vp, mblk_t *mp, 1684 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 1685 strsigset_t *allmsgsigs, strpollset_t *pollwakeups) 1686 { 1687 union T_primitives *tpr; 1688 struct sonode *so; 1689 1690 so = VTOSO(vp); 1691 1692 dprintso(so, 1, ("strsock_proto(%p, %p)\n", (void *)vp, (void *)mp)); 1693 1694 /* Set default return values */ 1695 *firstmsgsigs = *wakeups = *allmsgsigs = *pollwakeups = 0; 1696 1697 ASSERT(DB_TYPE(mp) == M_PROTO || 1698 DB_TYPE(mp) == M_PCPROTO); 1699 1700 if (MBLKL(mp) < sizeof (tpr->type)) { 1701 /* The message is too short to even contain the primitive */ 1702 zcmn_err(getzoneid(), CE_WARN, 1703 "sockfs: Too short TPI message received. Len = %ld\n", 1704 (ptrdiff_t)(MBLKL(mp))); 1705 freemsg(mp); 1706 return (NULL); 1707 } 1708 if (!__TPI_PRIM_ISALIGNED(mp->b_rptr)) { 1709 /* The read pointer is not aligned correctly for TPI */ 1710 zcmn_err(getzoneid(), CE_WARN, 1711 "sockfs: Unaligned TPI message received. 
rptr = %p\n", 1712 (void *)mp->b_rptr); 1713 freemsg(mp); 1714 return (NULL); 1715 } 1716 tpr = (union T_primitives *)mp->b_rptr; 1717 dprintso(so, 1, ("strsock_proto: primitive %d\n", tpr->type)); 1718 1719 switch (tpr->type) { 1720 1721 case T_DATA_IND: 1722 if (MBLKL(mp) < sizeof (struct T_data_ind)) { 1723 zcmn_err(getzoneid(), CE_WARN, 1724 "sockfs: Too short T_DATA_IND. Len = %ld\n", 1725 (ptrdiff_t)(MBLKL(mp))); 1726 freemsg(mp); 1727 return (NULL); 1728 } 1729 /* 1730 * Ignore zero-length T_DATA_IND messages. These might be 1731 * generated by some transports. 1732 * This is needed to prevent read (which skips the M_PROTO 1733 * part) to unexpectedly return 0 (or return EWOULDBLOCK 1734 * on a non-blocking socket after select/poll has indicated 1735 * that data is available). 1736 */ 1737 if (msgdsize(mp->b_cont) == 0) { 1738 dprintso(so, 0, 1739 ("strsock_proto: zero length T_DATA_IND\n")); 1740 freemsg(mp); 1741 return (NULL); 1742 } 1743 *allmsgsigs = S_INPUT | S_RDNORM; 1744 *pollwakeups = POLLIN | POLLRDNORM; 1745 *wakeups = RSLEEP; 1746 return (mp); 1747 1748 case T_UNITDATA_IND: { 1749 struct T_unitdata_ind *tudi = &tpr->unitdata_ind; 1750 void *addr; 1751 t_uscalar_t addrlen; 1752 1753 if (MBLKL(mp) < sizeof (struct T_unitdata_ind)) { 1754 zcmn_err(getzoneid(), CE_WARN, 1755 "sockfs: Too short T_UNITDATA_IND. Len = %ld\n", 1756 (ptrdiff_t)(MBLKL(mp))); 1757 freemsg(mp); 1758 return (NULL); 1759 } 1760 1761 /* Is this is not a connected datagram socket? */ 1762 if ((so->so_mode & SM_CONNREQUIRED) || 1763 !(so->so_state & SS_ISCONNECTED)) { 1764 /* 1765 * Not a connected datagram socket. Look for 1766 * the SO_UNIX_CLOSE option. If such an option is found 1767 * discard the message (since it has no meaning 1768 * unless connected). 
1769 */ 1770 if (so->so_family == AF_UNIX && msgdsize(mp) == 0 && 1771 tudi->OPT_length != 0) { 1772 void *opt; 1773 t_uscalar_t optlen = tudi->OPT_length; 1774 1775 opt = sogetoff(mp, tudi->OPT_offset, 1776 optlen, __TPI_ALIGN_SIZE); 1777 if (opt == NULL) { 1778 /* The len/off falls outside mp */ 1779 freemsg(mp); 1780 mutex_enter(&so->so_lock); 1781 soseterror(so, EPROTO); 1782 mutex_exit(&so->so_lock); 1783 zcmn_err(getzoneid(), CE_WARN, 1784 "sockfs: T_unidata_ind with " 1785 "invalid optlen/offset %u/%d\n", 1786 optlen, tudi->OPT_offset); 1787 return (NULL); 1788 } 1789 if (so_getopt_unix_close(opt, optlen)) { 1790 freemsg(mp); 1791 return (NULL); 1792 } 1793 } 1794 *allmsgsigs = S_INPUT | S_RDNORM; 1795 *pollwakeups = POLLIN | POLLRDNORM; 1796 *wakeups = RSLEEP; 1797 if (audit_active) 1798 audit_sock(T_UNITDATA_IND, strvp2wq(vp), 1799 mp, 0); 1800 return (mp); 1801 } 1802 1803 /* 1804 * A connect datagram socket. For AF_INET{,6} we verify that 1805 * the source address matches the "connected to" address. 1806 * The semantics of AF_UNIX sockets is to not verify 1807 * the source address. 1808 * Note that this source address verification is transport 1809 * specific. Thus the real fix would be to extent TPI 1810 * to allow T_CONN_REQ messages to be send to connectionless 1811 * transport providers and always let the transport provider 1812 * do whatever filtering is needed. 1813 * 1814 * The verification/filtering semantics for transports 1815 * other than AF_INET and AF_UNIX are unknown. The choice 1816 * would be to either filter using bcmp or let all messages 1817 * get through. This code does not filter other address 1818 * families since this at least allows the application to 1819 * work around any missing filtering. 1820 * 1821 * XXX Should we move filtering to UDP/ICMP??? 1822 * That would require passing e.g. a T_DISCON_REQ to UDP 1823 * when the socket becomes unconnected. 
1824 */ 1825 addrlen = tudi->SRC_length; 1826 /* 1827 * The alignment restriction is really to strict but 1828 * we want enough alignment to inspect the fields of 1829 * a sockaddr_in. 1830 */ 1831 addr = sogetoff(mp, tudi->SRC_offset, addrlen, 1832 __TPI_ALIGN_SIZE); 1833 if (addr == NULL) { 1834 freemsg(mp); 1835 mutex_enter(&so->so_lock); 1836 soseterror(so, EPROTO); 1837 mutex_exit(&so->so_lock); 1838 zcmn_err(getzoneid(), CE_WARN, 1839 "sockfs: T_unidata_ind with invalid " 1840 "addrlen/offset %u/%d\n", 1841 addrlen, tudi->SRC_offset); 1842 return (NULL); 1843 } 1844 1845 if (so->so_family == AF_INET) { 1846 /* 1847 * For AF_INET we allow wildcarding both sin_addr 1848 * and sin_port. 1849 */ 1850 struct sockaddr_in *faddr, *sin; 1851 1852 /* Prevent so_faddr_sa from changing while accessed */ 1853 mutex_enter(&so->so_lock); 1854 ASSERT(so->so_faddr_len == 1855 (socklen_t)sizeof (struct sockaddr_in)); 1856 faddr = (struct sockaddr_in *)so->so_faddr_sa; 1857 sin = (struct sockaddr_in *)addr; 1858 if (addrlen != 1859 (t_uscalar_t)sizeof (struct sockaddr_in) || 1860 (sin->sin_addr.s_addr != faddr->sin_addr.s_addr && 1861 faddr->sin_addr.s_addr != INADDR_ANY) || 1862 (so->so_type != SOCK_RAW && 1863 sin->sin_port != faddr->sin_port && 1864 faddr->sin_port != 0)) { 1865 #ifdef DEBUG 1866 dprintso(so, 0, 1867 ("sockfs: T_UNITDATA_IND mismatch: %s", 1868 pr_addr(so->so_family, 1869 (struct sockaddr *)addr, 1870 addrlen))); 1871 dprintso(so, 0, (" - %s\n", 1872 pr_addr(so->so_family, so->so_faddr_sa, 1873 (t_uscalar_t)so->so_faddr_len))); 1874 #endif /* DEBUG */ 1875 mutex_exit(&so->so_lock); 1876 freemsg(mp); 1877 return (NULL); 1878 } 1879 mutex_exit(&so->so_lock); 1880 } else if (so->so_family == AF_INET6) { 1881 /* 1882 * For AF_INET6 we allow wildcarding both sin6_addr 1883 * and sin6_port. 
1884 */ 1885 struct sockaddr_in6 *faddr6, *sin6; 1886 static struct in6_addr zeroes; /* inits to all zeros */ 1887 1888 /* Prevent so_faddr_sa from changing while accessed */ 1889 mutex_enter(&so->so_lock); 1890 ASSERT(so->so_faddr_len == 1891 (socklen_t)sizeof (struct sockaddr_in6)); 1892 faddr6 = (struct sockaddr_in6 *)so->so_faddr_sa; 1893 sin6 = (struct sockaddr_in6 *)addr; 1894 /* XXX could we get a mapped address ::ffff:0.0.0.0 ? */ 1895 if (addrlen != 1896 (t_uscalar_t)sizeof (struct sockaddr_in6) || 1897 (!IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, 1898 &faddr6->sin6_addr) && 1899 !IN6_ARE_ADDR_EQUAL(&faddr6->sin6_addr, &zeroes)) || 1900 (so->so_type != SOCK_RAW && 1901 sin6->sin6_port != faddr6->sin6_port && 1902 faddr6->sin6_port != 0)) { 1903 #ifdef DEBUG 1904 dprintso(so, 0, 1905 ("sockfs: T_UNITDATA_IND mismatch: %s", 1906 pr_addr(so->so_family, 1907 (struct sockaddr *)addr, 1908 addrlen))); 1909 dprintso(so, 0, (" - %s\n", 1910 pr_addr(so->so_family, so->so_faddr_sa, 1911 (t_uscalar_t)so->so_faddr_len))); 1912 #endif /* DEBUG */ 1913 mutex_exit(&so->so_lock); 1914 freemsg(mp); 1915 return (NULL); 1916 } 1917 mutex_exit(&so->so_lock); 1918 } else if (so->so_family == AF_UNIX && 1919 msgdsize(mp->b_cont) == 0 && 1920 tudi->OPT_length != 0) { 1921 /* 1922 * Attempt to extract AF_UNIX 1923 * SO_UNIX_CLOSE indication from options. 1924 */ 1925 void *opt; 1926 t_uscalar_t optlen = tudi->OPT_length; 1927 1928 opt = sogetoff(mp, tudi->OPT_offset, 1929 optlen, __TPI_ALIGN_SIZE); 1930 if (opt == NULL) { 1931 /* The len/off falls outside mp */ 1932 freemsg(mp); 1933 mutex_enter(&so->so_lock); 1934 soseterror(so, EPROTO); 1935 mutex_exit(&so->so_lock); 1936 zcmn_err(getzoneid(), CE_WARN, 1937 "sockfs: T_unidata_ind with invalid " 1938 "optlen/offset %u/%d\n", 1939 optlen, tudi->OPT_offset); 1940 return (NULL); 1941 } 1942 /* 1943 * If we received a unix close indication mark the 1944 * socket and discard this message. 
1945 */ 1946 if (so_getopt_unix_close(opt, optlen)) { 1947 mutex_enter(&so->so_lock); 1948 sobreakconn(so, ECONNRESET); 1949 mutex_exit(&so->so_lock); 1950 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 1951 freemsg(mp); 1952 *pollwakeups = POLLIN | POLLRDNORM; 1953 *allmsgsigs = S_INPUT | S_RDNORM; 1954 *wakeups = RSLEEP; 1955 return (NULL); 1956 } 1957 } 1958 *allmsgsigs = S_INPUT | S_RDNORM; 1959 *pollwakeups = POLLIN | POLLRDNORM; 1960 *wakeups = RSLEEP; 1961 return (mp); 1962 } 1963 1964 case T_OPTDATA_IND: { 1965 struct T_optdata_ind *tdi = &tpr->optdata_ind; 1966 1967 if (MBLKL(mp) < sizeof (struct T_optdata_ind)) { 1968 zcmn_err(getzoneid(), CE_WARN, 1969 "sockfs: Too short T_OPTDATA_IND. Len = %ld\n", 1970 (ptrdiff_t)(MBLKL(mp))); 1971 freemsg(mp); 1972 return (NULL); 1973 } 1974 /* 1975 * Allow zero-length messages carrying options. 1976 * This is used when carrying the SO_UNIX_CLOSE option. 1977 */ 1978 if (so->so_family == AF_UNIX && msgdsize(mp->b_cont) == 0 && 1979 tdi->OPT_length != 0) { 1980 /* 1981 * Attempt to extract AF_UNIX close indication 1982 * from the options. Ignore any other options - 1983 * those are handled once the message is removed 1984 * from the queue. 1985 * The close indication message should not carry data. 1986 */ 1987 void *opt; 1988 t_uscalar_t optlen = tdi->OPT_length; 1989 1990 opt = sogetoff(mp, tdi->OPT_offset, 1991 optlen, __TPI_ALIGN_SIZE); 1992 if (opt == NULL) { 1993 /* The len/off falls outside mp */ 1994 freemsg(mp); 1995 mutex_enter(&so->so_lock); 1996 soseterror(so, EPROTO); 1997 mutex_exit(&so->so_lock); 1998 zcmn_err(getzoneid(), CE_WARN, 1999 "sockfs: T_optdata_ind with invalid " 2000 "optlen/offset %u/%d\n", 2001 optlen, tdi->OPT_offset); 2002 return (NULL); 2003 } 2004 /* 2005 * If we received a close indication mark the 2006 * socket and discard this message. 
2007 */ 2008 if (so_getopt_unix_close(opt, optlen)) { 2009 mutex_enter(&so->so_lock); 2010 socantsendmore(so); 2011 mutex_exit(&so->so_lock); 2012 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2013 freemsg(mp); 2014 return (NULL); 2015 } 2016 } 2017 *allmsgsigs = S_INPUT | S_RDNORM; 2018 *pollwakeups = POLLIN | POLLRDNORM; 2019 *wakeups = RSLEEP; 2020 return (mp); 2021 } 2022 2023 case T_EXDATA_IND: { 2024 mblk_t *mctl, *mdata; 2025 mblk_t *lbp; 2026 union T_primitives *tprp; 2027 struct stdata *stp; 2028 queue_t *qp; 2029 2030 if (MBLKL(mp) < sizeof (struct T_exdata_ind)) { 2031 zcmn_err(getzoneid(), CE_WARN, 2032 "sockfs: Too short T_EXDATA_IND. Len = %ld\n", 2033 (ptrdiff_t)(MBLKL(mp))); 2034 freemsg(mp); 2035 return (NULL); 2036 } 2037 /* 2038 * Ignore zero-length T_EXDATA_IND messages. These might be 2039 * generated by some transports. 2040 * 2041 * This is needed to prevent read (which skips the M_PROTO 2042 * part) to unexpectedly return 0 (or return EWOULDBLOCK 2043 * on a non-blocking socket after select/poll has indicated 2044 * that data is available). 2045 */ 2046 dprintso(so, 1, 2047 ("T_EXDATA_IND(%p): counts %d/%d state %s\n", 2048 (void *)vp, so->so_oobsigcnt, so->so_oobcnt, 2049 pr_state(so->so_state, so->so_mode))); 2050 2051 if (msgdsize(mp->b_cont) == 0) { 2052 dprintso(so, 0, 2053 ("strsock_proto: zero length T_EXDATA_IND\n")); 2054 freemsg(mp); 2055 return (NULL); 2056 } 2057 2058 /* 2059 * Split into the T_EXDATA_IND and the M_DATA part. 
2060 * We process these three pieces separately: 2061 * signal generation 2062 * handling T_EXDATA_IND 2063 * handling M_DATA component 2064 */ 2065 mctl = mp; 2066 mdata = mctl->b_cont; 2067 mctl->b_cont = NULL; 2068 mutex_enter(&so->so_lock); 2069 so_oob_sig(so, 0, allmsgsigs, pollwakeups); 2070 mctl = so_oob_exdata(so, mctl, allmsgsigs, pollwakeups); 2071 mdata = so_oob_data(so, mdata, allmsgsigs, pollwakeups); 2072 2073 stp = vp->v_stream; 2074 ASSERT(stp != NULL); 2075 qp = _RD(stp->sd_wrq); 2076 2077 mutex_enter(QLOCK(qp)); 2078 lbp = qp->q_last; 2079 2080 /* 2081 * We want to avoid queueing up a string of T_EXDATA_IND 2082 * messages with no intervening data messages at the stream 2083 * head. These messages contribute to the total message 2084 * count. Eventually this can lead to STREAMS flow contol 2085 * and also cause TCP to advertise a zero window condition 2086 * to the peer. This can happen in the degenerate case where 2087 * the sender and receiver exchange only OOB data. The sender 2088 * only sends messages with MSG_OOB flag and the receiver 2089 * receives only MSG_OOB messages and does not use SO_OOBINLINE. 2090 * An example of this scenario has been reported in applications 2091 * that use OOB data to exchange heart beats. Flow control 2092 * relief will never happen if the application only reads OOB 2093 * data which is done directly by sorecvoob() and the 2094 * T_EXDATA_IND messages at the streamhead won't be consumed. 2095 * Note that there is no correctness issue in compressing the 2096 * string of T_EXDATA_IND messages into a single T_EXDATA_IND 2097 * message. A single read that does not specify MSG_OOB will 2098 * read across all the marks in a loop in sotpi_recvmsg(). 2099 * Each mark is individually distinguishable only if the 2100 * T_EXDATA_IND messages are separated by data messages. 
2101 */ 2102 if ((qp->q_first != NULL) && (DB_TYPE(lbp) == M_PROTO)) { 2103 tprp = (union T_primitives *)lbp->b_rptr; 2104 if ((tprp->type == T_EXDATA_IND) && 2105 !(so->so_options & SO_OOBINLINE)) { 2106 2107 /* 2108 * free the new M_PROTO message 2109 */ 2110 freemsg(mctl); 2111 2112 /* 2113 * adjust the OOB count and OOB signal count 2114 * just incremented for the new OOB data. 2115 */ 2116 so->so_oobcnt--; 2117 so->so_oobsigcnt--; 2118 mutex_exit(QLOCK(qp)); 2119 mutex_exit(&so->so_lock); 2120 return (NULL); 2121 } 2122 } 2123 mutex_exit(QLOCK(qp)); 2124 2125 /* 2126 * Pass the T_EXDATA_IND and the M_DATA back separately 2127 * by using b_next linkage. (The stream head will queue any 2128 * b_next linked messages separately.) This is needed 2129 * since MSGMARK applies to the last by of the message 2130 * hence we can not have any M_DATA component attached 2131 * to the marked T_EXDATA_IND. Note that the stream head 2132 * will not consolidate M_DATA messages onto an MSGMARK'ed 2133 * message in order to preserve the constraint that 2134 * the T_EXDATA_IND always is a separate message. 
2135 */ 2136 ASSERT(mctl != NULL); 2137 mctl->b_next = mdata; 2138 mp = mctl; 2139 #ifdef DEBUG 2140 if (mdata == NULL) { 2141 dprintso(so, 1, 2142 ("after outofline T_EXDATA_IND(%p): " 2143 "counts %d/%d poll 0x%x sig 0x%x state %s\n", 2144 (void *)vp, so->so_oobsigcnt, 2145 so->so_oobcnt, *pollwakeups, *allmsgsigs, 2146 pr_state(so->so_state, so->so_mode))); 2147 } else { 2148 dprintso(so, 1, 2149 ("after inline T_EXDATA_IND(%p): " 2150 "counts %d/%d poll 0x%x sig 0x%x state %s\n", 2151 (void *)vp, so->so_oobsigcnt, 2152 so->so_oobcnt, *pollwakeups, *allmsgsigs, 2153 pr_state(so->so_state, so->so_mode))); 2154 } 2155 #endif /* DEBUG */ 2156 mutex_exit(&so->so_lock); 2157 *wakeups = RSLEEP; 2158 return (mp); 2159 } 2160 2161 case T_CONN_CON: { 2162 struct T_conn_con *conn_con; 2163 void *addr; 2164 t_uscalar_t addrlen; 2165 2166 /* 2167 * Verify the state, update the state to ISCONNECTED, 2168 * record the potentially new address in the message, 2169 * and drop the message. 2170 */ 2171 if (MBLKL(mp) < sizeof (struct T_conn_con)) { 2172 zcmn_err(getzoneid(), CE_WARN, 2173 "sockfs: Too short T_CONN_CON. Len = %ld\n", 2174 (ptrdiff_t)(MBLKL(mp))); 2175 freemsg(mp); 2176 return (NULL); 2177 } 2178 2179 mutex_enter(&so->so_lock); 2180 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 2181 SS_ISCONNECTING) { 2182 mutex_exit(&so->so_lock); 2183 dprintso(so, 1, 2184 ("T_CONN_CON: state %x\n", so->so_state)); 2185 freemsg(mp); 2186 return (NULL); 2187 } 2188 2189 conn_con = &tpr->conn_con; 2190 addrlen = conn_con->RES_length; 2191 /* 2192 * Allow the address to be of different size than sent down 2193 * in the T_CONN_REQ as long as it doesn't exceed the maxlen. 2194 * For AF_UNIX require the identical length. 2195 */ 2196 if (so->so_family == AF_UNIX ? 
2197 addrlen != (t_uscalar_t)sizeof (so->so_ux_laddr) : 2198 addrlen > (t_uscalar_t)so->so_faddr_maxlen) { 2199 zcmn_err(getzoneid(), CE_WARN, 2200 "sockfs: T_conn_con with different " 2201 "length %u/%d\n", 2202 addrlen, conn_con->RES_length); 2203 soisdisconnected(so, EPROTO); 2204 mutex_exit(&so->so_lock); 2205 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 2206 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2207 strseteof(SOTOV(so), 1); 2208 freemsg(mp); 2209 /* 2210 * strseteof takes care of read side wakeups, 2211 * pollwakeups, and signals. 2212 */ 2213 *wakeups = WSLEEP; 2214 *allmsgsigs = S_OUTPUT; 2215 *pollwakeups = POLLOUT; 2216 return (NULL); 2217 } 2218 addr = sogetoff(mp, conn_con->RES_offset, addrlen, 1); 2219 if (addr == NULL) { 2220 zcmn_err(getzoneid(), CE_WARN, 2221 "sockfs: T_conn_con with invalid " 2222 "addrlen/offset %u/%d\n", 2223 addrlen, conn_con->RES_offset); 2224 mutex_exit(&so->so_lock); 2225 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 2226 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2227 strseteof(SOTOV(so), 1); 2228 freemsg(mp); 2229 /* 2230 * strseteof takes care of read side wakeups, 2231 * pollwakeups, and signals. 2232 */ 2233 *wakeups = WSLEEP; 2234 *allmsgsigs = S_OUTPUT; 2235 *pollwakeups = POLLOUT; 2236 return (NULL); 2237 } 2238 2239 /* 2240 * Save for getpeername. 2241 */ 2242 if (so->so_family != AF_UNIX) { 2243 so->so_faddr_len = (socklen_t)addrlen; 2244 ASSERT(so->so_faddr_len <= so->so_faddr_maxlen); 2245 bcopy(addr, so->so_faddr_sa, addrlen); 2246 so->so_state |= SS_FADDR_VALID; 2247 } 2248 2249 if (so->so_peercred != NULL) 2250 crfree(so->so_peercred); 2251 so->so_peercred = DB_CRED(mp); 2252 so->so_cpid = DB_CPID(mp); 2253 if (so->so_peercred != NULL) 2254 crhold(so->so_peercred); 2255 2256 /* Wakeup anybody sleeping in sowaitconnected */ 2257 soisconnected(so); 2258 mutex_exit(&so->so_lock); 2259 2260 /* 2261 * The socket is now available for sending data. 
2262 */ 2263 *wakeups = WSLEEP; 2264 *allmsgsigs = S_OUTPUT; 2265 *pollwakeups = POLLOUT; 2266 freemsg(mp); 2267 return (NULL); 2268 } 2269 2270 /* 2271 * Extra processing in case of an SSL proxy, before queuing or 2272 * forwarding to the fallback endpoint 2273 */ 2274 case T_SSL_PROXY_CONN_IND: 2275 case T_CONN_IND: 2276 /* 2277 * Verify the min size and queue the message on 2278 * the so_conn_ind_head/tail list. 2279 */ 2280 if (MBLKL(mp) < sizeof (struct T_conn_ind)) { 2281 zcmn_err(getzoneid(), CE_WARN, 2282 "sockfs: Too short T_CONN_IND. Len = %ld\n", 2283 (ptrdiff_t)(MBLKL(mp))); 2284 freemsg(mp); 2285 return (NULL); 2286 } 2287 2288 if (audit_active) 2289 audit_sock(T_CONN_IND, strvp2wq(vp), mp, 0); 2290 if (!(so->so_state & SS_ACCEPTCONN)) { 2291 zcmn_err(getzoneid(), CE_WARN, 2292 "sockfs: T_conn_ind on non-listening socket\n"); 2293 freemsg(mp); 2294 return (NULL); 2295 } 2296 2297 if (tpr->type == T_SSL_PROXY_CONN_IND && mp->b_cont == NULL) { 2298 /* No context: need to fall back */ 2299 struct sonode *fbso; 2300 stdata_t *fbstp; 2301 2302 tpr->type = T_CONN_IND; 2303 2304 fbso = kssl_find_fallback(so->so_kssl_ent); 2305 2306 /* 2307 * No fallback: the remote will timeout and 2308 * disconnect. 2309 */ 2310 if (fbso == NULL) { 2311 freemsg(mp); 2312 return (NULL); 2313 } 2314 fbstp = SOTOV(fbso)->v_stream; 2315 qreply(fbstp->sd_wrq->q_next, mp); 2316 return (NULL); 2317 } 2318 soqueueconnind(so, mp); 2319 *allmsgsigs = S_INPUT | S_RDNORM; 2320 *pollwakeups = POLLIN | POLLRDNORM; 2321 *wakeups = RSLEEP; 2322 return (NULL); 2323 2324 case T_ORDREL_IND: 2325 if (MBLKL(mp) < sizeof (struct T_ordrel_ind)) { 2326 zcmn_err(getzoneid(), CE_WARN, 2327 "sockfs: Too short T_ORDREL_IND. Len = %ld\n", 2328 (ptrdiff_t)(MBLKL(mp))); 2329 freemsg(mp); 2330 return (NULL); 2331 } 2332 2333 /* 2334 * Some providers send this when not fully connected. 2335 * SunLink X.25 needs to retrieve disconnect reason after 2336 * disconnect for compatibility. 
It uses T_ORDREL_IND 2337 * instead of T_DISCON_IND so that it may use the 2338 * endpoint after a connect failure to retrieve the 2339 * reason using an ioctl. Thus we explicitly clear 2340 * SS_ISCONNECTING here for SunLink X.25. 2341 * This is a needed TPI violation. 2342 */ 2343 mutex_enter(&so->so_lock); 2344 so->so_state &= ~SS_ISCONNECTING; 2345 socantrcvmore(so); 2346 mutex_exit(&so->so_lock); 2347 strseteof(SOTOV(so), 1); 2348 /* 2349 * strseteof takes care of read side wakeups, 2350 * pollwakeups, and signals. 2351 */ 2352 freemsg(mp); 2353 return (NULL); 2354 2355 case T_DISCON_IND: 2356 if (MBLKL(mp) < sizeof (struct T_discon_ind)) { 2357 zcmn_err(getzoneid(), CE_WARN, 2358 "sockfs: Too short T_DISCON_IND. Len = %ld\n", 2359 (ptrdiff_t)(MBLKL(mp))); 2360 freemsg(mp); 2361 return (NULL); 2362 } 2363 if (so->so_state & SS_ACCEPTCONN) { 2364 /* 2365 * This is a listener. Look for a queued T_CONN_IND 2366 * with a matching sequence number and remove it 2367 * from the list. 2368 * It is normal to not find the sequence number since 2369 * the soaccept might have already dequeued it 2370 * (in which case the T_CONN_RES will fail with 2371 * TBADSEQ). 2372 */ 2373 (void) soflushconnind(so, tpr->discon_ind.SEQ_number); 2374 freemsg(mp); 2375 return (0); 2376 } 2377 2378 /* 2379 * Not a listener 2380 * 2381 * If SS_CANTRCVMORE for AF_UNIX ignore the discon_reason. 2382 * Such a discon_ind appears when the peer has first done 2383 * a shutdown() followed by a close() in which case we just 2384 * want to record socantsendmore. 2385 * In this case sockfs first receives a T_ORDREL_IND followed 2386 * by a T_DISCON_IND. 2387 * Note that for other transports (e.g. TCP) we need to handle 2388 * the discon_ind in this case since it signals an error. 
2389 */ 2390 mutex_enter(&so->so_lock); 2391 if ((so->so_state & SS_CANTRCVMORE) && 2392 (so->so_family == AF_UNIX)) { 2393 socantsendmore(so); 2394 mutex_exit(&so->so_lock); 2395 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2396 dprintso(so, 1, 2397 ("T_DISCON_IND: error %d\n", so->so_error)); 2398 freemsg(mp); 2399 /* 2400 * Set these variables for caller to process them. 2401 * For the else part where T_DISCON_IND is processed, 2402 * this will be done in the function being called 2403 * (strsock_discon_ind()) 2404 */ 2405 *wakeups = WSLEEP; 2406 *allmsgsigs = S_OUTPUT; 2407 *pollwakeups = POLLOUT; 2408 } else if (so->so_flag & (SOASYNC_UNBIND | SOLOCKED)) { 2409 /* 2410 * Deferred processing of T_DISCON_IND 2411 */ 2412 so_save_discon_ind(so, mp, strsock_discon_ind); 2413 mutex_exit(&so->so_lock); 2414 } else { 2415 /* 2416 * Process T_DISCON_IND now 2417 */ 2418 (void) strsock_discon_ind(so, mp); 2419 mutex_exit(&so->so_lock); 2420 } 2421 return (NULL); 2422 2423 case T_UDERROR_IND: { 2424 struct T_uderror_ind *tudi = &tpr->uderror_ind; 2425 void *addr; 2426 t_uscalar_t addrlen; 2427 int error; 2428 2429 dprintso(so, 0, 2430 ("T_UDERROR_IND: error %d\n", tudi->ERROR_type)); 2431 2432 if (MBLKL(mp) < sizeof (struct T_uderror_ind)) { 2433 zcmn_err(getzoneid(), CE_WARN, 2434 "sockfs: Too short T_UDERROR_IND. 
Len = %ld\n", 2435 (ptrdiff_t)(MBLKL(mp))); 2436 freemsg(mp); 2437 return (NULL); 2438 } 2439 /* Ignore on connection-oriented transports */ 2440 if (so->so_mode & SM_CONNREQUIRED) { 2441 freemsg(mp); 2442 eprintsoline(so, 0); 2443 zcmn_err(getzoneid(), CE_WARN, 2444 "sockfs: T_uderror_ind on connection-oriented " 2445 "transport\n"); 2446 return (NULL); 2447 } 2448 addrlen = tudi->DEST_length; 2449 addr = sogetoff(mp, tudi->DEST_offset, addrlen, 1); 2450 if (addr == NULL) { 2451 zcmn_err(getzoneid(), CE_WARN, 2452 "sockfs: T_uderror_ind with invalid " 2453 "addrlen/offset %u/%d\n", 2454 addrlen, tudi->DEST_offset); 2455 freemsg(mp); 2456 return (NULL); 2457 } 2458 2459 /* Verify source address for connected socket. */ 2460 mutex_enter(&so->so_lock); 2461 if (so->so_state & SS_ISCONNECTED) { 2462 void *faddr; 2463 t_uscalar_t faddr_len; 2464 boolean_t match = B_FALSE; 2465 2466 switch (so->so_family) { 2467 case AF_INET: { 2468 /* Compare just IP address and port */ 2469 struct sockaddr_in *sin1, *sin2; 2470 2471 sin1 = (struct sockaddr_in *)so->so_faddr_sa; 2472 sin2 = (struct sockaddr_in *)addr; 2473 if (addrlen == sizeof (struct sockaddr_in) && 2474 sin1->sin_port == sin2->sin_port && 2475 sin1->sin_addr.s_addr == 2476 sin2->sin_addr.s_addr) 2477 match = B_TRUE; 2478 break; 2479 } 2480 case AF_INET6: { 2481 /* Compare just IP address and port. 
Not flow */ 2482 struct sockaddr_in6 *sin1, *sin2; 2483 2484 sin1 = (struct sockaddr_in6 *)so->so_faddr_sa; 2485 sin2 = (struct sockaddr_in6 *)addr; 2486 if (addrlen == sizeof (struct sockaddr_in6) && 2487 sin1->sin6_port == sin2->sin6_port && 2488 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 2489 &sin2->sin6_addr)) 2490 match = B_TRUE; 2491 break; 2492 } 2493 case AF_UNIX: 2494 faddr = &so->so_ux_faddr; 2495 faddr_len = 2496 (t_uscalar_t)sizeof (so->so_ux_faddr); 2497 if (faddr_len == addrlen && 2498 bcmp(addr, faddr, addrlen) == 0) 2499 match = B_TRUE; 2500 break; 2501 default: 2502 faddr = so->so_faddr_sa; 2503 faddr_len = (t_uscalar_t)so->so_faddr_len; 2504 if (faddr_len == addrlen && 2505 bcmp(addr, faddr, addrlen) == 0) 2506 match = B_TRUE; 2507 break; 2508 } 2509 2510 if (!match) { 2511 #ifdef DEBUG 2512 dprintso(so, 0, 2513 ("sockfs: T_UDERR_IND mismatch: %s - ", 2514 pr_addr(so->so_family, 2515 (struct sockaddr *)addr, 2516 addrlen))); 2517 dprintso(so, 0, ("%s\n", 2518 pr_addr(so->so_family, so->so_faddr_sa, 2519 so->so_faddr_len))); 2520 #endif /* DEBUG */ 2521 mutex_exit(&so->so_lock); 2522 freemsg(mp); 2523 return (NULL); 2524 } 2525 /* 2526 * Make the write error nonpersistent. If the error 2527 * is zero we use ECONNRESET. 2528 * This assumes that the name space for ERROR_type 2529 * is the errno name space. 2530 */ 2531 if (tudi->ERROR_type != 0) 2532 error = tudi->ERROR_type; 2533 else 2534 error = ECONNRESET; 2535 2536 soseterror(so, error); 2537 mutex_exit(&so->so_lock); 2538 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 2539 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2540 *wakeups = RSLEEP | WSLEEP; 2541 *allmsgsigs = S_INPUT | S_RDNORM | S_OUTPUT; 2542 *pollwakeups = POLLIN | POLLRDNORM | POLLOUT; 2543 freemsg(mp); 2544 return (NULL); 2545 } 2546 /* 2547 * If the application asked for delayed errors 2548 * record the T_UDERROR_IND so_eaddr_mp and the reason in 2549 * so_delayed_error for delayed error posting. If the reason 2550 * is zero use ECONNRESET. 
2551 * Note that delayed error indications do not make sense for 2552 * AF_UNIX sockets since sendto checks that the destination 2553 * address is valid at the time of the sendto. 2554 */ 2555 if (!(so->so_options & SO_DGRAM_ERRIND)) { 2556 mutex_exit(&so->so_lock); 2557 freemsg(mp); 2558 return (NULL); 2559 } 2560 if (so->so_eaddr_mp != NULL) 2561 freemsg(so->so_eaddr_mp); 2562 2563 so->so_eaddr_mp = mp; 2564 if (tudi->ERROR_type != 0) 2565 error = tudi->ERROR_type; 2566 else 2567 error = ECONNRESET; 2568 so->so_delayed_error = (ushort_t)error; 2569 mutex_exit(&so->so_lock); 2570 return (NULL); 2571 } 2572 2573 case T_ERROR_ACK: 2574 dprintso(so, 0, 2575 ("strsock_proto: T_ERROR_ACK for %d, error %d/%d\n", 2576 tpr->error_ack.ERROR_prim, 2577 tpr->error_ack.TLI_error, 2578 tpr->error_ack.UNIX_error)); 2579 2580 if (MBLKL(mp) < sizeof (struct T_error_ack)) { 2581 zcmn_err(getzoneid(), CE_WARN, 2582 "sockfs: Too short T_ERROR_ACK. Len = %ld\n", 2583 (ptrdiff_t)(MBLKL(mp))); 2584 freemsg(mp); 2585 return (NULL); 2586 } 2587 /* 2588 * Check if we were waiting for the async message 2589 */ 2590 mutex_enter(&so->so_lock); 2591 if ((so->so_flag & SOASYNC_UNBIND) && 2592 tpr->error_ack.ERROR_prim == T_UNBIND_REQ) { 2593 so_unlock_single(so, SOASYNC_UNBIND); 2594 mutex_exit(&so->so_lock); 2595 freemsg(mp); 2596 return (NULL); 2597 } 2598 mutex_exit(&so->so_lock); 2599 soqueueack(so, mp); 2600 return (NULL); 2601 2602 case T_OK_ACK: 2603 if (MBLKL(mp) < sizeof (struct T_ok_ack)) { 2604 zcmn_err(getzoneid(), CE_WARN, 2605 "sockfs: Too short T_OK_ACK. 
Len = %ld\n", 2606 (ptrdiff_t)(MBLKL(mp))); 2607 freemsg(mp); 2608 return (NULL); 2609 } 2610 /* 2611 * Check if we were waiting for the async message 2612 */ 2613 mutex_enter(&so->so_lock); 2614 if ((so->so_flag & SOASYNC_UNBIND) && 2615 tpr->ok_ack.CORRECT_prim == T_UNBIND_REQ) { 2616 dprintso(so, 1, 2617 ("strsock_proto: T_OK_ACK async unbind\n")); 2618 so_unlock_single(so, SOASYNC_UNBIND); 2619 mutex_exit(&so->so_lock); 2620 freemsg(mp); 2621 return (NULL); 2622 } 2623 mutex_exit(&so->so_lock); 2624 soqueueack(so, mp); 2625 return (NULL); 2626 2627 case T_INFO_ACK: 2628 if (MBLKL(mp) < sizeof (struct T_info_ack)) { 2629 zcmn_err(getzoneid(), CE_WARN, 2630 "sockfs: Too short T_INFO_ACK. Len = %ld\n", 2631 (ptrdiff_t)(MBLKL(mp))); 2632 freemsg(mp); 2633 return (NULL); 2634 } 2635 soqueueack(so, mp); 2636 return (NULL); 2637 2638 case T_CAPABILITY_ACK: 2639 /* 2640 * A T_capability_ack need only be large enough to hold 2641 * the PRIM_type and CAP_bits1 fields; checking for anything 2642 * larger might reject a correct response from an older 2643 * provider. 2644 */ 2645 if (MBLKL(mp) < 2 * sizeof (t_uscalar_t)) { 2646 zcmn_err(getzoneid(), CE_WARN, 2647 "sockfs: Too short T_CAPABILITY_ACK. Len = %ld\n", 2648 (ptrdiff_t)(MBLKL(mp))); 2649 freemsg(mp); 2650 return (NULL); 2651 } 2652 soqueueack(so, mp); 2653 return (NULL); 2654 2655 case T_BIND_ACK: 2656 if (MBLKL(mp) < sizeof (struct T_bind_ack)) { 2657 zcmn_err(getzoneid(), CE_WARN, 2658 "sockfs: Too short T_BIND_ACK. Len = %ld\n", 2659 (ptrdiff_t)(MBLKL(mp))); 2660 freemsg(mp); 2661 return (NULL); 2662 } 2663 soqueueack(so, mp); 2664 return (NULL); 2665 2666 case T_OPTMGMT_ACK: 2667 if (MBLKL(mp) < sizeof (struct T_optmgmt_ack)) { 2668 zcmn_err(getzoneid(), CE_WARN, 2669 "sockfs: Too short T_OPTMGMT_ACK. 
		    Len = %ld\n",
			    (ptrdiff_t)(MBLKL(mp)));
			freemsg(mp);
			return (NULL);
		}
		soqueueack(so, mp);
		return (NULL);

	default:
#ifdef DEBUG
		zcmn_err(getzoneid(), CE_WARN,
		    "sockfs: unknown TPI primitive %d received\n",
		    tpr->type);
#endif /* DEBUG */
		freemsg(mp);
		return (NULL);
	}
}

/*
 * This routine is registered with the stream head to receive other
 * (non-data, and non-proto) messages.
 *
 * Returns NULL if the message was consumed.
 * Returns an mblk to make that mblk be processed by the stream head.
 *
 * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and
 * *pollwakeups) for the stream head to take action on.
 */
static mblk_t *
strsock_misc(vnode_t *vp, mblk_t *mp,
		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
		strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
{
	struct sonode *so;

	so = VTOSO(vp);

	dprintso(so, 1, ("strsock_misc(%p, %p, 0x%x)\n",
	    (void *)vp, (void *)mp, DB_TYPE(mp)));

	/* Set default return values */
	*wakeups = *allmsgsigs = *firstmsgsigs = *pollwakeups = 0;

	switch (DB_TYPE(mp)) {
	case M_PCSIG:
		/*
		 * This assumes that an M_PCSIG for the urgent data arrives
		 * before the corresponding T_EXDATA_IND.
		 *
		 * Note: Just like in SunOS 4.X and 4.4BSD a poll will be
		 * awoken before the urgent data shows up.
		 * For OOBINLINE this can result in select returning
		 * only exceptions as opposed to except|read.
		 */
		if (*mp->b_rptr == SIGURG) {
			mutex_enter(&so->so_lock);
			dprintso(so, 1,
			    ("SIGURG(%p): counts %d/%d state %s\n",
			    (void *)vp, so->so_oobsigcnt,
			    so->so_oobcnt,
			    pr_state(so->so_state, so->so_mode)));
			/*
			 * Record the pending urgent byte and compute which
			 * signals/pollwakeups (if any) to generate for it.
			 */
			so_oob_sig(so, 1, allmsgsigs, pollwakeups);
			dprintso(so, 1,
			    ("after SIGURG(%p): counts %d/%d "
			    " poll 0x%x sig 0x%x state %s\n",
			    (void *)vp, so->so_oobsigcnt,
			    so->so_oobcnt, *pollwakeups, *allmsgsigs,
			    pr_state(so->so_state, so->so_mode)));
			mutex_exit(&so->so_lock);
		}
		/* Consumed whether or not it carried SIGURG */
		freemsg(mp);
		return (NULL);

	case M_SIG:
	case M_HANGUP:
	case M_UNHANGUP:
	case M_ERROR:
		/* M_ERRORs etc are ignored */
		freemsg(mp);
		return (NULL);

	case M_FLUSH:
		/*
		 * Do not flush read queue. If the M_FLUSH
		 * arrives because of an impending T_discon_ind
		 * we still have to keep any queued data - this is part of
		 * socket semantics.
		 */
		if (*mp->b_rptr & FLUSHW) {
			/*
			 * Strip the read-side flush bit and hand the
			 * message back so the stream head flushes only
			 * the write side.
			 */
			*mp->b_rptr &= ~FLUSHR;
			return (mp);
		}
		freemsg(mp);
		return (NULL);

	default:
		/* Let the stream head process anything else normally */
		return (mp);
	}
}


/*
 * Register to receive signals for certain events.
 *
 * Issues an I_ESETSIG ioctl on the stream associated with vp on behalf
 * of pid/process-group pgrp for the event set in events.  A pgrp of
 * zero with no events deregisters the caller.  Returns 0 or an errno.
 */
int
so_set_asyncsigs(vnode_t *vp, pid_t pgrp, int events, int mode, cred_t *cr)
{
	struct strsigset ss;
	int32_t rval;

	/*
	 * Note that SOLOCKED will be set except for the call from soaccept().
	 */
	ASSERT(!mutex_owned(&VTOSO(vp)->so_lock));
	ss.ss_pid = pgrp;
	ss.ss_events = events;
	return (strioctl(vp, I_ESETSIG, (intptr_t)&ss, mode, K_TO_K, cr,
	    &rval));
}


/*
 * Register for events matching the SS_ASYNC flag.
 * With SS_ASYNC set, normal read/write events are delivered as well as
 * the always-on urgent/band events.
 */
int
so_set_events(struct sonode *so, vnode_t *vp, cred_t *cr)
{
	int events = so->so_state & SS_ASYNC ?
	    S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT :
	    S_RDBAND | S_BANDURG;

	return (so_set_asyncsigs(vp, so->so_pgrp, events, 0, cr));
}


/*
 * Change the SS_ASYNC flag, and update signal delivery if needed.
 *
 * Caller must hold so_lock; the lock is dropped (under so_lock_single
 * protection) around the strioctl and reacquired before return.  On
 * error the SS_ASYNC flag is left unchanged.
 */
int
so_flip_async(struct sonode *so, vnode_t *vp, int mode, cred_t *cr)
{
	ASSERT(mutex_owned(&so->so_lock));
	if (so->so_pgrp != 0) {
		int error;
		/*
		 * Since SS_ASYNC is about to be toggled, register for the
		 * event set matching the *new* value of the flag.
		 */
		int events = so->so_state & SS_ASYNC ?	/* Old flag */
		    S_RDBAND | S_BANDURG :		/* New sigs */
		    S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT;

		so_lock_single(so);
		mutex_exit(&so->so_lock);

		error = so_set_asyncsigs(vp, so->so_pgrp, events, mode, cr);

		mutex_enter(&so->so_lock);
		so_unlock_single(so, SOLOCKED);
		if (error)
			return (error);
	}
	so->so_state ^= SS_ASYNC;
	return (0);
}

/*
 * Set new pid/pgrp for SIGPOLL (or SIGIO for FIOASYNC mode), replacing
 * any existing one. If passed zero, just clear the existing one.
 *
 * Caller must hold so_lock; it is dropped around the strioctl calls and
 * reacquired before return.  Returns 0 or an errno from registering the
 * new pgrp.
 */
int
so_set_siggrp(struct sonode *so, vnode_t *vp, pid_t pgrp, int mode, cred_t *cr)
{
	int events = so->so_state & SS_ASYNC ?
	    S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT :
	    S_RDBAND | S_BANDURG;
	int error;

	ASSERT(mutex_owned(&so->so_lock));

	/*
	 * Change socket process (group).
	 *
	 * strioctl (via so_set_asyncsigs) will perform permission check and
	 * also keep a PID_HOLD to prevent the pid from being reused.
	 */
	so_lock_single(so);
	mutex_exit(&so->so_lock);

	if (pgrp != 0) {
		dprintso(so, 1, ("setown: adding pgrp %d ev 0x%x\n",
		    pgrp, events));
		error = so_set_asyncsigs(vp, pgrp, events, mode, cr);
		if (error != 0) {
			eprintsoline(so, error);
			goto bad;
		}
	}
	/* Remove the previously registered process/group */
	if (so->so_pgrp != 0) {
		dprintso(so, 1, ("setown: removing pgrp %d\n", so->so_pgrp));
		error = so_set_asyncsigs(vp, so->so_pgrp, 0, mode, cr);
		if (error != 0) {
			/*
			 * Removal is best-effort: the new registration above
			 * already succeeded, so a failure here is logged and
			 * deliberately ignored.
			 */
			eprintsoline(so, error);
			error = 0;
		}
	}
	mutex_enter(&so->so_lock);
	so_unlock_single(so, SOLOCKED);
	so->so_pgrp = pgrp;
	return (0);
bad:
	mutex_enter(&so->so_lock);
	so_unlock_single(so, SOLOCKED);
	return (error);
}



/*
 * Translate a TLI(/XTI) error into a system error as best we can.
 * The table is indexed by the TLI error number (TBADADDR == 1, etc.).
 */
static const int tli_errs[] = {
	0,			/* no error */
	EADDRNOTAVAIL,		/* TBADADDR */
	ENOPROTOOPT,		/* TBADOPT */
	EACCES,			/* TACCES */
	EBADF,			/* TBADF */
	EADDRNOTAVAIL,		/* TNOADDR */
	EPROTO,			/* TOUTSTATE */
	ECONNABORTED,		/* TBADSEQ */
	0,			/* TSYSERR - will never get */
	EPROTO,			/* TLOOK - should never be sent by transport */
	EMSGSIZE,		/* TBADDATA */
	EMSGSIZE,		/* TBUFOVFLW */
	EPROTO,			/* TFLOW */
	EWOULDBLOCK,		/* TNODATA */
	EPROTO,			/* TNODIS */
	EPROTO,			/* TNOUDERR */
	EINVAL,			/* TBADFLAG */
	EPROTO,			/* TNOREL */
	EOPNOTSUPP,		/* TNOTSUPPORT */
	EPROTO,			/* TSTATECHNG */
	/* following represent error namespace expansion with XTI */
	EPROTO,			/* TNOSTRUCTYPE - never sent by transport */
	EPROTO,			/* TBADNAME - never sent by transport */
	EPROTO,			/* TBADQLEN - never sent by transport */
	EADDRINUSE,		/* TADDRBUSY */
	EBADF,			/* TINDOUT */
	EBADF,			/* TPROVMISMATCH */
	EBADF,			/* TRESQLEN */
	EBADF,			/* TRESADDR */
	EPROTO,			/* TQFULL - never sent by transport */
	EPROTO,			/* TPROTO */
};

/*
 * Map TLI error terr to an errno; unknown (too large) codes map to
 * EPROTO.  TSYSERR must be handled by the caller (the UNIX_error field
 * carries the errno in that case), hence the ASSERT.
 * NOTE(review): assumes terr >= 0 — TLI error codes are non-negative,
 * but a negative value would index before the table; verify callers.
 */
static int
tlitosyserr(int terr)
{
	ASSERT(terr != TSYSERR);
	if (terr >= (sizeof (tli_errs) / sizeof (tli_errs[0])))
		return (EPROTO);
	else
		return (tli_errs[terr]);
}

/*
 * Sockfs sodirect STREAMS read put procedure. Called from sodirect enable
 * transport driver/module with an mblk_t chain.
 *
 * Note, we in-line putq() for the fast-path cases of q is empty, q_last and
 * bp are of type M_DATA. All other cases we call putq().
 *
 * On success a zero will be return, else an errno will be returned.
 */
int
sodput(sodirect_t *sodp, mblk_t *bp)
{
	queue_t		*q = sodp->sod_q;
	struct stdata	*stp = (struct stdata *)q->q_ptr;
	mblk_t		*nbp;
	mblk_t		*last = q->q_last;
	int		bytecnt = 0;
	int		mblkcnt = 0;


	ASSERT(MUTEX_HELD(sodp->sod_lockp));

	/*
	 * NOTE(review): this is an equality test, not (sd_flag & STREOF);
	 * it only matches when STREOF is the sole flag set — confirm that
	 * is the intent before touching this path.
	 */
	if (stp->sd_flag == STREOF) {
		/* Stream is at EOF: discard the entire b_next chain */
		do {
			if ((nbp = bp->b_next) != NULL)
				bp->b_next = NULL;
			freemsg(bp);
		} while ((bp = nbp) != NULL);

		return (0);
	}

	mutex_enter(QLOCK(q));
	if (q->q_first == NULL) {
		/* Q empty, really fast fast-path */
		bp->b_prev = NULL;
		bp->b_next = NULL;
		q->q_first = bp;
		q->q_last = bp;

	} else if (last->b_datap->db_type == M_DATA &&
	    bp->b_datap->db_type == M_DATA) {
		/*
		 * Last mblk_t chain and bp are both type M_DATA so
		 * in-line putq() here, if the DBLK_UIOA state match
		 * add bp to the end of the current last chain, else
		 * start a new last chain with bp.
		 */
		if ((last->b_datap->db_flags & DBLK_UIOA) ==
		    (bp->b_datap->db_flags & DBLK_UIOA)) {
			/* Added to end */
			while ((nbp = last->b_cont) != NULL)
				last = nbp;
			last->b_cont = bp;
		} else {
			/* New last */
			ASSERT((bp->b_datap->db_flags & DBLK_UIOA) == 0 ||
			    msgdsize(bp) == sodp->sod_uioa.uioa_mbytes);
			last->b_next = bp;
			bp->b_next = NULL;
			bp->b_prev = last;
			q->q_last = bp;
		}
	} else {
		/*
		 * Can't use q_last so just call putq().
		 */
		mutex_exit(QLOCK(q));

		ASSERT((bp->b_datap->db_flags & DBLK_UIOA) == 0 ||
		    msgdsize(bp) == sodp->sod_uioa.uioa_mbytes);
		/* putq() does its own accounting and QFULL handling */
		(void) putq(q, bp);
		return (0);
	}

	/* Count bytes and mblk_t's */
	do {
		bytecnt += MBLKL(bp);
		mblkcnt++;
	} while ((bp = bp->b_cont) != NULL);
	q->q_count += bytecnt;
	q->q_mblkcnt += mblkcnt;

	/*
	 * Check for QFULL.
	 * NOTE(review): q_mblkcnt is compared against q_hiwat (the byte
	 * high-water mark) rather than a separate mblk limit — this
	 * mirrors stream-head accounting, but verify it is intentional.
	 */
	if (q->q_count >= q->q_hiwat + sodp->sod_want ||
	    q->q_mblkcnt >= q->q_hiwat) {
		q->q_flag |= QFULL;
	}

	mutex_exit(QLOCK(q));
	return (0);
}

/*
 * Sockfs sodirect read wakeup. Called from a sodirect enabled transport
 * driver/module to indicate that read-side data is available.
 *
 * On return the sodirect_t.lock mutex will be exited so this must be the
 * last sodirect_t call to guarantee atomic access of *sodp.
 */
void
sodwakeup(sodirect_t *sodp)
{
	queue_t		*q = sodp->sod_q;
	struct stdata	*stp = (struct stdata *)q->q_ptr;

	ASSERT(MUTEX_HELD(sodp->sod_lockp));

	/* Wake any reader blocked in a read-side cv_wait */
	if (stp->sd_flag & RSLEEP) {
		stp->sd_flag &= ~RSLEEP;
		cv_broadcast(&q->q_wait);
	}

	/*
	 * Issue a pollwakeup only if one was requested (SR_POLLIN);
	 * the sodirect lock must be dropped before calling pollwakeup().
	 */
	if (stp->sd_rput_opt & SR_POLLIN) {
		stp->sd_rput_opt &= ~SR_POLLIN;
		mutex_exit(sodp->sod_lockp);
		pollwakeup(&stp->sd_pollist, POLLIN | POLLRDNORM);
	} else
		mutex_exit(sodp->sod_lockp);
}