1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/inttypes.h> 31 #include <sys/t_lock.h> 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/buf.h> 35 #include <sys/conf.h> 36 #include <sys/cred.h> 37 #include <sys/kmem.h> 38 #include <sys/sysmacros.h> 39 #include <sys/vfs.h> 40 #include <sys/vnode.h> 41 #include <sys/debug.h> 42 #include <sys/errno.h> 43 #include <sys/time.h> 44 #include <sys/file.h> 45 #include <sys/user.h> 46 #include <sys/stream.h> 47 #include <sys/strsubr.h> 48 #include <sys/esunddi.h> 49 #include <sys/flock.h> 50 #include <sys/modctl.h> 51 #include <sys/vtrace.h> 52 #include <sys/strsun.h> 53 #include <sys/cmn_err.h> 54 #include <sys/proc.h> 55 #include <sys/ddi.h> 56 #include <sys/kmem_impl.h> 57 58 #include <sys/suntpi.h> 59 #include <sys/socket.h> 60 #include <sys/sockio.h> 61 #include <sys/socketvar.h> 62 #include <netinet/in.h> 63 64 #include <sys/tiuser.h> 65 #define _SUN_TPI_VERSION 2 66 #include <sys/tihdr.h> 67 68 #include 
<inet/kssl/ksslapi.h> 69 70 #include <c2/audit.h> 71 72 #include <sys/dcopy.h> 73 74 int so_default_version = SOV_SOCKSTREAM; 75 76 #ifdef DEBUG 77 /* Set sockdebug to print debug messages when SO_DEBUG is set */ 78 int sockdebug = 0; 79 80 /* Set sockprinterr to print error messages when SO_DEBUG is set */ 81 int sockprinterr = 0; 82 83 /* 84 * Set so_default_options to SO_DEBUG is all sockets should be created 85 * with SO_DEBUG set. This is needed to get debug printouts from the 86 * socket() call itself. 87 */ 88 int so_default_options = 0; 89 #endif /* DEBUG */ 90 91 #ifdef SOCK_TEST 92 /* 93 * Set to number of ticks to limit cv_waits for code coverage testing. 94 * Set to 1000 when SO_DEBUG is set to 2. 95 */ 96 clock_t sock_test_timelimit = 0; 97 #endif /* SOCK_TEST */ 98 99 /* 100 * For concurrency testing of e.g. opening /dev/ip which does not 101 * handle T_INFO_REQ messages. 102 */ 103 int so_no_tinfo = 0; 104 105 /* 106 * Timeout for getting a T_CAPABILITY_ACK - it is possible for a provider 107 * to simply ignore the T_CAPABILITY_REQ. 108 */ 109 clock_t sock_capability_timeout = 2; /* seconds */ 110 111 static int do_tcapability(struct sonode *so, t_uscalar_t cap_bits1); 112 static void so_removehooks(struct sonode *so); 113 114 static mblk_t *strsock_proto(vnode_t *vp, mblk_t *mp, 115 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 116 strsigset_t *allmsgsigs, strpollset_t *pollwakeups); 117 static mblk_t *strsock_misc(vnode_t *vp, mblk_t *mp, 118 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 119 strsigset_t *allmsgsigs, strpollset_t *pollwakeups); 120 121 static int tlitosyserr(int terr); 122 123 /* 124 * Sodirect kmem_cache and put/wakeup functions. 125 */ 126 struct kmem_cache *socktpi_sod_cache; 127 static int sodput(sodirect_t *, mblk_t *); 128 static void sodwakeup(sodirect_t *); 129 130 /* 131 * Called by sockinit() when sockfs is loaded. 
 */
int
sostr_init()
{
	/* Allocate sodirect_t kmem_cache (no constructor/destructor needed) */
	socktpi_sod_cache = kmem_cache_create("socktpi_sod_cache",
	    sizeof (sodirect_t), 0, NULL, NULL, NULL, NULL, NULL, 0);

	return (0);
}

/*
 * Convert a socket to a stream. Invoked when the illusory sockmod
 * is popped from the stream.
 * Change the stream head back to default operation without losing
 * any messages (T_conn_ind's are moved to the stream head queue).
 */
int
so_sock2stream(struct sonode *so)
{
	struct vnode *vp = SOTOV(so);
	queue_t *rq;
	mblk_t *mp;
	int error = 0;

	ASSERT(MUTEX_HELD(&so->so_plumb_lock));

	mutex_enter(&so->so_lock);
	so_lock_single(so);

	ASSERT(so->so_version != SOV_STREAM);

	if (so->so_state & SS_DIRECT) {
		mblk_t **mpp;
		int rval;

		/*
		 * Tell the transport below that sockmod is being popped
		 */
		mutex_exit(&so->so_lock);
		error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K, CRED(),
		    &rval);
		mutex_enter(&so->so_lock);
		if (error != 0) {
			dprintso(so, 0, ("so_sock2stream(%p): "
			    "_SIOCSOCKFALLBACK failed\n", (void *)so));
			goto exit;
		}
		so->so_state &= ~SS_DIRECT;

		/*
		 * Walk the queued T_CONN_INDs and strip the options that
		 * TCP added for the accept fast-path; a plain stream
		 * consumer must not see them.
		 */
		for (mpp = &so->so_conn_ind_head; (mp = *mpp) != NULL;
		    mpp = &mp->b_next) {
			struct T_conn_ind *conn_ind;

			/*
			 * strsock_proto() has already verified the length of
			 * this message block.
			 */
			ASSERT(MBLKL(mp) >= sizeof (struct T_conn_ind));

			conn_ind = (struct T_conn_ind *)mp->b_rptr;
			if (conn_ind->OPT_length == 0 &&
			    conn_ind->OPT_offset == 0)
				continue;

			if (DB_REF(mp) > 1) {
				mblk_t *newmp;
				size_t length;
				cred_t *cr;

				/*
				 * Copy the message block because it is used
				 * elsewhere, too; we must not modify a
				 * shared dblk in place.
				 */
				length = MBLKL(mp);
				newmp = soallocproto(length, _ALLOC_INTR);
				if (newmp == NULL) {
					error = EINTR;
					goto exit;
				}
				bcopy(mp->b_rptr, newmp->b_wptr, length);
				newmp->b_wptr += length;
				newmp->b_next = mp->b_next;
				cr = DB_CRED(mp);
				if (cr != NULL)
					mblk_setcred(newmp, cr);
				DB_CPID(newmp) = DB_CPID(mp);

				/*
				 * Link the new message block into the queue
				 * and free the old one.
				 */
				*mpp = newmp;
				mp->b_next = NULL;
				freemsg(mp);

				mp = newmp;
				conn_ind = (struct T_conn_ind *)mp->b_rptr;
			}

			/*
			 * Remove options added by TCP for accept fast-path.
			 */
			conn_ind->OPT_length = 0;
			conn_ind->OPT_offset = 0;
		}
	}

	so->so_version = SOV_STREAM;
	so->so_priv = NULL;

	/*
	 * Remove the hooks in the stream head to avoid queuing more
	 * packets in sockfs.
	 */
	mutex_exit(&so->so_lock);
	so_removehooks(so);
	mutex_enter(&so->so_lock);

	/*
	 * Clear any state related to urgent data. Leave any T_EXDATA_IND
	 * on the queue - the behavior of urgent data after a switch is
	 * left undefined.
	 */
	so->so_error = so->so_delayed_error = 0;
	freemsg(so->so_oobmsg);
	so->so_oobmsg = NULL;
	so->so_oobsigcnt = so->so_oobcnt = 0;

	so->so_state &= ~(SS_RCVATMARK|SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA|
	    SS_HASCONNIND|SS_SAVEDEOR);
	ASSERT(so_verify_oobstate(so));

	freemsg(so->so_ack_mp);
	so->so_ack_mp = NULL;

	/*
	 * Flush the T_DISCON_IND on so_discon_ind_mp.
	 */
	so_flush_discon_ind(so);

	/*
	 * Move any queued T_CONN_IND messages to stream head queue.
	 */
	rq = RD(strvp2wq(vp));
	while ((mp = so->so_conn_ind_head) != NULL) {
		so->so_conn_ind_head = mp->b_next;
		mp->b_next = NULL;
		if (so->so_conn_ind_head == NULL) {
			ASSERT(so->so_conn_ind_tail == mp);
			so->so_conn_ind_tail = NULL;
		}
		dprintso(so, 0,
		    ("so_sock2stream(%p): moving T_CONN_IND\n",
		    (void *)so));

		/* Drop lock across put() */
		mutex_exit(&so->so_lock);
		put(rq, mp);
		mutex_enter(&so->so_lock);
	}

exit:
	ASSERT(MUTEX_HELD(&so->so_lock));
	so_unlock_single(so, SOLOCKED);
	mutex_exit(&so->so_lock);
	return (error);
}

/*
 * Convert a stream back to a socket. This is invoked when the illusory
 * sockmod is pushed on a stream (where the stream was "created" by
 * popping the illusory sockmod).
 * This routine can not recreate the socket state (certain aspects of
 * it like urgent data state and the bound/connected addresses for AF_UNIX
 * sockets can not be recreated by asking the transport for information).
 * Thus this routine implicitly assumes that the socket is in an initial
 * state (as if it was just created). It flushes any messages queued on the
 * read queue to avoid dealing with e.g. TPI acks or T_exdata_ind messages.
 */
void
so_stream2sock(struct sonode *so)
{
	struct vnode *vp = SOTOV(so);

	ASSERT(MUTEX_HELD(&so->so_plumb_lock));

	mutex_enter(&so->so_lock);
	so_lock_single(so);
	ASSERT(so->so_version == SOV_STREAM);
	so->so_version = SOV_SOCKSTREAM;
	so->so_pushcnt = 0;
	mutex_exit(&so->so_lock);

	/*
	 * Set a permanent error to force any thread in sorecvmsg to
	 * return (and drop SOREADLOCKED). Clear the error once
	 * we have SOREADLOCKED.
	 * This makes a read sleeping during the I_PUSH of sockmod return
	 * EIO.
	 */
	strsetrerror(SOTOV(so), EIO, 1, NULL);

	/*
	 * Get the read lock before flushing data to avoid
	 * problems with the T_EXDATA_IND MSG_PEEK code in sorecvmsg.
	 */
	mutex_enter(&so->so_lock);
	(void) so_lock_read(so, 0);	/* Set SOREADLOCKED */
	mutex_exit(&so->so_lock);

	strsetrerror(SOTOV(so), 0, 0, NULL);
	so_installhooks(so);

	/*
	 * Flush everything on the read queue.
	 * This ensures that no T_CONN_IND remain and that no T_EXDATA_IND
	 * remain; those types of messages would confuse sockfs.
	 */
	strflushrq(vp, FLUSHALL);
	mutex_enter(&so->so_lock);

	/*
	 * Flush the T_DISCON_IND on so_discon_ind_mp.
	 */
	so_flush_discon_ind(so);
	so_unlock_read(so);	/* Clear SOREADLOCKED */

	so_unlock_single(so, SOLOCKED);
	mutex_exit(&so->so_lock);
}

/*
 * Install the hooks in the stream head.
 */
void
so_installhooks(struct sonode *so)
{
	struct vnode *vp = SOTOV(so);

	strsetrputhooks(vp, SH_SIGALLDATA | SH_IGN_ZEROLEN | SH_CONSOL_DATA,
	    strsock_proto, strsock_misc);
	strsetwputhooks(vp, SH_SIGPIPE | SH_RECHECK_ERR, 0);
}

/*
 * Remove the hooks in the stream head.
 */
static void
so_removehooks(struct sonode *so)
{
	struct vnode *vp = SOTOV(so);

	strsetrputhooks(vp, 0, NULL, NULL);
	strsetwputhooks(vp, 0, STRTIMOUT);
	/*
	 * Leave read behavior as it would have been for a normal
	 * stream i.e. a read of an M_PROTO will fail.
	 */
}

/*
 * Initialize the streams side of a socket including
 * T_info_req/ack processing. If tso is not NULL its values are used thereby
 * avoiding the T_INFO_REQ.
 */
int
so_strinit(struct sonode *so, struct sonode *tso)
{
	struct vnode *vp = SOTOV(so);
	struct stdata *stp;
	mblk_t *mp;
	int error;

	dprintso(so, 1, ("so_strinit(%p)\n", (void *)so));

	/* Preallocate an unbind_req message */
	mp = soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP);
	mutex_enter(&so->so_lock);
	so->so_unbind_mp = mp;
#ifdef DEBUG
	so->so_options = so_default_options;
#endif /* DEBUG */
	mutex_exit(&so->so_lock);

	so_installhooks(so);

	/*
	 * The T_CAPABILITY_REQ should be the first message sent down because
	 * at least TCP has a fast-path for this which avoids timeouts while
	 * waiting for the T_CAPABILITY_ACK under high system load.
	 */
	if (tso == NULL) {
		error = do_tcapability(so, TC1_ACCEPTOR_ID | TC1_INFO);
		if (error)
			return (error);
	} else {
		/*
		 * Clone the transport info from the template sonode (e.g.
		 * the listener for an accepted connection) instead of
		 * issuing a T_INFO_REQ.
		 */
		mutex_enter(&so->so_lock);
		so->so_tsdu_size = tso->so_tsdu_size;
		so->so_etsdu_size = tso->so_etsdu_size;
		so->so_addr_size = tso->so_addr_size;
		so->so_opt_size = tso->so_opt_size;
		so->so_tidu_size = tso->so_tidu_size;
		so->so_serv_type = tso->so_serv_type;
		so->so_mode = tso->so_mode & ~SM_ACCEPTOR_ID;
		mutex_exit(&so->so_lock);

		/* the following do_tcapability may update so->so_mode */
		if ((tso->so_serv_type != T_CLTS) &&
		    !(tso->so_state & SS_DIRECT)) {
			error = do_tcapability(so, TC1_ACCEPTOR_ID);
			if (error)
				return (error);
		}
	}
	/*
	 * If the addr_size is 0 we treat it as already bound
	 * and connected. This is used by the routing socket.
	 * We set the addr_size to something to allocate the address
	 * structures.
	 */
	if (so->so_addr_size == 0) {
		so->so_state |= SS_ISBOUND | SS_ISCONNECTED;
		/* Address size can vary with address families. */
		if (so->so_family == AF_INET6)
			so->so_addr_size =
			    (t_scalar_t)sizeof (struct sockaddr_in6);
		else
			so->so_addr_size =
			    (t_scalar_t)sizeof (struct sockaddr_in);
		ASSERT(so->so_unbind_mp);
	}
	/*
	 * Allocate the addresses (local and foreign in one allocation;
	 * so_faddr_sa points into the second half of the buffer).
	 */
	ASSERT(so->so_laddr_sa == NULL && so->so_faddr_sa == NULL);
	ASSERT(so->so_laddr_len == 0 && so->so_faddr_len == 0);
	so->so_laddr_maxlen = so->so_faddr_maxlen =
	    P2ROUNDUP(so->so_addr_size, KMEM_ALIGN);
	so->so_laddr_sa = kmem_alloc(so->so_laddr_maxlen * 2, KM_SLEEP);
	so->so_faddr_sa = (struct sockaddr *)((caddr_t)so->so_laddr_sa
	    + so->so_laddr_maxlen);

	if (so->so_family == AF_UNIX) {
		/*
		 * Initialize AF_UNIX related fields.
		 */
		bzero(&so->so_ux_laddr, sizeof (so->so_ux_laddr));
		bzero(&so->so_ux_faddr, sizeof (so->so_ux_faddr));
	}

	stp = vp->v_stream;
	/*
	 * Have to keep minpsz at zero in order to allow write/send of zero
	 * bytes.
	 */
	mutex_enter(&stp->sd_lock);
	if (stp->sd_qn_minpsz == 1)
		stp->sd_qn_minpsz = 0;
	mutex_exit(&stp->sd_lock);

	/*
	 * If sodirect capable allocate and initialize sodirect_t.
	 * Note, SS_SODIRECT is set in socktpi_open().
	 */
	if (so->so_state & SS_SODIRECT) {
		sodirect_t *sodp;

		ASSERT(so->so_direct == NULL);

		sodp = kmem_cache_alloc(socktpi_sod_cache, KM_SLEEP);
		sodp->sod_state = SOD_ENABLED | SOD_WAKE_NOT;
		sodp->sod_want = 0;
		sodp->sod_q = RD(stp->sd_wrq);
		sodp->sod_enqueue = sodput;
		sodp->sod_wakeup = sodwakeup;
		sodp->sod_uioafh = NULL;
		sodp->sod_uioaft = NULL;
		sodp->sod_lock = &stp->sd_lock;
		/*
		 * Remainder of the sod_uioa members are left uninitialized
		 * but will be initialized later by uioainit() before uioa
		 * is enabled.
		 */
		sodp->sod_uioa.uioa_state = UIOA_ALLOC;
		so->so_direct = sodp;
		stp->sd_sodirect = sodp;
	}

	return (0);
}

/*
 * Record the interesting fields of a T_info_ack in the sonode and
 * derive the socket mode flags from them.
 * Caller holds so_lock (all stores are to so_lock-protected fields).
 */
static void
copy_tinfo(struct sonode *so, struct T_info_ack *tia)
{
	so->so_tsdu_size = tia->TSDU_size;
	so->so_etsdu_size = tia->ETSDU_size;
	so->so_addr_size = tia->ADDR_size;
	so->so_opt_size = tia->OPT_size;
	so->so_tidu_size = tia->TIDU_size;
	so->so_serv_type = tia->SERV_type;
	switch (tia->CURRENT_state) {
	case TS_UNBND:
		break;
	case TS_IDLE:
		so->so_state |= SS_ISBOUND;
		so->so_laddr_len = 0;
		so->so_state &= ~SS_LADDR_VALID;
		break;
	case TS_DATA_XFER:
		so->so_state |= SS_ISBOUND|SS_ISCONNECTED;
		so->so_laddr_len = 0;
		so->so_faddr_len = 0;
		so->so_state &= ~(SS_LADDR_VALID | SS_FADDR_VALID);
		break;
	}

	/*
	 * Heuristics for determining the socket mode flags
	 * (SM_ATOMIC, SM_CONNREQUIRED, SM_ADDR, SM_FDPASSING,
	 * and SM_EXDATA, SM_OPTDATA, and SM_BYTESTREAM)
	 * from the info ack.
	 */
	if (so->so_serv_type == T_CLTS) {
		so->so_mode |= SM_ATOMIC | SM_ADDR;
	} else {
		so->so_mode |= SM_CONNREQUIRED;
		if (so->so_etsdu_size != 0 && so->so_etsdu_size != -2)
			so->so_mode |= SM_EXDATA;
	}
	if (so->so_type == SOCK_SEQPACKET || so->so_type == SOCK_RAW) {
		/* Semantics are to discard tail end of messages */
		so->so_mode |= SM_ATOMIC;
	}
	if (so->so_family == AF_UNIX) {
		so->so_mode |= SM_FDPASSING | SM_OPTDATA;
		if (so->so_addr_size == -1) {
			/* MAXPATHLEN + soun_family + nul termination */
			so->so_addr_size = (t_scalar_t)(MAXPATHLEN +
			    sizeof (short) + 1);
		}
		if (so->so_type == SOCK_STREAM) {
			/*
			 * Make it into a byte-stream transport.
			 * SOCK_SEQPACKET sockets are unchanged.
			 */
			so->so_tsdu_size = 0;
		}
	} else if (so->so_addr_size == -1) {
		/*
		 * Logic extracted from sockmod - have to pick some max address
		 * length in order to preallocate the addresses.
		 */
		so->so_addr_size = SOA_DEFSIZE;
	}
	if (so->so_tsdu_size == 0)
		so->so_mode |= SM_BYTESTREAM;
}

/*
 * Sanity-check the recorded transport info against the socket type
 * and family.
 * Returns 0 if consistent, EPROTO/EMSGSIZE otherwise.
 */
static int
check_tinfo(struct sonode *so)
{
	/* Consistency checks */
	if (so->so_type == SOCK_DGRAM && so->so_serv_type != T_CLTS) {
		eprintso(so, ("service type and socket type mismatch\n"));
		eprintsoline(so, EPROTO);
		return (EPROTO);
	}
	if (so->so_type == SOCK_STREAM && so->so_serv_type == T_CLTS) {
		eprintso(so, ("service type and socket type mismatch\n"));
		eprintsoline(so, EPROTO);
		return (EPROTO);
	}
	if (so->so_type == SOCK_SEQPACKET && so->so_serv_type == T_CLTS) {
		eprintso(so, ("service type and socket type mismatch\n"));
		eprintsoline(so, EPROTO);
		return (EPROTO);
	}
	if (so->so_family == AF_INET &&
	    so->so_addr_size != (t_scalar_t)sizeof (struct sockaddr_in)) {
		eprintso(so,
		    ("AF_INET must have sockaddr_in address length. Got %d\n",
		    so->so_addr_size));
		eprintsoline(so, EMSGSIZE);
		return (EMSGSIZE);
	}
	if (so->so_family == AF_INET6 &&
	    so->so_addr_size != (t_scalar_t)sizeof (struct sockaddr_in6)) {
		eprintso(so,
		    ("AF_INET6 must have sockaddr_in6 address length. Got %d\n",
		    so->so_addr_size));
		eprintsoline(so, EMSGSIZE);
		return (EMSGSIZE);
	}

	dprintso(so, 1, (
	    "tinfo: serv %d tsdu %d, etsdu %d, addr %d, opt %d, tidu %d\n",
	    so->so_serv_type, so->so_tsdu_size, so->so_etsdu_size,
	    so->so_addr_size, so->so_opt_size,
	    so->so_tidu_size));
	dprintso(so, 1, ("tinfo: so_state %s\n",
	    pr_state(so->so_state, so->so_mode)));
	return (0);
}

/*
 * Send down T_info_req and wait for the ack.
 * Record interesting T_info_ack values in the sonode.
 */
static int
do_tinfo(struct sonode *so)
{
	struct T_info_req tir;
	mblk_t *mp;
	int error;

	ASSERT(MUTEX_NOT_HELD(&so->so_lock));

	/* Concurrency-test knob: skip T_INFO_REQ entirely */
	if (so_no_tinfo) {
		so->so_addr_size = 0;
		return (0);
	}

	dprintso(so, 1, ("do_tinfo(%p)\n", (void *)so));

	/* Send T_INFO_REQ */
	tir.PRIM_type = T_INFO_REQ;
	mp = soallocproto1(&tir, sizeof (tir),
	    sizeof (struct T_info_req) + sizeof (struct T_info_ack),
	    _ALLOC_INTR);
	if (mp == NULL) {
		eprintsoline(so, ENOBUFS);
		return (ENOBUFS);
	}
	/* T_INFO_REQ has to be M_PCPROTO */
	DB_TYPE(mp) = M_PCPROTO;

	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
	if (error) {
		eprintsoline(so, error);
		return (error);
	}
	mutex_enter(&so->so_lock);
	/* Wait for T_INFO_ACK (no timeout: wait == 0) */
	if ((error = sowaitprim(so, T_INFO_REQ, T_INFO_ACK,
	    (t_uscalar_t)sizeof (struct T_info_ack), &mp, 0))) {
		mutex_exit(&so->so_lock);
		eprintsoline(so, error);
		return (error);
	}

	ASSERT(mp);
	copy_tinfo(so, (struct T_info_ack *)mp->b_rptr);
	mutex_exit(&so->so_lock);
	freemsg(mp);
	return (check_tinfo(so));
}

/*
 * Send down T_capability_req and wait for the ack.
 * Record interesting T_capability_ack values in the sonode.
 * Falls back to do_tinfo() for providers known not to support
 * T_CAPABILITY_REQ, and caches that knowledge per provider.
 */
static int
do_tcapability(struct sonode *so, t_uscalar_t cap_bits1)
{
	struct T_capability_req tcr;
	struct T_capability_ack *tca;
	mblk_t *mp;
	int error;

	ASSERT(cap_bits1 != 0);
	ASSERT((cap_bits1 & ~(TC1_ACCEPTOR_ID | TC1_INFO)) == 0);
	ASSERT(MUTEX_NOT_HELD(&so->so_lock));

	if (so->so_provinfo->tpi_capability == PI_NO)
		return (do_tinfo(so));

	if (so_no_tinfo) {
		so->so_addr_size = 0;
		if ((cap_bits1 &= ~TC1_INFO) == 0)
			return (0);
	}

	dprintso(so, 1, ("do_tcapability(%p)\n", (void *)so));

	/* Send T_CAPABILITY_REQ */
	tcr.PRIM_type = T_CAPABILITY_REQ;
	tcr.CAP_bits1 = cap_bits1;
	mp = soallocproto1(&tcr, sizeof (tcr),
	    sizeof (struct T_capability_req) + sizeof (struct T_capability_ack),
	    _ALLOC_INTR);
	if (mp == NULL) {
		eprintsoline(so, ENOBUFS);
		return (ENOBUFS);
	}
	/* T_CAPABILITY_REQ should be M_PCPROTO here */
	DB_TYPE(mp) = M_PCPROTO;

	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
	if (error) {
		eprintsoline(so, error);
		return (error);
	}
	mutex_enter(&so->so_lock);
	/* Wait for T_CAPABILITY_ACK, bounded by sock_capability_timeout */
	if ((error = sowaitprim(so, T_CAPABILITY_REQ, T_CAPABILITY_ACK,
	    (t_uscalar_t)sizeof (*tca), &mp, sock_capability_timeout * hz))) {
		mutex_exit(&so->so_lock);
		/*
		 * No ack arrived - remember that this provider does not
		 * support T_CAPABILITY_REQ so later opens go straight to
		 * T_INFO_REQ.
		 */
		PI_PROVLOCK(so->so_provinfo);
		if (so->so_provinfo->tpi_capability == PI_DONTKNOW)
			so->so_provinfo->tpi_capability = PI_NO;
		PI_PROVUNLOCK(so->so_provinfo);
		ASSERT((so->so_mode & SM_ACCEPTOR_ID) == 0);
		if (cap_bits1 & TC1_INFO) {
			/*
			 * If the T_CAPABILITY_REQ timed out and then a
			 * T_INFO_REQ gets a protocol error, most likely
			 * the capability was slow (vs. unsupported). Return
			 * ENOSR for this case as a best guess.
			 */
			if (error == ETIME) {
				return ((error = do_tinfo(so)) == EPROTO ?
				    ENOSR : error);
			}
			return (do_tinfo(so));
		}
		return (0);
	}

	/* Got the ack - cache that this provider supports the capability */
	if (so->so_provinfo->tpi_capability == PI_DONTKNOW) {
		PI_PROVLOCK(so->so_provinfo);
		so->so_provinfo->tpi_capability = PI_YES;
		PI_PROVUNLOCK(so->so_provinfo);
	}

	ASSERT(mp);
	tca = (struct T_capability_ack *)mp->b_rptr;

	ASSERT((cap_bits1 & TC1_INFO) == (tca->CAP_bits1 & TC1_INFO));

	cap_bits1 = tca->CAP_bits1;

	if (cap_bits1 & TC1_ACCEPTOR_ID) {
		so->so_acceptor_id = tca->ACCEPTOR_id;
		so->so_mode |= SM_ACCEPTOR_ID;
	}

	if (cap_bits1 & TC1_INFO)
		copy_tinfo(so, &tca->INFO_ack);

	mutex_exit(&so->so_lock);
	freemsg(mp);

	if (cap_bits1 & TC1_INFO)
		return (check_tinfo(so));

	return (0);
}

/*
 * Retrieve and clear the socket error.
 */
int
sogeterr(struct sonode *so)
{
	int error;

	ASSERT(MUTEX_HELD(&so->so_lock));

	error = so->so_error;
	so->so_error = 0;

	return (error);
}

/*
 * This routine is registered with the stream head to retrieve read
 * side errors.
 * It does not clear the socket error for a peeking read side operation.
 * If the error is to be cleared it sets *clearerr.
 */
int
sogetrderr(vnode_t *vp, int ispeek, int *clearerr)
{
	struct sonode *so = VTOSO(vp);
	int error;

	mutex_enter(&so->so_lock);
	if (ispeek) {
		error = so->so_error;
		*clearerr = 0;
	} else {
		error = so->so_error;
		so->so_error = 0;
		*clearerr = 1;
	}
	mutex_exit(&so->so_lock);
	return (error);
}

/*
 * This routine is registered with the stream head to retrieve write
 * side errors.
 * It does not clear the socket error for a peeking read side operation.
 * If the error is to be cleared it sets *clearerr.
 */
int
sogetwrerr(vnode_t *vp, int ispeek, int *clearerr)
{
	struct sonode *so = VTOSO(vp);
	int error;

	mutex_enter(&so->so_lock);
	/* A shut-down send side always reports EPIPE, never cleared here */
	if (so->so_state & SS_CANTSENDMORE) {
		error = EPIPE;
		*clearerr = 0;
	} else {
		error = so->so_error;
		if (ispeek) {
			*clearerr = 0;
		} else {
			so->so_error = 0;
			*clearerr = 1;
		}
	}
	mutex_exit(&so->so_lock);
	return (error);
}

/*
 * Set a nonpersistent read and write error on the socket.
 * Used when there is a T_uderror_ind for a connected socket.
 * The caller also needs to call strsetrerror and strsetwerror
 * after dropping the lock.
 */
void
soseterror(struct sonode *so, int error)
{
	ASSERT(error != 0);

	ASSERT(MUTEX_HELD(&so->so_lock));
	so->so_error = (ushort_t)error;
}

/*
 * Mark the socket as connection-in-progress and wake state waiters.
 */
void
soisconnecting(struct sonode *so)
{
	ASSERT(MUTEX_HELD(&so->so_lock));
	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;
	cv_broadcast(&so->so_state_cv);
}

/*
 * Mark the socket as connected and wake state waiters.
 */
void
soisconnected(struct sonode *so)
{
	ASSERT(MUTEX_HELD(&so->so_lock));
	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTED;
	cv_broadcast(&so->so_state_cv);
}

/*
 * The caller also needs to call strsetrerror, strsetwerror and strseteof.
 */
void
soisdisconnected(struct sonode *so, int error)
{
	ASSERT(MUTEX_HELD(&so->so_lock));
	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING|
	    SS_LADDR_VALID|SS_FADDR_VALID);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE);
	so->so_error = (ushort_t)error;
	/* Drop the cached peer credentials; the peer is gone */
	if (so->so_peercred != NULL) {
		crfree(so->so_peercred);
		so->so_peercred = NULL;
	}
	cv_broadcast(&so->so_state_cv);
}

/*
 * For connected AF_UNIX SOCK_DGRAM sockets when the peer closes.
 * Does not affect write side.
 * The caller also has to call strsetrerror.
 */
static void
sobreakconn(struct sonode *so, int error)
{
	ASSERT(MUTEX_HELD(&so->so_lock));
	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_error = (ushort_t)error;
	cv_broadcast(&so->so_state_cv);
}

/*
 * Can no longer send.
 * Caller must also call strsetwerror.
 *
 * We mark the peer address as no longer valid for getpeername, but
 * leave it around for so_unix_close to notify the peer (that
 * transport has no addressing held at that layer).
 */
void
socantsendmore(struct sonode *so)
{
	ASSERT(MUTEX_HELD(&so->so_lock));
	/* & binds tighter than |: (state & ~SS_FADDR_VALID) | SS_CANTSENDMORE */
	so->so_state = so->so_state & ~SS_FADDR_VALID | SS_CANTSENDMORE;
	cv_broadcast(&so->so_state_cv);
}

/*
 * The caller must call strseteof(,1) as well as this routine
 * to change the socket state.
 */
void
socantrcvmore(struct sonode *so)
{
	ASSERT(MUTEX_HELD(&so->so_lock));
	so->so_state |= SS_CANTRCVMORE;
	cv_broadcast(&so->so_state_cv);
}

/*
 * The caller has sent down a "request_prim" primitive and wants to wait for
 * an ack ("ack_prim") or an T_ERROR_ACK for it.
 * The specified "ack_prim" can be a T_OK_ACK.
 *
 * Assumes that all the TPI acks are M_PCPROTO messages.
 *
 * Note that the socket is single-threaded (using so_lock_single)
 * for all operations that generate TPI ack messages. Since
 * only TPI ack messages are M_PCPROTO we should never receive
 * anything except either the ack we are expecting or a T_ERROR_ACK
 * for the same primitive.
 */
int
sowaitprim(struct sonode *so, t_scalar_t request_prim, t_scalar_t ack_prim,
    t_uscalar_t min_size, mblk_t **mpp, clock_t wait)
{
	mblk_t *mp;
	union T_primitives *tpr;
	int error;

	dprintso(so, 1, ("sowaitprim(%p, %d, %d, %d, %p, %lu)\n",
	    (void *)so, request_prim, ack_prim, min_size, (void *)mpp, wait));

	ASSERT(MUTEX_HELD(&so->so_lock));

	error = sowaitack(so, &mp, wait);
	if (error)
		return (error);

	dprintso(so, 1, ("got msg %p\n", (void *)mp));
	/* Must at least contain the primitive type field */
	if (DB_TYPE(mp) != M_PCPROTO ||
	    MBLKL(mp) < sizeof (tpr->type)) {
		freemsg(mp);
		eprintsoline(so, EPROTO);
		return (EPROTO);
	}
	tpr = (union T_primitives *)mp->b_rptr;
	/*
	 * Did we get the primitive that we were asking for?
	 * For T_OK_ACK we also check that it matches the request primitive.
	 */
	if (tpr->type == ack_prim &&
	    (ack_prim != T_OK_ACK ||
	    tpr->ok_ack.CORRECT_prim == request_prim)) {
		if (MBLKL(mp) >= (ssize_t)min_size) {
			/* Found what we are looking for */
			*mpp = mp;
			return (0);
		}
		/* Too short */
		freemsg(mp);
		eprintsoline(so, EPROTO);
		return (EPROTO);
	}

	if (tpr->type == T_ERROR_ACK &&
	    tpr->error_ack.ERROR_prim == request_prim) {
		/* Error to the primitive we were looking for */
		if (tpr->error_ack.TLI_error == TSYSERR) {
			error = tpr->error_ack.UNIX_error;
		} else {
			error = tlitosyserr(tpr->error_ack.TLI_error);
		}
		dprintso(so, 0, ("error_ack for %d: %d/%d ->%d\n",
		    tpr->error_ack.ERROR_prim,
		    tpr->error_ack.TLI_error,
		    tpr->error_ack.UNIX_error,
		    error));
		freemsg(mp);
		return (error);
	}
	/*
	 * Wrong primitive or T_ERROR_ACK for the wrong primitive
	 */
#ifdef DEBUG
	if (tpr->type == T_ERROR_ACK) {
		dprintso(so, 0, ("error_ack for %d: %d/%d\n",
		    tpr->error_ack.ERROR_prim,
		    tpr->error_ack.TLI_error,
		    tpr->error_ack.UNIX_error));
	} else if (tpr->type == T_OK_ACK) {
		dprintso(so, 0, ("ok_ack for %d, expected %d for %d\n",
		    tpr->ok_ack.CORRECT_prim,
		    ack_prim, request_prim));
	} else {
		dprintso(so, 0,
		    ("unexpected primitive %d, expected %d for %d\n",
		    tpr->type, ack_prim, request_prim));
	}
#endif /* DEBUG */

	freemsg(mp);
	eprintsoline(so, EPROTO);
	return (EPROTO);
}

/*
 * Wait for a T_OK_ACK for the specified primitive.
 */
int
sowaitokack(struct sonode *so, t_scalar_t request_prim)
{
	mblk_t *mp;
	int error;

	error = sowaitprim(so, request_prim, T_OK_ACK,
	    (t_uscalar_t)sizeof (struct T_ok_ack), &mp, 0);
	if (error)
		return (error);
	freemsg(mp);
	return (0);
}

/*
 * Queue a received TPI ack message on so_ack_mp.
 */
void
soqueueack(struct sonode *so, mblk_t *mp)
{
	if (DB_TYPE(mp) != M_PCPROTO) {
		zcmn_err(getzoneid(), CE_WARN,
		    "sockfs: received unexpected M_PROTO TPI ack. Prim %d\n",
		    *(t_scalar_t *)mp->b_rptr);
		freemsg(mp);
		return;
	}

	mutex_enter(&so->so_lock);
	/* Only one outstanding ack is expected; drop any stale one */
	if (so->so_ack_mp != NULL) {
		dprintso(so, 1, ("so_ack_mp already set\n"));
		freemsg(so->so_ack_mp);
		so->so_ack_mp = NULL;
	}
	so->so_ack_mp = mp;
	cv_broadcast(&so->so_ack_cv);
	mutex_exit(&so->so_lock);
}

/*
 * Wait for a TPI ack ignoring signals and errors.
 */
int
sowaitack(struct sonode *so, mblk_t **mpp, clock_t wait)
{
	ASSERT(MUTEX_HELD(&so->so_lock));

	while (so->so_ack_mp == NULL) {
#ifdef SOCK_TEST
		if (wait == 0 && sock_test_timelimit != 0)
			wait = sock_test_timelimit;
#endif
		if (wait != 0) {
			/*
			 * Only wait for the time limit.
			 */
			clock_t now;

			time_to_wait(&now, wait);
			if (cv_timedwait(&so->so_ack_cv, &so->so_lock,
			    now) == -1) {
				eprintsoline(so, ETIME);
				return (ETIME);
			}
		} else
			cv_wait(&so->so_ack_cv, &so->so_lock);
	}
	*mpp = so->so_ack_mp;
#ifdef DEBUG
	{
		union T_primitives *tpr;
		mblk_t *mp = *mpp;

		tpr = (union T_primitives *)mp->b_rptr;
		ASSERT(DB_TYPE(mp) == M_PCPROTO);
		ASSERT(tpr->type == T_OK_ACK ||
		    tpr->type == T_ERROR_ACK ||
		    tpr->type == T_BIND_ACK ||
		    tpr->type == T_CAPABILITY_ACK ||
		    tpr->type == T_INFO_ACK ||
		    tpr->type == T_OPTMGMT_ACK);
	}
#endif /* DEBUG */
	so->so_ack_mp = NULL;
	return (0);
}

/*
 * Queue a received T_CONN_IND message on so_conn_ind_head/tail.
 */
void
soqueueconnind(struct sonode *so, mblk_t *mp)
{
	if (DB_TYPE(mp) != M_PROTO) {
		zcmn_err(getzoneid(), CE_WARN,
		    "sockfs: received unexpected M_PCPROTO T_CONN_IND\n");
		freemsg(mp);
		return;
	}

	mutex_enter(&so->so_lock);
	ASSERT(mp->b_next == NULL);
	if (so->so_conn_ind_head == NULL) {
		so->so_conn_ind_head = mp;
		so->so_state |= SS_HASCONNIND;
	} else {
		ASSERT(so->so_state & SS_HASCONNIND);
		ASSERT(so->so_conn_ind_tail->b_next == NULL);
		so->so_conn_ind_tail->b_next = mp;
	}
	so->so_conn_ind_tail = mp;
	/* Wakeup a single consumer of the T_CONN_IND */
	cv_signal(&so->so_connind_cv);
	mutex_exit(&so->so_lock);
}

/*
 * Wait for a T_CONN_IND.
 * Don't wait if nonblocking.
 * Accept signals and socket errors.
1177 */ 1178 int 1179 sowaitconnind(struct sonode *so, int fmode, mblk_t **mpp) 1180 { 1181 mblk_t *mp; 1182 int error = 0; 1183 1184 ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 1185 mutex_enter(&so->so_lock); 1186 check_error: 1187 if (so->so_error) { 1188 error = sogeterr(so); 1189 if (error) { 1190 mutex_exit(&so->so_lock); 1191 return (error); 1192 } 1193 } 1194 1195 if (so->so_conn_ind_head == NULL) { 1196 if (fmode & (FNDELAY|FNONBLOCK)) { 1197 error = EWOULDBLOCK; 1198 goto done; 1199 } 1200 if (!cv_wait_sig_swap(&so->so_connind_cv, &so->so_lock)) { 1201 error = EINTR; 1202 goto done; 1203 } 1204 goto check_error; 1205 } 1206 mp = so->so_conn_ind_head; 1207 so->so_conn_ind_head = mp->b_next; 1208 mp->b_next = NULL; 1209 if (so->so_conn_ind_head == NULL) { 1210 ASSERT(so->so_conn_ind_tail == mp); 1211 so->so_conn_ind_tail = NULL; 1212 so->so_state &= ~SS_HASCONNIND; 1213 } 1214 *mpp = mp; 1215 done: 1216 mutex_exit(&so->so_lock); 1217 return (error); 1218 } 1219 1220 /* 1221 * Flush a T_CONN_IND matching the sequence number from the list. 1222 * Return zero if found; non-zero otherwise. 1223 * This is called very infrequently thus it is ok to do a linear search. 1224 */ 1225 int 1226 soflushconnind(struct sonode *so, t_scalar_t seqno) 1227 { 1228 mblk_t *prevmp, *mp; 1229 struct T_conn_ind *tci; 1230 1231 mutex_enter(&so->so_lock); 1232 for (prevmp = NULL, mp = so->so_conn_ind_head; mp != NULL; 1233 prevmp = mp, mp = mp->b_next) { 1234 tci = (struct T_conn_ind *)mp->b_rptr; 1235 if (tci->SEQ_number == seqno) { 1236 dprintso(so, 1, 1237 ("t_discon_ind: found T_CONN_IND %d\n", seqno)); 1238 /* Deleting last? 
*/ 1239 if (so->so_conn_ind_tail == mp) { 1240 so->so_conn_ind_tail = prevmp; 1241 } 1242 if (prevmp == NULL) { 1243 /* Deleting first */ 1244 so->so_conn_ind_head = mp->b_next; 1245 } else { 1246 prevmp->b_next = mp->b_next; 1247 } 1248 mp->b_next = NULL; 1249 if (so->so_conn_ind_head == NULL) { 1250 ASSERT(so->so_conn_ind_tail == NULL); 1251 so->so_state &= ~SS_HASCONNIND; 1252 } else { 1253 ASSERT(so->so_conn_ind_tail != NULL); 1254 } 1255 so->so_error = ECONNABORTED; 1256 mutex_exit(&so->so_lock); 1257 1258 /* 1259 * T_KSSL_PROXY_CONN_IND may carry a handle for 1260 * an SSL context, and needs to be released. 1261 */ 1262 if ((tci->PRIM_type == T_SSL_PROXY_CONN_IND) && 1263 (mp->b_cont != NULL)) { 1264 kssl_ctx_t kssl_ctx; 1265 1266 ASSERT(MBLKL(mp->b_cont) == 1267 sizeof (kssl_ctx_t)); 1268 kssl_ctx = *((kssl_ctx_t *)mp->b_cont->b_rptr); 1269 kssl_release_ctx(kssl_ctx); 1270 } 1271 freemsg(mp); 1272 return (0); 1273 } 1274 } 1275 mutex_exit(&so->so_lock); 1276 dprintso(so, 1, ("t_discon_ind: NOT found T_CONN_IND %d\n", seqno)); 1277 return (-1); 1278 } 1279 1280 /* 1281 * Wait until the socket is connected or there is an error. 1282 * fmode should contain any nonblocking flags. nosig should be 1283 * set if the caller does not want the wait to be interrupted by a signal. 1284 */ 1285 int 1286 sowaitconnected(struct sonode *so, int fmode, int nosig) 1287 { 1288 int error; 1289 1290 ASSERT(MUTEX_HELD(&so->so_lock)); 1291 1292 while ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 1293 SS_ISCONNECTING && so->so_error == 0) { 1294 1295 dprintso(so, 1, ("waiting for SS_ISCONNECTED on %p\n", 1296 (void *)so)); 1297 if (fmode & (FNDELAY|FNONBLOCK)) 1298 return (EINPROGRESS); 1299 1300 if (nosig) 1301 cv_wait(&so->so_state_cv, &so->so_lock); 1302 else if (!cv_wait_sig_swap(&so->so_state_cv, &so->so_lock)) { 1303 /* 1304 * Return EINTR and let the application use 1305 * nonblocking techniques for detecting when 1306 * the connection has been established. 
1307 */ 1308 return (EINTR); 1309 } 1310 dprintso(so, 1, ("awoken on %p\n", (void *)so)); 1311 } 1312 1313 if (so->so_error != 0) { 1314 error = sogeterr(so); 1315 ASSERT(error != 0); 1316 dprintso(so, 1, ("sowaitconnected: error %d\n", error)); 1317 return (error); 1318 } 1319 if (!(so->so_state & SS_ISCONNECTED)) { 1320 /* 1321 * Could have received a T_ORDREL_IND or a T_DISCON_IND with 1322 * zero errno. Or another thread could have consumed so_error 1323 * e.g. by calling read. 1324 */ 1325 error = ECONNREFUSED; 1326 dprintso(so, 1, ("sowaitconnected: error %d\n", error)); 1327 return (error); 1328 } 1329 return (0); 1330 } 1331 1332 1333 /* 1334 * Handle the signal generation aspect of urgent data. 1335 */ 1336 static void 1337 so_oob_sig(struct sonode *so, int extrasig, 1338 strsigset_t *signals, strpollset_t *pollwakeups) 1339 { 1340 ASSERT(MUTEX_HELD(&so->so_lock)); 1341 1342 ASSERT(so_verify_oobstate(so)); 1343 ASSERT(so->so_oobsigcnt >= so->so_oobcnt); 1344 if (so->so_oobsigcnt > so->so_oobcnt) { 1345 /* 1346 * Signal has already been generated once for this 1347 * urgent "event". However, since TCP can receive updated 1348 * urgent pointers we still generate a signal. 1349 */ 1350 ASSERT(so->so_state & SS_OOBPEND); 1351 if (extrasig) { 1352 *signals |= S_RDBAND; 1353 *pollwakeups |= POLLRDBAND; 1354 } 1355 return; 1356 } 1357 1358 so->so_oobsigcnt++; 1359 ASSERT(so->so_oobsigcnt > 0); /* Wraparound */ 1360 ASSERT(so->so_oobsigcnt > so->so_oobcnt); 1361 1362 /* 1363 * Record (for select/poll) that urgent data is pending. 1364 */ 1365 so->so_state |= SS_OOBPEND; 1366 /* 1367 * New urgent data on the way so forget about any old 1368 * urgent data. 
1369 */ 1370 so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA); 1371 if (so->so_oobmsg != NULL) { 1372 dprintso(so, 1, ("sock: discarding old oob\n")); 1373 freemsg(so->so_oobmsg); 1374 so->so_oobmsg = NULL; 1375 } 1376 *signals |= S_RDBAND; 1377 *pollwakeups |= POLLRDBAND; 1378 ASSERT(so_verify_oobstate(so)); 1379 } 1380 1381 /* 1382 * Handle the processing of the T_EXDATA_IND with urgent data. 1383 * Returns the T_EXDATA_IND if it should be queued on the read queue. 1384 */ 1385 /* ARGSUSED2 */ 1386 static mblk_t * 1387 so_oob_exdata(struct sonode *so, mblk_t *mp, 1388 strsigset_t *signals, strpollset_t *pollwakeups) 1389 { 1390 ASSERT(MUTEX_HELD(&so->so_lock)); 1391 1392 ASSERT(so_verify_oobstate(so)); 1393 1394 ASSERT(so->so_oobsigcnt > so->so_oobcnt); 1395 1396 so->so_oobcnt++; 1397 ASSERT(so->so_oobcnt > 0); /* wraparound? */ 1398 ASSERT(so->so_oobsigcnt >= so->so_oobcnt); 1399 1400 /* 1401 * Set MSGMARK for SIOCATMARK. 1402 */ 1403 mp->b_flag |= MSGMARK; 1404 1405 ASSERT(so_verify_oobstate(so)); 1406 return (mp); 1407 } 1408 1409 /* 1410 * Handle the processing of the actual urgent data. 1411 * Returns the data mblk if it should be queued on the read queue. 1412 */ 1413 static mblk_t * 1414 so_oob_data(struct sonode *so, mblk_t *mp, 1415 strsigset_t *signals, strpollset_t *pollwakeups) 1416 { 1417 ASSERT(MUTEX_HELD(&so->so_lock)); 1418 1419 ASSERT(so_verify_oobstate(so)); 1420 1421 ASSERT(so->so_oobsigcnt >= so->so_oobcnt); 1422 ASSERT(mp != NULL); 1423 /* 1424 * For OOBINLINE we keep the data in the T_EXDATA_IND. 1425 * Otherwise we store it in so_oobmsg. 
1426 */ 1427 ASSERT(so->so_oobmsg == NULL); 1428 if (so->so_options & SO_OOBINLINE) { 1429 *pollwakeups |= POLLIN | POLLRDNORM | POLLRDBAND; 1430 *signals |= S_INPUT | S_RDNORM; 1431 } else { 1432 *pollwakeups |= POLLRDBAND; 1433 so->so_state |= SS_HAVEOOBDATA; 1434 so->so_oobmsg = mp; 1435 mp = NULL; 1436 } 1437 ASSERT(so_verify_oobstate(so)); 1438 return (mp); 1439 } 1440 1441 /* 1442 * Caller must hold the mutex. 1443 * For delayed processing, save the T_DISCON_IND received 1444 * from below on so_discon_ind_mp. 1445 * When the message is processed the framework will call: 1446 * (*func)(so, mp); 1447 */ 1448 static void 1449 so_save_discon_ind(struct sonode *so, 1450 mblk_t *mp, 1451 void (*func)(struct sonode *so, mblk_t *)) 1452 { 1453 ASSERT(MUTEX_HELD(&so->so_lock)); 1454 1455 /* 1456 * Discard new T_DISCON_IND if we have already received another. 1457 * Currently the earlier message can either be on so_discon_ind_mp 1458 * or being processed. 1459 */ 1460 if (so->so_discon_ind_mp != NULL || (so->so_flag & SOASYNC_UNBIND)) { 1461 zcmn_err(getzoneid(), CE_WARN, 1462 "sockfs: received unexpected additional T_DISCON_IND\n"); 1463 freemsg(mp); 1464 return; 1465 } 1466 mp->b_prev = (mblk_t *)func; 1467 mp->b_next = NULL; 1468 so->so_discon_ind_mp = mp; 1469 } 1470 1471 /* 1472 * Caller must hold the mutex and make sure that either SOLOCKED 1473 * or SOASYNC_UNBIND is set. Called from so_unlock_single(). 1474 * Perform delayed processing of T_DISCON_IND message on so_discon_ind_mp. 1475 * Need to ensure that strsock_proto() will not end up sleeping for 1476 * SOASYNC_UNBIND, while executing this function. 
1477 */ 1478 void 1479 so_drain_discon_ind(struct sonode *so) 1480 { 1481 mblk_t *bp; 1482 void (*func)(struct sonode *so, mblk_t *); 1483 1484 ASSERT(MUTEX_HELD(&so->so_lock)); 1485 ASSERT(so->so_flag & (SOLOCKED|SOASYNC_UNBIND)); 1486 1487 /* Process T_DISCON_IND on so_discon_ind_mp */ 1488 if ((bp = so->so_discon_ind_mp) != NULL) { 1489 so->so_discon_ind_mp = NULL; 1490 func = (void (*)())bp->b_prev; 1491 bp->b_prev = NULL; 1492 1493 /* 1494 * This (*func) is supposed to generate a message downstream 1495 * and we need to have a flag set until the corresponding 1496 * upstream message reaches stream head. 1497 * When processing T_DISCON_IND in strsock_discon_ind 1498 * we hold SOASYN_UNBIND when sending T_UNBIND_REQ down and 1499 * drop the flag after we get the ACK in strsock_proto. 1500 */ 1501 (void) (*func)(so, bp); 1502 } 1503 } 1504 1505 /* 1506 * Caller must hold the mutex. 1507 * Remove the T_DISCON_IND on so_discon_ind_mp. 1508 */ 1509 void 1510 so_flush_discon_ind(struct sonode *so) 1511 { 1512 mblk_t *bp; 1513 1514 ASSERT(MUTEX_HELD(&so->so_lock)); 1515 1516 /* 1517 * Remove T_DISCON_IND mblk at so_discon_ind_mp. 1518 */ 1519 if ((bp = so->so_discon_ind_mp) != NULL) { 1520 so->so_discon_ind_mp = NULL; 1521 bp->b_prev = NULL; 1522 freemsg(bp); 1523 } 1524 } 1525 1526 /* 1527 * Caller must hold the mutex. 1528 * 1529 * This function is used to process the T_DISCON_IND message. It does 1530 * immediate processing when called from strsock_proto and delayed 1531 * processing of discon_ind saved on so_discon_ind_mp when called from 1532 * so_drain_discon_ind. When a T_DISCON_IND message is saved in 1533 * so_discon_ind_mp for delayed processing, this function is registered 1534 * as the callback function to process the message. 1535 * 1536 * SOASYNC_UNBIND should be held in this function, during the non-blocking 1537 * unbind operation, and should be released only after we receive the ACK 1538 * in strsock_proto, for the T_UNBIND_REQ sent here. 
Since SOLOCKED is not set, 1539 * no TPI messages would be sent down at this time. This is to prevent M_FLUSH 1540 * sent from either this function or tcp_unbind(), flushing away any TPI 1541 * message that is being sent down and stays in a lower module's queue. 1542 * 1543 * This function drops so_lock and grabs it again. 1544 */ 1545 static void 1546 strsock_discon_ind(struct sonode *so, mblk_t *discon_mp) 1547 { 1548 struct vnode *vp; 1549 struct stdata *stp; 1550 union T_primitives *tpr; 1551 struct T_unbind_req *ubr; 1552 mblk_t *mp; 1553 int error; 1554 1555 ASSERT(MUTEX_HELD(&so->so_lock)); 1556 ASSERT(discon_mp); 1557 ASSERT(discon_mp->b_rptr); 1558 1559 tpr = (union T_primitives *)discon_mp->b_rptr; 1560 ASSERT(tpr->type == T_DISCON_IND); 1561 1562 vp = SOTOV(so); 1563 stp = vp->v_stream; 1564 ASSERT(stp); 1565 1566 /* 1567 * Not a listener 1568 */ 1569 ASSERT((so->so_state & SS_ACCEPTCONN) == 0); 1570 1571 /* 1572 * This assumes that the name space for DISCON_reason 1573 * is the errno name space. 1574 */ 1575 soisdisconnected(so, tpr->discon_ind.DISCON_reason); 1576 1577 /* 1578 * Unbind with the transport without blocking. 1579 * If we've already received a T_DISCON_IND do not unbind. 1580 * 1581 * If there is no preallocated unbind message, we have already 1582 * unbound with the transport 1583 * 1584 * If the socket is not bound, no need to unbind. 1585 */ 1586 mp = so->so_unbind_mp; 1587 if (mp == NULL) { 1588 ASSERT(!(so->so_state & SS_ISBOUND)); 1589 mutex_exit(&so->so_lock); 1590 } else if (!(so->so_state & SS_ISBOUND)) { 1591 mutex_exit(&so->so_lock); 1592 } else { 1593 so->so_unbind_mp = NULL; 1594 1595 /* 1596 * Is another T_DISCON_IND being processed. 1597 */ 1598 ASSERT((so->so_flag & SOASYNC_UNBIND) == 0); 1599 1600 /* 1601 * Make strsock_proto ignore T_OK_ACK and T_ERROR_ACK for 1602 * this unbind. Set SOASYNC_UNBIND. This should be cleared 1603 * only after we receive the ACK in strsock_proto. 
1604 */ 1605 so->so_flag |= SOASYNC_UNBIND; 1606 ASSERT(!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING))); 1607 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN|SS_LADDR_VALID); 1608 mutex_exit(&so->so_lock); 1609 1610 /* 1611 * Send down T_UNBIND_REQ ignoring flow control. 1612 * XXX Assumes that MSG_IGNFLOW implies that this thread 1613 * does not run service procedures. 1614 */ 1615 ASSERT(DB_TYPE(mp) == M_PROTO); 1616 ubr = (struct T_unbind_req *)mp->b_rptr; 1617 mp->b_wptr += sizeof (*ubr); 1618 ubr->PRIM_type = T_UNBIND_REQ; 1619 1620 /* 1621 * Flush the read and write side (except stream head read queue) 1622 * and send down T_UNBIND_REQ. 1623 */ 1624 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW); 1625 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1626 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 1627 /* LINTED - warning: statement has no consequent: if */ 1628 if (error) { 1629 eprintsoline(so, error); 1630 } 1631 } 1632 1633 if (tpr->discon_ind.DISCON_reason != 0) 1634 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 1635 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 1636 strseteof(SOTOV(so), 1); 1637 /* 1638 * strseteof takes care of read side wakeups, 1639 * pollwakeups, and signals. 1640 */ 1641 dprintso(so, 1, ("T_DISCON_IND: error %d\n", so->so_error)); 1642 freemsg(discon_mp); 1643 1644 1645 pollwakeup(&stp->sd_pollist, POLLOUT); 1646 mutex_enter(&stp->sd_lock); 1647 1648 /* 1649 * Wake sleeping write 1650 */ 1651 if (stp->sd_flag & WSLEEP) { 1652 stp->sd_flag &= ~WSLEEP; 1653 cv_broadcast(&stp->sd_wrq->q_wait); 1654 } 1655 1656 /* 1657 * strsendsig can handle multiple signals with a 1658 * single call. Send SIGPOLL for S_OUTPUT event. 1659 */ 1660 if (stp->sd_sigflags & S_OUTPUT) 1661 strsendsig(stp->sd_siglist, S_OUTPUT, 0, 0); 1662 1663 mutex_exit(&stp->sd_lock); 1664 mutex_enter(&so->so_lock); 1665 } 1666 1667 /* 1668 * This routine is registered with the stream head to receive M_PROTO 1669 * and M_PCPROTO messages. 
1670 * 1671 * Returns NULL if the message was consumed. 1672 * Returns an mblk to make that mblk be processed (and queued) by the stream 1673 * head. 1674 * 1675 * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and 1676 * *pollwakeups) for the stream head to take action on. Note that since 1677 * sockets always deliver SIGIO for every new piece of data this routine 1678 * never sets *firstmsgsigs; any signals are returned in *allmsgsigs. 1679 * 1680 * This routine handles all data related TPI messages independent of 1681 * the type of the socket i.e. it doesn't care if T_UNITDATA_IND message 1682 * arrive on a SOCK_STREAM. 1683 */ 1684 static mblk_t * 1685 strsock_proto(vnode_t *vp, mblk_t *mp, 1686 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 1687 strsigset_t *allmsgsigs, strpollset_t *pollwakeups) 1688 { 1689 union T_primitives *tpr; 1690 struct sonode *so; 1691 1692 so = VTOSO(vp); 1693 1694 dprintso(so, 1, ("strsock_proto(%p, %p)\n", (void *)vp, (void *)mp)); 1695 1696 /* Set default return values */ 1697 *firstmsgsigs = *wakeups = *allmsgsigs = *pollwakeups = 0; 1698 1699 ASSERT(DB_TYPE(mp) == M_PROTO || 1700 DB_TYPE(mp) == M_PCPROTO); 1701 1702 if (MBLKL(mp) < sizeof (tpr->type)) { 1703 /* The message is too short to even contain the primitive */ 1704 zcmn_err(getzoneid(), CE_WARN, 1705 "sockfs: Too short TPI message received. Len = %ld\n", 1706 (ptrdiff_t)(MBLKL(mp))); 1707 freemsg(mp); 1708 return (NULL); 1709 } 1710 if (!__TPI_PRIM_ISALIGNED(mp->b_rptr)) { 1711 /* The read pointer is not aligned correctly for TPI */ 1712 zcmn_err(getzoneid(), CE_WARN, 1713 "sockfs: Unaligned TPI message received. 
rptr = %p\n", 1714 (void *)mp->b_rptr); 1715 freemsg(mp); 1716 return (NULL); 1717 } 1718 tpr = (union T_primitives *)mp->b_rptr; 1719 dprintso(so, 1, ("strsock_proto: primitive %d\n", tpr->type)); 1720 1721 switch (tpr->type) { 1722 1723 case T_DATA_IND: 1724 if (MBLKL(mp) < sizeof (struct T_data_ind)) { 1725 zcmn_err(getzoneid(), CE_WARN, 1726 "sockfs: Too short T_DATA_IND. Len = %ld\n", 1727 (ptrdiff_t)(MBLKL(mp))); 1728 freemsg(mp); 1729 return (NULL); 1730 } 1731 /* 1732 * Ignore zero-length T_DATA_IND messages. These might be 1733 * generated by some transports. 1734 * This is needed to prevent read (which skips the M_PROTO 1735 * part) to unexpectedly return 0 (or return EWOULDBLOCK 1736 * on a non-blocking socket after select/poll has indicated 1737 * that data is available). 1738 */ 1739 if (msgdsize(mp->b_cont) == 0) { 1740 dprintso(so, 0, 1741 ("strsock_proto: zero length T_DATA_IND\n")); 1742 freemsg(mp); 1743 return (NULL); 1744 } 1745 *allmsgsigs = S_INPUT | S_RDNORM; 1746 *pollwakeups = POLLIN | POLLRDNORM; 1747 *wakeups = RSLEEP; 1748 return (mp); 1749 1750 case T_UNITDATA_IND: { 1751 struct T_unitdata_ind *tudi = &tpr->unitdata_ind; 1752 void *addr; 1753 t_uscalar_t addrlen; 1754 1755 if (MBLKL(mp) < sizeof (struct T_unitdata_ind)) { 1756 zcmn_err(getzoneid(), CE_WARN, 1757 "sockfs: Too short T_UNITDATA_IND. Len = %ld\n", 1758 (ptrdiff_t)(MBLKL(mp))); 1759 freemsg(mp); 1760 return (NULL); 1761 } 1762 1763 /* Is this is not a connected datagram socket? */ 1764 if ((so->so_mode & SM_CONNREQUIRED) || 1765 !(so->so_state & SS_ISCONNECTED)) { 1766 /* 1767 * Not a connected datagram socket. Look for 1768 * the SO_UNIX_CLOSE option. If such an option is found 1769 * discard the message (since it has no meaning 1770 * unless connected). 
1771 */ 1772 if (so->so_family == AF_UNIX && msgdsize(mp) == 0 && 1773 tudi->OPT_length != 0) { 1774 void *opt; 1775 t_uscalar_t optlen = tudi->OPT_length; 1776 1777 opt = sogetoff(mp, tudi->OPT_offset, 1778 optlen, __TPI_ALIGN_SIZE); 1779 if (opt == NULL) { 1780 /* The len/off falls outside mp */ 1781 freemsg(mp); 1782 mutex_enter(&so->so_lock); 1783 soseterror(so, EPROTO); 1784 mutex_exit(&so->so_lock); 1785 zcmn_err(getzoneid(), CE_WARN, 1786 "sockfs: T_unidata_ind with " 1787 "invalid optlen/offset %u/%d\n", 1788 optlen, tudi->OPT_offset); 1789 return (NULL); 1790 } 1791 if (so_getopt_unix_close(opt, optlen)) { 1792 freemsg(mp); 1793 return (NULL); 1794 } 1795 } 1796 *allmsgsigs = S_INPUT | S_RDNORM; 1797 *pollwakeups = POLLIN | POLLRDNORM; 1798 *wakeups = RSLEEP; 1799 if (audit_active) 1800 audit_sock(T_UNITDATA_IND, strvp2wq(vp), 1801 mp, 0); 1802 return (mp); 1803 } 1804 1805 /* 1806 * A connect datagram socket. For AF_INET{,6} we verify that 1807 * the source address matches the "connected to" address. 1808 * The semantics of AF_UNIX sockets is to not verify 1809 * the source address. 1810 * Note that this source address verification is transport 1811 * specific. Thus the real fix would be to extent TPI 1812 * to allow T_CONN_REQ messages to be send to connectionless 1813 * transport providers and always let the transport provider 1814 * do whatever filtering is needed. 1815 * 1816 * The verification/filtering semantics for transports 1817 * other than AF_INET and AF_UNIX are unknown. The choice 1818 * would be to either filter using bcmp or let all messages 1819 * get through. This code does not filter other address 1820 * families since this at least allows the application to 1821 * work around any missing filtering. 1822 * 1823 * XXX Should we move filtering to UDP/ICMP??? 1824 * That would require passing e.g. a T_DISCON_REQ to UDP 1825 * when the socket becomes unconnected. 
1826 */ 1827 addrlen = tudi->SRC_length; 1828 /* 1829 * The alignment restriction is really to strict but 1830 * we want enough alignment to inspect the fields of 1831 * a sockaddr_in. 1832 */ 1833 addr = sogetoff(mp, tudi->SRC_offset, addrlen, 1834 __TPI_ALIGN_SIZE); 1835 if (addr == NULL) { 1836 freemsg(mp); 1837 mutex_enter(&so->so_lock); 1838 soseterror(so, EPROTO); 1839 mutex_exit(&so->so_lock); 1840 zcmn_err(getzoneid(), CE_WARN, 1841 "sockfs: T_unidata_ind with invalid " 1842 "addrlen/offset %u/%d\n", 1843 addrlen, tudi->SRC_offset); 1844 return (NULL); 1845 } 1846 1847 if (so->so_family == AF_INET) { 1848 /* 1849 * For AF_INET we allow wildcarding both sin_addr 1850 * and sin_port. 1851 */ 1852 struct sockaddr_in *faddr, *sin; 1853 1854 /* Prevent so_faddr_sa from changing while accessed */ 1855 mutex_enter(&so->so_lock); 1856 ASSERT(so->so_faddr_len == 1857 (socklen_t)sizeof (struct sockaddr_in)); 1858 faddr = (struct sockaddr_in *)so->so_faddr_sa; 1859 sin = (struct sockaddr_in *)addr; 1860 if (addrlen != 1861 (t_uscalar_t)sizeof (struct sockaddr_in) || 1862 (sin->sin_addr.s_addr != faddr->sin_addr.s_addr && 1863 faddr->sin_addr.s_addr != INADDR_ANY) || 1864 (so->so_type != SOCK_RAW && 1865 sin->sin_port != faddr->sin_port && 1866 faddr->sin_port != 0)) { 1867 #ifdef DEBUG 1868 dprintso(so, 0, 1869 ("sockfs: T_UNITDATA_IND mismatch: %s", 1870 pr_addr(so->so_family, 1871 (struct sockaddr *)addr, 1872 addrlen))); 1873 dprintso(so, 0, (" - %s\n", 1874 pr_addr(so->so_family, so->so_faddr_sa, 1875 (t_uscalar_t)so->so_faddr_len))); 1876 #endif /* DEBUG */ 1877 mutex_exit(&so->so_lock); 1878 freemsg(mp); 1879 return (NULL); 1880 } 1881 mutex_exit(&so->so_lock); 1882 } else if (so->so_family == AF_INET6) { 1883 /* 1884 * For AF_INET6 we allow wildcarding both sin6_addr 1885 * and sin6_port. 
1886 */ 1887 struct sockaddr_in6 *faddr6, *sin6; 1888 static struct in6_addr zeroes; /* inits to all zeros */ 1889 1890 /* Prevent so_faddr_sa from changing while accessed */ 1891 mutex_enter(&so->so_lock); 1892 ASSERT(so->so_faddr_len == 1893 (socklen_t)sizeof (struct sockaddr_in6)); 1894 faddr6 = (struct sockaddr_in6 *)so->so_faddr_sa; 1895 sin6 = (struct sockaddr_in6 *)addr; 1896 /* XXX could we get a mapped address ::ffff:0.0.0.0 ? */ 1897 if (addrlen != 1898 (t_uscalar_t)sizeof (struct sockaddr_in6) || 1899 (!IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, 1900 &faddr6->sin6_addr) && 1901 !IN6_ARE_ADDR_EQUAL(&faddr6->sin6_addr, &zeroes)) || 1902 (so->so_type != SOCK_RAW && 1903 sin6->sin6_port != faddr6->sin6_port && 1904 faddr6->sin6_port != 0)) { 1905 #ifdef DEBUG 1906 dprintso(so, 0, 1907 ("sockfs: T_UNITDATA_IND mismatch: %s", 1908 pr_addr(so->so_family, 1909 (struct sockaddr *)addr, 1910 addrlen))); 1911 dprintso(so, 0, (" - %s\n", 1912 pr_addr(so->so_family, so->so_faddr_sa, 1913 (t_uscalar_t)so->so_faddr_len))); 1914 #endif /* DEBUG */ 1915 mutex_exit(&so->so_lock); 1916 freemsg(mp); 1917 return (NULL); 1918 } 1919 mutex_exit(&so->so_lock); 1920 } else if (so->so_family == AF_UNIX && 1921 msgdsize(mp->b_cont) == 0 && 1922 tudi->OPT_length != 0) { 1923 /* 1924 * Attempt to extract AF_UNIX 1925 * SO_UNIX_CLOSE indication from options. 1926 */ 1927 void *opt; 1928 t_uscalar_t optlen = tudi->OPT_length; 1929 1930 opt = sogetoff(mp, tudi->OPT_offset, 1931 optlen, __TPI_ALIGN_SIZE); 1932 if (opt == NULL) { 1933 /* The len/off falls outside mp */ 1934 freemsg(mp); 1935 mutex_enter(&so->so_lock); 1936 soseterror(so, EPROTO); 1937 mutex_exit(&so->so_lock); 1938 zcmn_err(getzoneid(), CE_WARN, 1939 "sockfs: T_unidata_ind with invalid " 1940 "optlen/offset %u/%d\n", 1941 optlen, tudi->OPT_offset); 1942 return (NULL); 1943 } 1944 /* 1945 * If we received a unix close indication mark the 1946 * socket and discard this message. 
1947 */ 1948 if (so_getopt_unix_close(opt, optlen)) { 1949 mutex_enter(&so->so_lock); 1950 sobreakconn(so, ECONNRESET); 1951 mutex_exit(&so->so_lock); 1952 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 1953 freemsg(mp); 1954 *pollwakeups = POLLIN | POLLRDNORM; 1955 *allmsgsigs = S_INPUT | S_RDNORM; 1956 *wakeups = RSLEEP; 1957 return (NULL); 1958 } 1959 } 1960 *allmsgsigs = S_INPUT | S_RDNORM; 1961 *pollwakeups = POLLIN | POLLRDNORM; 1962 *wakeups = RSLEEP; 1963 return (mp); 1964 } 1965 1966 case T_OPTDATA_IND: { 1967 struct T_optdata_ind *tdi = &tpr->optdata_ind; 1968 1969 if (MBLKL(mp) < sizeof (struct T_optdata_ind)) { 1970 zcmn_err(getzoneid(), CE_WARN, 1971 "sockfs: Too short T_OPTDATA_IND. Len = %ld\n", 1972 (ptrdiff_t)(MBLKL(mp))); 1973 freemsg(mp); 1974 return (NULL); 1975 } 1976 /* 1977 * Allow zero-length messages carrying options. 1978 * This is used when carrying the SO_UNIX_CLOSE option. 1979 */ 1980 if (so->so_family == AF_UNIX && msgdsize(mp->b_cont) == 0 && 1981 tdi->OPT_length != 0) { 1982 /* 1983 * Attempt to extract AF_UNIX close indication 1984 * from the options. Ignore any other options - 1985 * those are handled once the message is removed 1986 * from the queue. 1987 * The close indication message should not carry data. 1988 */ 1989 void *opt; 1990 t_uscalar_t optlen = tdi->OPT_length; 1991 1992 opt = sogetoff(mp, tdi->OPT_offset, 1993 optlen, __TPI_ALIGN_SIZE); 1994 if (opt == NULL) { 1995 /* The len/off falls outside mp */ 1996 freemsg(mp); 1997 mutex_enter(&so->so_lock); 1998 soseterror(so, EPROTO); 1999 mutex_exit(&so->so_lock); 2000 zcmn_err(getzoneid(), CE_WARN, 2001 "sockfs: T_optdata_ind with invalid " 2002 "optlen/offset %u/%d\n", 2003 optlen, tdi->OPT_offset); 2004 return (NULL); 2005 } 2006 /* 2007 * If we received a close indication mark the 2008 * socket and discard this message. 
2009 */ 2010 if (so_getopt_unix_close(opt, optlen)) { 2011 mutex_enter(&so->so_lock); 2012 socantsendmore(so); 2013 mutex_exit(&so->so_lock); 2014 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2015 freemsg(mp); 2016 return (NULL); 2017 } 2018 } 2019 *allmsgsigs = S_INPUT | S_RDNORM; 2020 *pollwakeups = POLLIN | POLLRDNORM; 2021 *wakeups = RSLEEP; 2022 return (mp); 2023 } 2024 2025 case T_EXDATA_IND: { 2026 mblk_t *mctl, *mdata; 2027 mblk_t *lbp; 2028 union T_primitives *tprp; 2029 struct stdata *stp; 2030 queue_t *qp; 2031 2032 if (MBLKL(mp) < sizeof (struct T_exdata_ind)) { 2033 zcmn_err(getzoneid(), CE_WARN, 2034 "sockfs: Too short T_EXDATA_IND. Len = %ld\n", 2035 (ptrdiff_t)(MBLKL(mp))); 2036 freemsg(mp); 2037 return (NULL); 2038 } 2039 /* 2040 * Ignore zero-length T_EXDATA_IND messages. These might be 2041 * generated by some transports. 2042 * 2043 * This is needed to prevent read (which skips the M_PROTO 2044 * part) to unexpectedly return 0 (or return EWOULDBLOCK 2045 * on a non-blocking socket after select/poll has indicated 2046 * that data is available). 2047 */ 2048 dprintso(so, 1, 2049 ("T_EXDATA_IND(%p): counts %d/%d state %s\n", 2050 (void *)vp, so->so_oobsigcnt, so->so_oobcnt, 2051 pr_state(so->so_state, so->so_mode))); 2052 2053 if (msgdsize(mp->b_cont) == 0) { 2054 dprintso(so, 0, 2055 ("strsock_proto: zero length T_EXDATA_IND\n")); 2056 freemsg(mp); 2057 return (NULL); 2058 } 2059 2060 /* 2061 * Split into the T_EXDATA_IND and the M_DATA part. 
2062 * We process these three pieces separately: 2063 * signal generation 2064 * handling T_EXDATA_IND 2065 * handling M_DATA component 2066 */ 2067 mctl = mp; 2068 mdata = mctl->b_cont; 2069 mctl->b_cont = NULL; 2070 mutex_enter(&so->so_lock); 2071 so_oob_sig(so, 0, allmsgsigs, pollwakeups); 2072 mctl = so_oob_exdata(so, mctl, allmsgsigs, pollwakeups); 2073 mdata = so_oob_data(so, mdata, allmsgsigs, pollwakeups); 2074 2075 stp = vp->v_stream; 2076 ASSERT(stp != NULL); 2077 qp = _RD(stp->sd_wrq); 2078 2079 mutex_enter(QLOCK(qp)); 2080 lbp = qp->q_last; 2081 2082 /* 2083 * We want to avoid queueing up a string of T_EXDATA_IND 2084 * messages with no intervening data messages at the stream 2085 * head. These messages contribute to the total message 2086 * count. Eventually this can lead to STREAMS flow contol 2087 * and also cause TCP to advertise a zero window condition 2088 * to the peer. This can happen in the degenerate case where 2089 * the sender and receiver exchange only OOB data. The sender 2090 * only sends messages with MSG_OOB flag and the receiver 2091 * receives only MSG_OOB messages and does not use SO_OOBINLINE. 2092 * An example of this scenario has been reported in applications 2093 * that use OOB data to exchange heart beats. Flow control 2094 * relief will never happen if the application only reads OOB 2095 * data which is done directly by sorecvoob() and the 2096 * T_EXDATA_IND messages at the streamhead won't be consumed. 2097 * Note that there is no correctness issue in compressing the 2098 * string of T_EXDATA_IND messages into a single T_EXDATA_IND 2099 * message. A single read that does not specify MSG_OOB will 2100 * read across all the marks in a loop in sotpi_recvmsg(). 2101 * Each mark is individually distinguishable only if the 2102 * T_EXDATA_IND messages are separated by data messages. 
2103 */ 2104 if ((qp->q_first != NULL) && (DB_TYPE(lbp) == M_PROTO)) { 2105 tprp = (union T_primitives *)lbp->b_rptr; 2106 if ((tprp->type == T_EXDATA_IND) && 2107 !(so->so_options & SO_OOBINLINE)) { 2108 2109 /* 2110 * free the new M_PROTO message 2111 */ 2112 freemsg(mctl); 2113 2114 /* 2115 * adjust the OOB count and OOB signal count 2116 * just incremented for the new OOB data. 2117 */ 2118 so->so_oobcnt--; 2119 so->so_oobsigcnt--; 2120 mutex_exit(QLOCK(qp)); 2121 mutex_exit(&so->so_lock); 2122 return (NULL); 2123 } 2124 } 2125 mutex_exit(QLOCK(qp)); 2126 2127 /* 2128 * Pass the T_EXDATA_IND and the M_DATA back separately 2129 * by using b_next linkage. (The stream head will queue any 2130 * b_next linked messages separately.) This is needed 2131 * since MSGMARK applies to the last by of the message 2132 * hence we can not have any M_DATA component attached 2133 * to the marked T_EXDATA_IND. Note that the stream head 2134 * will not consolidate M_DATA messages onto an MSGMARK'ed 2135 * message in order to preserve the constraint that 2136 * the T_EXDATA_IND always is a separate message. 
2137 */ 2138 ASSERT(mctl != NULL); 2139 mctl->b_next = mdata; 2140 mp = mctl; 2141 #ifdef DEBUG 2142 if (mdata == NULL) { 2143 dprintso(so, 1, 2144 ("after outofline T_EXDATA_IND(%p): " 2145 "counts %d/%d poll 0x%x sig 0x%x state %s\n", 2146 (void *)vp, so->so_oobsigcnt, 2147 so->so_oobcnt, *pollwakeups, *allmsgsigs, 2148 pr_state(so->so_state, so->so_mode))); 2149 } else { 2150 dprintso(so, 1, 2151 ("after inline T_EXDATA_IND(%p): " 2152 "counts %d/%d poll 0x%x sig 0x%x state %s\n", 2153 (void *)vp, so->so_oobsigcnt, 2154 so->so_oobcnt, *pollwakeups, *allmsgsigs, 2155 pr_state(so->so_state, so->so_mode))); 2156 } 2157 #endif /* DEBUG */ 2158 mutex_exit(&so->so_lock); 2159 *wakeups = RSLEEP; 2160 return (mp); 2161 } 2162 2163 case T_CONN_CON: { 2164 struct T_conn_con *conn_con; 2165 void *addr; 2166 t_uscalar_t addrlen; 2167 2168 /* 2169 * Verify the state, update the state to ISCONNECTED, 2170 * record the potentially new address in the message, 2171 * and drop the message. 2172 */ 2173 if (MBLKL(mp) < sizeof (struct T_conn_con)) { 2174 zcmn_err(getzoneid(), CE_WARN, 2175 "sockfs: Too short T_CONN_CON. Len = %ld\n", 2176 (ptrdiff_t)(MBLKL(mp))); 2177 freemsg(mp); 2178 return (NULL); 2179 } 2180 2181 mutex_enter(&so->so_lock); 2182 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 2183 SS_ISCONNECTING) { 2184 mutex_exit(&so->so_lock); 2185 dprintso(so, 1, 2186 ("T_CONN_CON: state %x\n", so->so_state)); 2187 freemsg(mp); 2188 return (NULL); 2189 } 2190 2191 conn_con = &tpr->conn_con; 2192 addrlen = conn_con->RES_length; 2193 /* 2194 * Allow the address to be of different size than sent down 2195 * in the T_CONN_REQ as long as it doesn't exceed the maxlen. 2196 * For AF_UNIX require the identical length. 2197 */ 2198 if (so->so_family == AF_UNIX ? 
2199 addrlen != (t_uscalar_t)sizeof (so->so_ux_laddr) : 2200 addrlen > (t_uscalar_t)so->so_faddr_maxlen) { 2201 zcmn_err(getzoneid(), CE_WARN, 2202 "sockfs: T_conn_con with different " 2203 "length %u/%d\n", 2204 addrlen, conn_con->RES_length); 2205 soisdisconnected(so, EPROTO); 2206 mutex_exit(&so->so_lock); 2207 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 2208 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2209 strseteof(SOTOV(so), 1); 2210 freemsg(mp); 2211 /* 2212 * strseteof takes care of read side wakeups, 2213 * pollwakeups, and signals. 2214 */ 2215 *wakeups = WSLEEP; 2216 *allmsgsigs = S_OUTPUT; 2217 *pollwakeups = POLLOUT; 2218 return (NULL); 2219 } 2220 addr = sogetoff(mp, conn_con->RES_offset, addrlen, 1); 2221 if (addr == NULL) { 2222 zcmn_err(getzoneid(), CE_WARN, 2223 "sockfs: T_conn_con with invalid " 2224 "addrlen/offset %u/%d\n", 2225 addrlen, conn_con->RES_offset); 2226 mutex_exit(&so->so_lock); 2227 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 2228 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2229 strseteof(SOTOV(so), 1); 2230 freemsg(mp); 2231 /* 2232 * strseteof takes care of read side wakeups, 2233 * pollwakeups, and signals. 2234 */ 2235 *wakeups = WSLEEP; 2236 *allmsgsigs = S_OUTPUT; 2237 *pollwakeups = POLLOUT; 2238 return (NULL); 2239 } 2240 2241 /* 2242 * Save for getpeername. 2243 */ 2244 if (so->so_family != AF_UNIX) { 2245 so->so_faddr_len = (socklen_t)addrlen; 2246 ASSERT(so->so_faddr_len <= so->so_faddr_maxlen); 2247 bcopy(addr, so->so_faddr_sa, addrlen); 2248 so->so_state |= SS_FADDR_VALID; 2249 } 2250 2251 if (so->so_peercred != NULL) 2252 crfree(so->so_peercred); 2253 so->so_peercred = DB_CRED(mp); 2254 so->so_cpid = DB_CPID(mp); 2255 if (so->so_peercred != NULL) 2256 crhold(so->so_peercred); 2257 2258 /* Wakeup anybody sleeping in sowaitconnected */ 2259 soisconnected(so); 2260 mutex_exit(&so->so_lock); 2261 2262 /* 2263 * The socket is now available for sending data. 
2264 */ 2265 *wakeups = WSLEEP; 2266 *allmsgsigs = S_OUTPUT; 2267 *pollwakeups = POLLOUT; 2268 freemsg(mp); 2269 return (NULL); 2270 } 2271 2272 /* 2273 * Extra processing in case of an SSL proxy, before queuing or 2274 * forwarding to the fallback endpoint 2275 */ 2276 case T_SSL_PROXY_CONN_IND: 2277 case T_CONN_IND: 2278 /* 2279 * Verify the min size and queue the message on 2280 * the so_conn_ind_head/tail list. 2281 */ 2282 if (MBLKL(mp) < sizeof (struct T_conn_ind)) { 2283 zcmn_err(getzoneid(), CE_WARN, 2284 "sockfs: Too short T_CONN_IND. Len = %ld\n", 2285 (ptrdiff_t)(MBLKL(mp))); 2286 freemsg(mp); 2287 return (NULL); 2288 } 2289 2290 if (audit_active) 2291 audit_sock(T_CONN_IND, strvp2wq(vp), mp, 0); 2292 if (!(so->so_state & SS_ACCEPTCONN)) { 2293 zcmn_err(getzoneid(), CE_WARN, 2294 "sockfs: T_conn_ind on non-listening socket\n"); 2295 freemsg(mp); 2296 return (NULL); 2297 } 2298 2299 if (tpr->type == T_SSL_PROXY_CONN_IND && mp->b_cont == NULL) { 2300 /* No context: need to fall back */ 2301 struct sonode *fbso; 2302 stdata_t *fbstp; 2303 2304 tpr->type = T_CONN_IND; 2305 2306 fbso = kssl_find_fallback(so->so_kssl_ent); 2307 2308 /* 2309 * No fallback: the remote will timeout and 2310 * disconnect. 2311 */ 2312 if (fbso == NULL) { 2313 freemsg(mp); 2314 return (NULL); 2315 } 2316 fbstp = SOTOV(fbso)->v_stream; 2317 qreply(fbstp->sd_wrq->q_next, mp); 2318 return (NULL); 2319 } 2320 soqueueconnind(so, mp); 2321 *allmsgsigs = S_INPUT | S_RDNORM; 2322 *pollwakeups = POLLIN | POLLRDNORM; 2323 *wakeups = RSLEEP; 2324 return (NULL); 2325 2326 case T_ORDREL_IND: 2327 if (MBLKL(mp) < sizeof (struct T_ordrel_ind)) { 2328 zcmn_err(getzoneid(), CE_WARN, 2329 "sockfs: Too short T_ORDREL_IND. Len = %ld\n", 2330 (ptrdiff_t)(MBLKL(mp))); 2331 freemsg(mp); 2332 return (NULL); 2333 } 2334 2335 /* 2336 * Some providers send this when not fully connected. 2337 * SunLink X.25 needs to retrieve disconnect reason after 2338 * disconnect for compatibility. 
It uses T_ORDREL_IND 2339 * instead of T_DISCON_IND so that it may use the 2340 * endpoint after a connect failure to retrieve the 2341 * reason using an ioctl. Thus we explicitly clear 2342 * SS_ISCONNECTING here for SunLink X.25. 2343 * This is a needed TPI violation. 2344 */ 2345 mutex_enter(&so->so_lock); 2346 so->so_state &= ~SS_ISCONNECTING; 2347 socantrcvmore(so); 2348 mutex_exit(&so->so_lock); 2349 strseteof(SOTOV(so), 1); 2350 /* 2351 * strseteof takes care of read side wakeups, 2352 * pollwakeups, and signals. 2353 */ 2354 freemsg(mp); 2355 return (NULL); 2356 2357 case T_DISCON_IND: 2358 if (MBLKL(mp) < sizeof (struct T_discon_ind)) { 2359 zcmn_err(getzoneid(), CE_WARN, 2360 "sockfs: Too short T_DISCON_IND. Len = %ld\n", 2361 (ptrdiff_t)(MBLKL(mp))); 2362 freemsg(mp); 2363 return (NULL); 2364 } 2365 if (so->so_state & SS_ACCEPTCONN) { 2366 /* 2367 * This is a listener. Look for a queued T_CONN_IND 2368 * with a matching sequence number and remove it 2369 * from the list. 2370 * It is normal to not find the sequence number since 2371 * the soaccept might have already dequeued it 2372 * (in which case the T_CONN_RES will fail with 2373 * TBADSEQ). 2374 */ 2375 (void) soflushconnind(so, tpr->discon_ind.SEQ_number); 2376 freemsg(mp); 2377 return (0); 2378 } 2379 2380 /* 2381 * Not a listener 2382 * 2383 * If SS_CANTRCVMORE for AF_UNIX ignore the discon_reason. 2384 * Such a discon_ind appears when the peer has first done 2385 * a shutdown() followed by a close() in which case we just 2386 * want to record socantsendmore. 2387 * In this case sockfs first receives a T_ORDREL_IND followed 2388 * by a T_DISCON_IND. 2389 * Note that for other transports (e.g. TCP) we need to handle 2390 * the discon_ind in this case since it signals an error. 
2391 */ 2392 mutex_enter(&so->so_lock); 2393 if ((so->so_state & SS_CANTRCVMORE) && 2394 (so->so_family == AF_UNIX)) { 2395 socantsendmore(so); 2396 mutex_exit(&so->so_lock); 2397 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2398 dprintso(so, 1, 2399 ("T_DISCON_IND: error %d\n", so->so_error)); 2400 freemsg(mp); 2401 /* 2402 * Set these variables for caller to process them. 2403 * For the else part where T_DISCON_IND is processed, 2404 * this will be done in the function being called 2405 * (strsock_discon_ind()) 2406 */ 2407 *wakeups = WSLEEP; 2408 *allmsgsigs = S_OUTPUT; 2409 *pollwakeups = POLLOUT; 2410 } else if (so->so_flag & (SOASYNC_UNBIND | SOLOCKED)) { 2411 /* 2412 * Deferred processing of T_DISCON_IND 2413 */ 2414 so_save_discon_ind(so, mp, strsock_discon_ind); 2415 mutex_exit(&so->so_lock); 2416 } else { 2417 /* 2418 * Process T_DISCON_IND now 2419 */ 2420 (void) strsock_discon_ind(so, mp); 2421 mutex_exit(&so->so_lock); 2422 } 2423 return (NULL); 2424 2425 case T_UDERROR_IND: { 2426 struct T_uderror_ind *tudi = &tpr->uderror_ind; 2427 void *addr; 2428 t_uscalar_t addrlen; 2429 int error; 2430 2431 dprintso(so, 0, 2432 ("T_UDERROR_IND: error %d\n", tudi->ERROR_type)); 2433 2434 if (MBLKL(mp) < sizeof (struct T_uderror_ind)) { 2435 zcmn_err(getzoneid(), CE_WARN, 2436 "sockfs: Too short T_UDERROR_IND. 
Len = %ld\n", 2437 (ptrdiff_t)(MBLKL(mp))); 2438 freemsg(mp); 2439 return (NULL); 2440 } 2441 /* Ignore on connection-oriented transports */ 2442 if (so->so_mode & SM_CONNREQUIRED) { 2443 freemsg(mp); 2444 eprintsoline(so, 0); 2445 zcmn_err(getzoneid(), CE_WARN, 2446 "sockfs: T_uderror_ind on connection-oriented " 2447 "transport\n"); 2448 return (NULL); 2449 } 2450 addrlen = tudi->DEST_length; 2451 addr = sogetoff(mp, tudi->DEST_offset, addrlen, 1); 2452 if (addr == NULL) { 2453 zcmn_err(getzoneid(), CE_WARN, 2454 "sockfs: T_uderror_ind with invalid " 2455 "addrlen/offset %u/%d\n", 2456 addrlen, tudi->DEST_offset); 2457 freemsg(mp); 2458 return (NULL); 2459 } 2460 2461 /* Verify source address for connected socket. */ 2462 mutex_enter(&so->so_lock); 2463 if (so->so_state & SS_ISCONNECTED) { 2464 void *faddr; 2465 t_uscalar_t faddr_len; 2466 boolean_t match = B_FALSE; 2467 2468 switch (so->so_family) { 2469 case AF_INET: { 2470 /* Compare just IP address and port */ 2471 struct sockaddr_in *sin1, *sin2; 2472 2473 sin1 = (struct sockaddr_in *)so->so_faddr_sa; 2474 sin2 = (struct sockaddr_in *)addr; 2475 if (addrlen == sizeof (struct sockaddr_in) && 2476 sin1->sin_port == sin2->sin_port && 2477 sin1->sin_addr.s_addr == 2478 sin2->sin_addr.s_addr) 2479 match = B_TRUE; 2480 break; 2481 } 2482 case AF_INET6: { 2483 /* Compare just IP address and port. 
Not flow */ 2484 struct sockaddr_in6 *sin1, *sin2; 2485 2486 sin1 = (struct sockaddr_in6 *)so->so_faddr_sa; 2487 sin2 = (struct sockaddr_in6 *)addr; 2488 if (addrlen == sizeof (struct sockaddr_in6) && 2489 sin1->sin6_port == sin2->sin6_port && 2490 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 2491 &sin2->sin6_addr)) 2492 match = B_TRUE; 2493 break; 2494 } 2495 case AF_UNIX: 2496 faddr = &so->so_ux_faddr; 2497 faddr_len = 2498 (t_uscalar_t)sizeof (so->so_ux_faddr); 2499 if (faddr_len == addrlen && 2500 bcmp(addr, faddr, addrlen) == 0) 2501 match = B_TRUE; 2502 break; 2503 default: 2504 faddr = so->so_faddr_sa; 2505 faddr_len = (t_uscalar_t)so->so_faddr_len; 2506 if (faddr_len == addrlen && 2507 bcmp(addr, faddr, addrlen) == 0) 2508 match = B_TRUE; 2509 break; 2510 } 2511 2512 if (!match) { 2513 #ifdef DEBUG 2514 dprintso(so, 0, 2515 ("sockfs: T_UDERR_IND mismatch: %s - ", 2516 pr_addr(so->so_family, 2517 (struct sockaddr *)addr, 2518 addrlen))); 2519 dprintso(so, 0, ("%s\n", 2520 pr_addr(so->so_family, so->so_faddr_sa, 2521 so->so_faddr_len))); 2522 #endif /* DEBUG */ 2523 mutex_exit(&so->so_lock); 2524 freemsg(mp); 2525 return (NULL); 2526 } 2527 /* 2528 * Make the write error nonpersistent. If the error 2529 * is zero we use ECONNRESET. 2530 * This assumes that the name space for ERROR_type 2531 * is the errno name space. 2532 */ 2533 if (tudi->ERROR_type != 0) 2534 error = tudi->ERROR_type; 2535 else 2536 error = ECONNRESET; 2537 2538 soseterror(so, error); 2539 mutex_exit(&so->so_lock); 2540 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 2541 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2542 *wakeups = RSLEEP | WSLEEP; 2543 *allmsgsigs = S_INPUT | S_RDNORM | S_OUTPUT; 2544 *pollwakeups = POLLIN | POLLRDNORM | POLLOUT; 2545 freemsg(mp); 2546 return (NULL); 2547 } 2548 /* 2549 * If the application asked for delayed errors 2550 * record the T_UDERROR_IND so_eaddr_mp and the reason in 2551 * so_delayed_error for delayed error posting. If the reason 2552 * is zero use ECONNRESET. 
2553 * Note that delayed error indications do not make sense for 2554 * AF_UNIX sockets since sendto checks that the destination 2555 * address is valid at the time of the sendto. 2556 */ 2557 if (!(so->so_options & SO_DGRAM_ERRIND)) { 2558 mutex_exit(&so->so_lock); 2559 freemsg(mp); 2560 return (NULL); 2561 } 2562 if (so->so_eaddr_mp != NULL) 2563 freemsg(so->so_eaddr_mp); 2564 2565 so->so_eaddr_mp = mp; 2566 if (tudi->ERROR_type != 0) 2567 error = tudi->ERROR_type; 2568 else 2569 error = ECONNRESET; 2570 so->so_delayed_error = (ushort_t)error; 2571 mutex_exit(&so->so_lock); 2572 return (NULL); 2573 } 2574 2575 case T_ERROR_ACK: 2576 dprintso(so, 0, 2577 ("strsock_proto: T_ERROR_ACK for %d, error %d/%d\n", 2578 tpr->error_ack.ERROR_prim, 2579 tpr->error_ack.TLI_error, 2580 tpr->error_ack.UNIX_error)); 2581 2582 if (MBLKL(mp) < sizeof (struct T_error_ack)) { 2583 zcmn_err(getzoneid(), CE_WARN, 2584 "sockfs: Too short T_ERROR_ACK. Len = %ld\n", 2585 (ptrdiff_t)(MBLKL(mp))); 2586 freemsg(mp); 2587 return (NULL); 2588 } 2589 /* 2590 * Check if we were waiting for the async message 2591 */ 2592 mutex_enter(&so->so_lock); 2593 if ((so->so_flag & SOASYNC_UNBIND) && 2594 tpr->error_ack.ERROR_prim == T_UNBIND_REQ) { 2595 so_unlock_single(so, SOASYNC_UNBIND); 2596 mutex_exit(&so->so_lock); 2597 freemsg(mp); 2598 return (NULL); 2599 } 2600 mutex_exit(&so->so_lock); 2601 soqueueack(so, mp); 2602 return (NULL); 2603 2604 case T_OK_ACK: 2605 if (MBLKL(mp) < sizeof (struct T_ok_ack)) { 2606 zcmn_err(getzoneid(), CE_WARN, 2607 "sockfs: Too short T_OK_ACK. 
Len = %ld\n", 2608 (ptrdiff_t)(MBLKL(mp))); 2609 freemsg(mp); 2610 return (NULL); 2611 } 2612 /* 2613 * Check if we were waiting for the async message 2614 */ 2615 mutex_enter(&so->so_lock); 2616 if ((so->so_flag & SOASYNC_UNBIND) && 2617 tpr->ok_ack.CORRECT_prim == T_UNBIND_REQ) { 2618 dprintso(so, 1, 2619 ("strsock_proto: T_OK_ACK async unbind\n")); 2620 so_unlock_single(so, SOASYNC_UNBIND); 2621 mutex_exit(&so->so_lock); 2622 freemsg(mp); 2623 return (NULL); 2624 } 2625 mutex_exit(&so->so_lock); 2626 soqueueack(so, mp); 2627 return (NULL); 2628 2629 case T_INFO_ACK: 2630 if (MBLKL(mp) < sizeof (struct T_info_ack)) { 2631 zcmn_err(getzoneid(), CE_WARN, 2632 "sockfs: Too short T_INFO_ACK. Len = %ld\n", 2633 (ptrdiff_t)(MBLKL(mp))); 2634 freemsg(mp); 2635 return (NULL); 2636 } 2637 soqueueack(so, mp); 2638 return (NULL); 2639 2640 case T_CAPABILITY_ACK: 2641 /* 2642 * A T_capability_ack need only be large enough to hold 2643 * the PRIM_type and CAP_bits1 fields; checking for anything 2644 * larger might reject a correct response from an older 2645 * provider. 2646 */ 2647 if (MBLKL(mp) < 2 * sizeof (t_uscalar_t)) { 2648 zcmn_err(getzoneid(), CE_WARN, 2649 "sockfs: Too short T_CAPABILITY_ACK. Len = %ld\n", 2650 (ptrdiff_t)(MBLKL(mp))); 2651 freemsg(mp); 2652 return (NULL); 2653 } 2654 soqueueack(so, mp); 2655 return (NULL); 2656 2657 case T_BIND_ACK: 2658 if (MBLKL(mp) < sizeof (struct T_bind_ack)) { 2659 zcmn_err(getzoneid(), CE_WARN, 2660 "sockfs: Too short T_BIND_ACK. Len = %ld\n", 2661 (ptrdiff_t)(MBLKL(mp))); 2662 freemsg(mp); 2663 return (NULL); 2664 } 2665 soqueueack(so, mp); 2666 return (NULL); 2667 2668 case T_OPTMGMT_ACK: 2669 if (MBLKL(mp) < sizeof (struct T_optmgmt_ack)) { 2670 zcmn_err(getzoneid(), CE_WARN, 2671 "sockfs: Too short T_OPTMGMT_ACK. 
Len = %ld\n", 2672 (ptrdiff_t)(MBLKL(mp))); 2673 freemsg(mp); 2674 return (NULL); 2675 } 2676 soqueueack(so, mp); 2677 return (NULL); 2678 default: 2679 #ifdef DEBUG 2680 zcmn_err(getzoneid(), CE_WARN, 2681 "sockfs: unknown TPI primitive %d received\n", 2682 tpr->type); 2683 #endif /* DEBUG */ 2684 freemsg(mp); 2685 return (NULL); 2686 } 2687 } 2688 2689 /* 2690 * This routine is registered with the stream head to receive other 2691 * (non-data, and non-proto) messages. 2692 * 2693 * Returns NULL if the message was consumed. 2694 * Returns an mblk to make that mblk be processed by the stream head. 2695 * 2696 * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and 2697 * *pollwakeups) for the stream head to take action on. 2698 */ 2699 static mblk_t * 2700 strsock_misc(vnode_t *vp, mblk_t *mp, 2701 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 2702 strsigset_t *allmsgsigs, strpollset_t *pollwakeups) 2703 { 2704 struct sonode *so; 2705 2706 so = VTOSO(vp); 2707 2708 dprintso(so, 1, ("strsock_misc(%p, %p, 0x%x)\n", 2709 (void *)vp, (void *)mp, DB_TYPE(mp))); 2710 2711 /* Set default return values */ 2712 *wakeups = *allmsgsigs = *firstmsgsigs = *pollwakeups = 0; 2713 2714 switch (DB_TYPE(mp)) { 2715 case M_PCSIG: 2716 /* 2717 * This assumes that an M_PCSIG for the urgent data arrives 2718 * before the corresponding T_EXDATA_IND. 2719 * 2720 * Note: Just like in SunOS 4.X and 4.4BSD a poll will be 2721 * awoken before the urgent data shows up. 2722 * For OOBINLINE this can result in select returning 2723 * only exceptions as opposed to except|read. 
2724 */ 2725 if (*mp->b_rptr == SIGURG) { 2726 mutex_enter(&so->so_lock); 2727 dprintso(so, 1, 2728 ("SIGURG(%p): counts %d/%d state %s\n", 2729 (void *)vp, so->so_oobsigcnt, 2730 so->so_oobcnt, 2731 pr_state(so->so_state, so->so_mode))); 2732 so_oob_sig(so, 1, allmsgsigs, pollwakeups); 2733 dprintso(so, 1, 2734 ("after SIGURG(%p): counts %d/%d " 2735 " poll 0x%x sig 0x%x state %s\n", 2736 (void *)vp, so->so_oobsigcnt, 2737 so->so_oobcnt, *pollwakeups, *allmsgsigs, 2738 pr_state(so->so_state, so->so_mode))); 2739 mutex_exit(&so->so_lock); 2740 } 2741 freemsg(mp); 2742 return (NULL); 2743 2744 case M_SIG: 2745 case M_HANGUP: 2746 case M_UNHANGUP: 2747 case M_ERROR: 2748 /* M_ERRORs etc are ignored */ 2749 freemsg(mp); 2750 return (NULL); 2751 2752 case M_FLUSH: 2753 /* 2754 * Do not flush read queue. If the M_FLUSH 2755 * arrives because of an impending T_discon_ind 2756 * we still have to keep any queued data - this is part of 2757 * socket semantics. 2758 */ 2759 if (*mp->b_rptr & FLUSHW) { 2760 *mp->b_rptr &= ~FLUSHR; 2761 return (mp); 2762 } 2763 freemsg(mp); 2764 return (NULL); 2765 2766 default: 2767 return (mp); 2768 } 2769 } 2770 2771 2772 /* Register to receive signals for certain events */ 2773 int 2774 so_set_asyncsigs(vnode_t *vp, pid_t pgrp, int events, int mode, cred_t *cr) 2775 { 2776 struct strsigset ss; 2777 int32_t rval; 2778 2779 /* 2780 * Note that SOLOCKED will be set except for the call from soaccept(). 2781 */ 2782 ASSERT(!mutex_owned(&VTOSO(vp)->so_lock)); 2783 ss.ss_pid = pgrp; 2784 ss.ss_events = events; 2785 return (strioctl(vp, I_ESETSIG, (intptr_t)&ss, mode, K_TO_K, cr, 2786 &rval)); 2787 } 2788 2789 2790 /* Register for events matching the SS_ASYNC flag */ 2791 int 2792 so_set_events(struct sonode *so, vnode_t *vp, cred_t *cr) 2793 { 2794 int events = so->so_state & SS_ASYNC ? 
2795 S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT : 2796 S_RDBAND | S_BANDURG; 2797 2798 return (so_set_asyncsigs(vp, so->so_pgrp, events, 0, cr)); 2799 } 2800 2801 2802 /* Change the SS_ASYNC flag, and update signal delivery if needed */ 2803 int 2804 so_flip_async(struct sonode *so, vnode_t *vp, int mode, cred_t *cr) 2805 { 2806 ASSERT(mutex_owned(&so->so_lock)); 2807 if (so->so_pgrp != 0) { 2808 int error; 2809 int events = so->so_state & SS_ASYNC ? /* Old flag */ 2810 S_RDBAND | S_BANDURG : /* New sigs */ 2811 S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT; 2812 2813 so_lock_single(so); 2814 mutex_exit(&so->so_lock); 2815 2816 error = so_set_asyncsigs(vp, so->so_pgrp, events, mode, cr); 2817 2818 mutex_enter(&so->so_lock); 2819 so_unlock_single(so, SOLOCKED); 2820 if (error) 2821 return (error); 2822 } 2823 so->so_state ^= SS_ASYNC; 2824 return (0); 2825 } 2826 2827 /* 2828 * Set new pid/pgrp for SIGPOLL (or SIGIO for FIOASYNC mode), replacing 2829 * any existing one. If passed zero, just clear the existing one. 2830 */ 2831 int 2832 so_set_siggrp(struct sonode *so, vnode_t *vp, pid_t pgrp, int mode, cred_t *cr) 2833 { 2834 int events = so->so_state & SS_ASYNC ? 2835 S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT : 2836 S_RDBAND | S_BANDURG; 2837 int error; 2838 2839 ASSERT(mutex_owned(&so->so_lock)); 2840 2841 /* 2842 * Change socket process (group). 2843 * 2844 * strioctl (via so_set_asyncsigs) will perform permission check and 2845 * also keep a PID_HOLD to prevent the pid from being reused. 
2846 */ 2847 so_lock_single(so); 2848 mutex_exit(&so->so_lock); 2849 2850 if (pgrp != 0) { 2851 dprintso(so, 1, ("setown: adding pgrp %d ev 0x%x\n", 2852 pgrp, events)); 2853 error = so_set_asyncsigs(vp, pgrp, events, mode, cr); 2854 if (error != 0) { 2855 eprintsoline(so, error); 2856 goto bad; 2857 } 2858 } 2859 /* Remove the previously registered process/group */ 2860 if (so->so_pgrp != 0) { 2861 dprintso(so, 1, ("setown: removing pgrp %d\n", so->so_pgrp)); 2862 error = so_set_asyncsigs(vp, so->so_pgrp, 0, mode, cr); 2863 if (error != 0) { 2864 eprintsoline(so, error); 2865 error = 0; 2866 } 2867 } 2868 mutex_enter(&so->so_lock); 2869 so_unlock_single(so, SOLOCKED); 2870 so->so_pgrp = pgrp; 2871 return (0); 2872 bad: 2873 mutex_enter(&so->so_lock); 2874 so_unlock_single(so, SOLOCKED); 2875 return (error); 2876 } 2877 2878 2879 2880 /* 2881 * Translate a TLI(/XTI) error into a system error as best we can. 2882 */ 2883 static const int tli_errs[] = { 2884 0, /* no error */ 2885 EADDRNOTAVAIL, /* TBADADDR */ 2886 ENOPROTOOPT, /* TBADOPT */ 2887 EACCES, /* TACCES */ 2888 EBADF, /* TBADF */ 2889 EADDRNOTAVAIL, /* TNOADDR */ 2890 EPROTO, /* TOUTSTATE */ 2891 ECONNABORTED, /* TBADSEQ */ 2892 0, /* TSYSERR - will never get */ 2893 EPROTO, /* TLOOK - should never be sent by transport */ 2894 EMSGSIZE, /* TBADDATA */ 2895 EMSGSIZE, /* TBUFOVFLW */ 2896 EPROTO, /* TFLOW */ 2897 EWOULDBLOCK, /* TNODATA */ 2898 EPROTO, /* TNODIS */ 2899 EPROTO, /* TNOUDERR */ 2900 EINVAL, /* TBADFLAG */ 2901 EPROTO, /* TNOREL */ 2902 EOPNOTSUPP, /* TNOTSUPPORT */ 2903 EPROTO, /* TSTATECHNG */ 2904 /* following represent error namespace expansion with XTI */ 2905 EPROTO, /* TNOSTRUCTYPE - never sent by transport */ 2906 EPROTO, /* TBADNAME - never sent by transport */ 2907 EPROTO, /* TBADQLEN - never sent by transport */ 2908 EADDRINUSE, /* TADDRBUSY */ 2909 EBADF, /* TINDOUT */ 2910 EBADF, /* TPROVMISMATCH */ 2911 EBADF, /* TRESQLEN */ 2912 EBADF, /* TRESADDR */ 2913 EPROTO, /* TQFULL - 
never sent by transport */ 2914 EPROTO, /* TPROTO */ 2915 }; 2916 2917 static int 2918 tlitosyserr(int terr) 2919 { 2920 ASSERT(terr != TSYSERR); 2921 if (terr >= (sizeof (tli_errs) / sizeof (tli_errs[0]))) 2922 return (EPROTO); 2923 else 2924 return (tli_errs[terr]); 2925 } 2926 2927 /* 2928 * Sockfs sodirect STREAMS read put procedure. Called from sodirect enable 2929 * transport driver/module with an mblk_t chain. 2930 * 2931 * Note, we in-line putq() for the fast-path cases of q is empty, q_last and 2932 * bp are of type M_DATA. All other cases we call putq(). 2933 * 2934 * On success a zero will be return, else an errno will be returned. 2935 */ 2936 int 2937 sodput(sodirect_t *sodp, mblk_t *bp) 2938 { 2939 queue_t *q = sodp->sod_q; 2940 struct stdata *stp = (struct stdata *)q->q_ptr; 2941 mblk_t *nbp; 2942 int ret; 2943 mblk_t *last = q->q_last; 2944 int bytecnt = 0; 2945 int mblkcnt = 0; 2946 2947 2948 ASSERT(MUTEX_HELD(sodp->sod_lock)); 2949 2950 if (stp->sd_flag == STREOF) { 2951 ret = 0; 2952 goto error; 2953 } 2954 2955 if (q->q_first == NULL) { 2956 /* Q empty, really fast fast-path */ 2957 bp->b_prev = NULL; 2958 bp->b_next = NULL; 2959 q->q_first = bp; 2960 q->q_last = bp; 2961 2962 } else if (last->b_datap->db_type == M_DATA && 2963 bp->b_datap->db_type == M_DATA) { 2964 /* 2965 * Last mblk_t chain and bp are both type M_DATA so 2966 * in-line putq() here, if the DBLK_UIOA state match 2967 * add bp to the end of the current last chain, else 2968 * start a new last chain with bp. 2969 */ 2970 if ((last->b_datap->db_flags & DBLK_UIOA) == 2971 (bp->b_datap->db_flags & DBLK_UIOA)) { 2972 /* Added to end */ 2973 while ((nbp = last->b_cont) != NULL) 2974 last = nbp; 2975 last->b_cont = bp; 2976 } else { 2977 /* New last */ 2978 last->b_next = bp; 2979 bp->b_next = NULL; 2980 bp->b_prev = last; 2981 q->q_last = bp; 2982 } 2983 } else { 2984 /* 2985 * Can't use q_last so just call putq(). 
2986 */ 2987 (void) putq(q, bp); 2988 return (0); 2989 } 2990 2991 /* Count bytes and mblk_t's */ 2992 do { 2993 bytecnt += MBLKL(bp); 2994 mblkcnt++; 2995 } while ((bp = bp->b_cont) != NULL); 2996 q->q_count += bytecnt; 2997 q->q_mblkcnt += mblkcnt; 2998 2999 /* Check for QFULL */ 3000 if (q->q_count >= q->q_hiwat + sodp->sod_want || 3001 q->q_mblkcnt >= q->q_hiwat) { 3002 q->q_flag |= QFULL; 3003 } 3004 3005 return (0); 3006 3007 error: 3008 do { 3009 if ((nbp = bp->b_next) != NULL) 3010 bp->b_next = NULL; 3011 freemsg(bp); 3012 } while ((bp = nbp) != NULL); 3013 3014 return (ret); 3015 } 3016 3017 /* 3018 * Sockfs sodirect read wakeup. Called from a sodirect enabled transport 3019 * driver/module to indicate that read-side data is available. 3020 * 3021 * On return the sodirect_t.lock mutex will be exited so this must be the 3022 * last sodirect_t call to guarantee atomic access of *sodp. 3023 */ 3024 void 3025 sodwakeup(sodirect_t *sodp) 3026 { 3027 queue_t *q = sodp->sod_q; 3028 struct stdata *stp = (struct stdata *)q->q_ptr; 3029 3030 ASSERT(MUTEX_HELD(sodp->sod_lock)); 3031 3032 if (stp->sd_flag & RSLEEP) { 3033 stp->sd_flag &= ~RSLEEP; 3034 cv_broadcast(&q->q_wait); 3035 } 3036 3037 if (stp->sd_rput_opt & SR_POLLIN) { 3038 stp->sd_rput_opt &= ~SR_POLLIN; 3039 mutex_exit(sodp->sod_lock); 3040 pollwakeup(&stp->sd_pollist, POLLIN | POLLRDNORM); 3041 } else 3042 mutex_exit(sodp->sod_lock); 3043 } 3044