1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/inttypes.h> 31 #include <sys/t_lock.h> 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/buf.h> 35 #include <sys/conf.h> 36 #include <sys/cred.h> 37 #include <sys/kmem.h> 38 #include <sys/sysmacros.h> 39 #include <sys/vfs.h> 40 #include <sys/vnode.h> 41 #include <sys/debug.h> 42 #include <sys/errno.h> 43 #include <sys/time.h> 44 #include <sys/file.h> 45 #include <sys/user.h> 46 #include <sys/stream.h> 47 #include <sys/strsubr.h> 48 #include <sys/esunddi.h> 49 #include <sys/flock.h> 50 #include <sys/modctl.h> 51 #include <sys/vtrace.h> 52 #include <sys/strsun.h> 53 #include <sys/cmn_err.h> 54 #include <sys/proc.h> 55 #include <sys/ddi.h> 56 #include <sys/kmem_impl.h> 57 58 #include <sys/suntpi.h> 59 #include <sys/socket.h> 60 #include <sys/sockio.h> 61 #include <sys/socketvar.h> 62 #include <netinet/in.h> 63 64 #include <sys/tiuser.h> 65 #define _SUN_TPI_VERSION 2 66 #include <sys/tihdr.h> 67 68 #include <inet/kssl/ksslapi.h> 69 70 #include <c2/audit.h> 71 72 int so_default_version = SOV_SOCKSTREAM; 73 74 #ifdef DEBUG 75 /* Set sockdebug to print debug messages when SO_DEBUG is set */ 76 int sockdebug = 0; 77 78 /* Set sockprinterr to print error messages when SO_DEBUG is set */ 79 int sockprinterr = 0; 80 81 /* 82 * Set so_default_options to SO_DEBUG is all sockets should be created 83 * with SO_DEBUG set. This is needed to get debug printouts from the 84 * socket() call itself. 85 */ 86 int so_default_options = 0; 87 #endif /* DEBUG */ 88 89 #ifdef SOCK_TEST 90 /* 91 * Set to number of ticks to limit cv_waits for code coverage testing. 92 * Set to 1000 when SO_DEBUG is set to 2. 93 */ 94 clock_t sock_test_timelimit = 0; 95 #endif /* SOCK_TEST */ 96 97 /* 98 * For concurrency testing of e.g. opening /dev/ip which does not 99 * handle T_INFO_REQ messages. 100 */ 101 int so_no_tinfo = 0; 102 103 /* 104 * Timeout for getting a T_CAPABILITY_ACK - it is possible for a provider 105 * to simply ignore the T_CAPABILITY_REQ. 
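 *
 * Illustrative sketch (not part of the original source): the tunable is
 * in seconds and is converted to clock ticks with hz at the point of
 * use, roughly as do_tcapability() does further down:
 *
 *	error = sowaitprim(so, T_CAPABILITY_REQ, T_CAPABILITY_ACK,
 *	    (t_uscalar_t)sizeof (struct T_capability_ack), &mp,
 *	    sock_capability_timeout * hz);
 *	if (error == ETIME)
 *		error = do_tinfo(so);	// fall back to T_INFO_REQ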
106 */ 107 clock_t sock_capability_timeout = 2; /* seconds */ 108 109 static int do_tcapability(struct sonode *so, t_uscalar_t cap_bits1); 110 static void so_removehooks(struct sonode *so); 111 112 static mblk_t *strsock_proto(vnode_t *vp, mblk_t *mp, 113 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 114 strsigset_t *allmsgsigs, strpollset_t *pollwakeups); 115 static mblk_t *strsock_misc(vnode_t *vp, mblk_t *mp, 116 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 117 strsigset_t *allmsgsigs, strpollset_t *pollwakeups); 118 119 static int tlitosyserr(int terr); 120 121 /* 122 * Convert a socket to a stream. Invoked when the illusory sockmod 123 * is popped from the stream. 124 * Change the stream head back to default operation without losing 125 * any messages (T_conn_ind's are moved to the stream head queue). 126 */ 127 int 128 so_sock2stream(struct sonode *so) 129 { 130 struct vnode *vp = SOTOV(so); 131 queue_t *rq; 132 mblk_t *mp; 133 int error = 0; 134 135 ASSERT(MUTEX_HELD(&so->so_plumb_lock)); 136 137 mutex_enter(&so->so_lock); 138 so_lock_single(so); 139 140 ASSERT(so->so_version != SOV_STREAM); 141 142 if (so->so_state & SS_DIRECT) { 143 mblk_t **mpp; 144 int rval; 145 146 /* 147 * Tell the transport below that sockmod is being popped 148 */ 149 mutex_exit(&so->so_lock); 150 error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K, CRED(), 151 &rval); 152 mutex_enter(&so->so_lock); 153 if (error != 0) { 154 dprintso(so, 0, ("so_sock2stream(%p): " 155 "_SIOCSOCKFALLBACK failed\n", so)); 156 goto exit; 157 } 158 so->so_state &= ~SS_DIRECT; 159 160 for (mpp = &so->so_conn_ind_head; (mp = *mpp) != NULL; 161 mpp = &mp->b_next) { 162 struct T_conn_ind *conn_ind; 163 164 /* 165 * strsock_proto() has already verified the length of 166 * this message block. 167 */ 168 ASSERT(MBLKL(mp) >= sizeof (struct T_conn_ind)); 169 170 conn_ind = (struct T_conn_ind *)mp->b_rptr; 171 if (conn_ind->OPT_length == 0 && 172 conn_ind->OPT_offset == 0) 173 continue; 174 175 if (DB_REF(mp) > 1) { 176 mblk_t *newmp; 177 size_t length; 178 cred_t *cr; 179 180 /* 181 * Copy the message block because it is used 182 * elsewhere, too. 183 */ 184 length = MBLKL(mp); 185 newmp = soallocproto(length, _ALLOC_INTR); 186 if (newmp == NULL) { 187 error = EINTR; 188 goto exit; 189 } 190 bcopy(mp->b_rptr, newmp->b_wptr, length); 191 newmp->b_wptr += length; 192 newmp->b_next = mp->b_next; 193 cr = DB_CRED(mp); 194 if (cr != NULL) 195 mblk_setcred(newmp, cr); 196 DB_CPID(newmp) = DB_CPID(mp); 197 198 /* 199 * Link the new message block into the queue 200 * and free the old one. 201 */ 202 *mpp = newmp; 203 mp->b_next = NULL; 204 freemsg(mp); 205 206 mp = newmp; 207 conn_ind = (struct T_conn_ind *)mp->b_rptr; 208 } 209 210 /* 211 * Remove options added by TCP for accept fast-path. 212 */ 213 conn_ind->OPT_length = 0; 214 conn_ind->OPT_offset = 0; 215 } 216 } 217 218 so->so_version = SOV_STREAM; 219 so->so_priv = NULL; 220 221 /* 222 * Remove the hooks in the stream head to avoid queuing more 223 * packets in sockfs. 224 */ 225 mutex_exit(&so->so_lock); 226 so_removehooks(so); 227 mutex_enter(&so->so_lock); 228 229 /* 230 * Clear any state related to urgent data. Leave any T_EXDATA_IND 231 * on the queue - the behavior of urgent data after a switch is 232 * left undefined. 
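 *
 * For context, a hedged user-level sketch (not part of the original
 * source) of what drives this switch: an application turns a socket
 * into a bare stream by popping the illusory sockmod, and back again
 * by pushing it, e.g.
 *
 *	#include <stropts.h>
 *
 *	if (ioctl(fd, I_POP, 0) == -1)		// socket -> stream,
 *		perror("I_POP");		// runs so_sock2stream()
 *	...
 *	if (ioctl(fd, I_PUSH, "sockmod") == -1)	// stream -> socket,
 *		perror("I_PUSH");		// runs so_stream2sock()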
233 */ 234 so->so_error = so->so_delayed_error = 0; 235 freemsg(so->so_oobmsg); 236 so->so_oobmsg = NULL; 237 so->so_oobsigcnt = so->so_oobcnt = 0; 238 239 so->so_state &= ~(SS_RCVATMARK|SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA| 240 SS_HASCONNIND|SS_SAVEDEOR); 241 ASSERT(so_verify_oobstate(so)); 242 243 freemsg(so->so_ack_mp); 244 so->so_ack_mp = NULL; 245 246 /* 247 * Flush the T_DISCON_IND on so_discon_ind_mp. 248 */ 249 so_flush_discon_ind(so); 250 251 /* 252 * Move any queued T_CONN_IND messages to stream head queue. 253 */ 254 rq = RD(strvp2wq(vp)); 255 while ((mp = so->so_conn_ind_head) != NULL) { 256 so->so_conn_ind_head = mp->b_next; 257 mp->b_next = NULL; 258 if (so->so_conn_ind_head == NULL) { 259 ASSERT(so->so_conn_ind_tail == mp); 260 so->so_conn_ind_tail = NULL; 261 } 262 dprintso(so, 0, 263 ("so_sock2stream(%p): moving T_CONN_IND\n", 264 so)); 265 266 /* Drop lock across put() */ 267 mutex_exit(&so->so_lock); 268 put(rq, mp); 269 mutex_enter(&so->so_lock); 270 } 271 272 exit: 273 ASSERT(MUTEX_HELD(&so->so_lock)); 274 so_unlock_single(so, SOLOCKED); 275 mutex_exit(&so->so_lock); 276 return (error); 277 } 278 279 /* 280 * Convert a stream back to a socket. This is invoked when the illusory 281 * sockmod is pushed on a stream (where the stream was "created" by 282 * popping the illusory sockmod). 283 * This routine can not recreate the socket state (certain aspects of 284 * it like urgent data state and the bound/connected addresses for AF_UNIX 285 * sockets can not be recreated by asking the transport for information). 286 * Thus this routine implicitly assumes that the socket is in an initial 287 * state (as if it was just created). It flushes any messages queued on the 288 * read queue to avoid dealing with e.g. TPI acks or T_exdata_ind messages. 289 */ 290 void 291 so_stream2sock(struct sonode *so) 292 { 293 struct vnode *vp = SOTOV(so); 294 295 ASSERT(MUTEX_HELD(&so->so_plumb_lock)); 296 297 mutex_enter(&so->so_lock); 298 so_lock_single(so); 299 ASSERT(so->so_version == SOV_STREAM); 300 so->so_version = SOV_SOCKSTREAM; 301 so->so_pushcnt = 0; 302 mutex_exit(&so->so_lock); 303 304 /* 305 * Set a permanent error to force any thread in sorecvmsg to 306 * return (and drop SOREADLOCKED). Clear the error once 307 * we have SOREADLOCKED. 308 * This makes a read sleeping during the I_PUSH of sockmod return 309 * EIO. 310 */ 311 strsetrerror(SOTOV(so), EIO, 1, NULL); 312 313 /* 314 * Get the read lock before flushing data to avoid 315 * problems with the T_EXDATA_IND MSG_PEEK code in sorecvmsg. 316 */ 317 mutex_enter(&so->so_lock); 318 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 319 mutex_exit(&so->so_lock); 320 321 strsetrerror(SOTOV(so), 0, 0, NULL); 322 so_installhooks(so); 323 324 /* 325 * Flush everything on the read queue. 326 * This ensures that no T_CONN_IND remain and that no T_EXDATA_IND 327 * remain; those types of messages would confuse sockfs. 328 */ 329 strflushrq(vp, FLUSHALL); 330 mutex_enter(&so->so_lock); 331 332 /* 333 * Flush the T_DISCON_IND on so_discon_ind_mp. 334 */ 335 so_flush_discon_ind(so); 336 so_unlock_read(so); /* Clear SOREADLOCKED */ 337 338 so_unlock_single(so, SOLOCKED); 339 mutex_exit(&so->so_lock); 340 } 341 342 /* 343 * Install the hooks in the stream head.
344 */ 345 void 346 so_installhooks(struct sonode *so) 347 { 348 struct vnode *vp = SOTOV(so); 349 350 strsetrputhooks(vp, SH_SIGALLDATA | SH_IGN_ZEROLEN | SH_CONSOL_DATA, 351 strsock_proto, strsock_misc); 352 strsetwputhooks(vp, SH_SIGPIPE | SH_RECHECK_ERR, 0); 353 } 354 355 /* 356 * Remove the hooks in the stream head. 357 */ 358 static void 359 so_removehooks(struct sonode *so) 360 { 361 struct vnode *vp = SOTOV(so); 362 363 strsetrputhooks(vp, 0, NULL, NULL); 364 strsetwputhooks(vp, 0, STRTIMOUT); 365 /* 366 * Leave read behavior as it would have been for a normal 367 * stream i.e. a read of an M_PROTO will fail. 368 */ 369 } 370 371 /* 372 * Initialize the streams side of a socket including 373 * T_info_req/ack processing. If tso is not NULL its values are used thereby 374 * avoiding the T_INFO_REQ. 375 */ 376 int 377 so_strinit(struct sonode *so, struct sonode *tso) 378 { 379 struct vnode *vp = SOTOV(so); 380 struct stdata *stp; 381 mblk_t *mp; 382 int error; 383 384 dprintso(so, 1, ("so_strinit(%p)\n", so)); 385 386 /* Preallocate an unbind_req message */ 387 mp = soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP); 388 mutex_enter(&so->so_lock); 389 so->so_unbind_mp = mp; 390 #ifdef DEBUG 391 so->so_options = so_default_options; 392 #endif /* DEBUG */ 393 mutex_exit(&so->so_lock); 394 395 so_installhooks(so); 396 397 /* 398 * The T_CAPABILITY_REQ should be the first message sent down because 399 * at least TCP has a fast-path for this which avoids timeouts while 400 * waiting for the T_CAPABILITY_ACK under high system load. 401 */ 402 if (tso == NULL) { 403 error = do_tcapability(so, TC1_ACCEPTOR_ID | TC1_INFO); 404 if (error) 405 return (error); 406 } else { 407 mutex_enter(&so->so_lock); 408 so->so_tsdu_size = tso->so_tsdu_size; 409 so->so_etsdu_size = tso->so_etsdu_size; 410 so->so_addr_size = tso->so_addr_size; 411 so->so_opt_size = tso->so_opt_size; 412 so->so_tidu_size = tso->so_tidu_size; 413 so->so_serv_type = tso->so_serv_type; 414 so->so_mode = tso->so_mode & ~SM_ACCEPTOR_ID; 415 mutex_exit(&so->so_lock); 416 417 /* the following do_tcapability may update so->so_mode */ 418 if ((tso->so_serv_type != T_CLTS) && 419 !(tso->so_state & SS_DIRECT)) { 420 error = do_tcapability(so, TC1_ACCEPTOR_ID); 421 if (error) 422 return (error); 423 } 424 } 425 /* 426 * If the addr_size is 0 we treat it as already bound 427 * and connected. This is used by the routing socket. 428 * We set the addr_size to something to allocate a the address 429 * structures. 430 */ 431 if (so->so_addr_size == 0) { 432 so->so_state |= SS_ISBOUND | SS_ISCONNECTED; 433 /* Address size can vary with address families. */ 434 if (so->so_family == AF_INET6) 435 so->so_addr_size = 436 (t_scalar_t)sizeof (struct sockaddr_in6); 437 else 438 so->so_addr_size = 439 (t_scalar_t)sizeof (struct sockaddr_in); 440 ASSERT(so->so_unbind_mp); 441 } 442 /* 443 * Allocate the addresses. 444 */ 445 ASSERT(so->so_laddr_sa == NULL && so->so_faddr_sa == NULL); 446 ASSERT(so->so_laddr_len == 0 && so->so_faddr_len == 0); 447 so->so_laddr_maxlen = so->so_faddr_maxlen = 448 P2ROUNDUP(so->so_addr_size, KMEM_ALIGN); 449 so->so_laddr_sa = kmem_alloc(so->so_laddr_maxlen * 2, KM_SLEEP); 450 so->so_faddr_sa = (struct sockaddr *)((caddr_t)so->so_laddr_sa 451 + so->so_laddr_maxlen); 452 453 if (so->so_family == AF_UNIX) { 454 /* 455 * Initialize AF_UNIX related fields. 
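 *
 * Worked example (illustrative, not in the original source) of the
 * address allocation just above: for AF_INET, so_addr_size is
 * sizeof (struct sockaddr_in) == 16, so with KMEM_ALIGN == 8
 *
 *	so_laddr_maxlen = so_faddr_maxlen = P2ROUNDUP(16, 8) == 16;
 *
 * and a single 32-byte kmem_alloc() buffer holds both addresses, with
 * so_faddr_sa starting 16 bytes into it.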
456 */ 457 bzero(&so->so_ux_laddr, sizeof (so->so_ux_laddr)); 458 bzero(&so->so_ux_faddr, sizeof (so->so_ux_faddr)); 459 } 460 461 stp = vp->v_stream; 462 /* 463 * Have to keep minpsz at zero in order to allow write/send of zero 464 * bytes. 465 */ 466 mutex_enter(&stp->sd_lock); 467 if (stp->sd_qn_minpsz == 1) 468 stp->sd_qn_minpsz = 0; 469 mutex_exit(&stp->sd_lock); 470 471 return (0); 472 } 473 474 static void 475 copy_tinfo(struct sonode *so, struct T_info_ack *tia) 476 { 477 so->so_tsdu_size = tia->TSDU_size; 478 so->so_etsdu_size = tia->ETSDU_size; 479 so->so_addr_size = tia->ADDR_size; 480 so->so_opt_size = tia->OPT_size; 481 so->so_tidu_size = tia->TIDU_size; 482 so->so_serv_type = tia->SERV_type; 483 switch (tia->CURRENT_state) { 484 case TS_UNBND: 485 break; 486 case TS_IDLE: 487 so->so_state |= SS_ISBOUND; 488 so->so_laddr_len = 0; 489 so->so_state &= ~SS_LADDR_VALID; 490 break; 491 case TS_DATA_XFER: 492 so->so_state |= SS_ISBOUND|SS_ISCONNECTED; 493 so->so_laddr_len = 0; 494 so->so_faddr_len = 0; 495 so->so_state &= ~(SS_LADDR_VALID | SS_FADDR_VALID); 496 break; 497 } 498 499 /* 500 * Heuristics for determining the socket mode flags 501 * (SM_ATOMIC, SM_CONNREQUIRED, SM_ADDR, SM_FDPASSING, 502 * and SM_EXDATA, SM_OPTDATA, and SM_BYTESTREAM) 503 * from the info ack. 504 */ 505 if (so->so_serv_type == T_CLTS) { 506 so->so_mode |= SM_ATOMIC | SM_ADDR; 507 } else { 508 so->so_mode |= SM_CONNREQUIRED; 509 if (so->so_etsdu_size != 0 && so->so_etsdu_size != -2) 510 so->so_mode |= SM_EXDATA; 511 } 512 if (so->so_type == SOCK_SEQPACKET || so->so_type == SOCK_RAW) { 513 /* Semantics are to discard tail end of messages */ 514 so->so_mode |= SM_ATOMIC; 515 } 516 if (so->so_family == AF_UNIX) { 517 so->so_mode |= SM_FDPASSING | SM_OPTDATA; 518 if (so->so_addr_size == -1) { 519 /* MAXPATHLEN + soun_family + nul termination */ 520 so->so_addr_size = (t_scalar_t)(MAXPATHLEN + 521 sizeof (short) + 1); 522 } 523 if (so->so_type == SOCK_STREAM) { 524 /* 525 * Make it into a byte-stream transport. 526 * SOCK_SEQPACKET sockets are unchanged. 527 */ 528 so->so_tsdu_size = 0; 529 } 530 } else if (so->so_addr_size == -1) { 531 /* 532 * Logic extracted from sockmod - have to pick some max address 533 * length in order to preallocate the addresses. 534 */ 535 so->so_addr_size = SOA_DEFSIZE; 536 } 537 if (so->so_tsdu_size == 0) 538 so->so_mode |= SM_BYTESTREAM; 539 } 540 541 static int 542 check_tinfo(struct sonode *so) 543 { 544 /* Consistency checks */ 545 if (so->so_type == SOCK_DGRAM && so->so_serv_type != T_CLTS) { 546 eprintso(so, ("service type and socket type mismatch\n")); 547 eprintsoline(so, EPROTO); 548 return (EPROTO); 549 } 550 if (so->so_type == SOCK_STREAM && so->so_serv_type == T_CLTS) { 551 eprintso(so, ("service type and socket type mismatch\n")); 552 eprintsoline(so, EPROTO); 553 return (EPROTO); 554 } 555 if (so->so_type == SOCK_SEQPACKET && so->so_serv_type == T_CLTS) { 556 eprintso(so, ("service type and socket type mismatch\n")); 557 eprintsoline(so, EPROTO); 558 return (EPROTO); 559 } 560 if (so->so_family == AF_INET && 561 so->so_addr_size != (t_scalar_t)sizeof (struct sockaddr_in)) { 562 eprintso(so, 563 ("AF_INET must have sockaddr_in address length. Got %d\n", 564 so->so_addr_size)); 565 eprintsoline(so, EMSGSIZE); 566 return (EMSGSIZE); 567 } 568 if (so->so_family == AF_INET6 && 569 so->so_addr_size != (t_scalar_t)sizeof (struct sockaddr_in6)) { 570 eprintso(so, 571 ("AF_INET6 must have sockaddr_in6 address length. 
Got %d\n", 572 so->so_addr_size)); 573 eprintsoline(so, EMSGSIZE); 574 return (EMSGSIZE); 575 } 576 577 dprintso(so, 1, ( 578 "tinfo: serv %d tsdu %d, etsdu %d, addr %d, opt %d, tidu %d\n", 579 so->so_serv_type, so->so_tsdu_size, so->so_etsdu_size, 580 so->so_addr_size, so->so_opt_size, 581 so->so_tidu_size)); 582 dprintso(so, 1, ("tinfo: so_state %s\n", 583 pr_state(so->so_state, so->so_mode))); 584 return (0); 585 } 586 587 /* 588 * Send down T_info_req and wait for the ack. 589 * Record interesting T_info_ack values in the sonode. 590 */ 591 static int 592 do_tinfo(struct sonode *so) 593 { 594 struct T_info_req tir; 595 mblk_t *mp; 596 int error; 597 598 ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 599 600 if (so_no_tinfo) { 601 so->so_addr_size = 0; 602 return (0); 603 } 604 605 dprintso(so, 1, ("do_tinfo(%p)\n", so)); 606 607 /* Send T_INFO_REQ */ 608 tir.PRIM_type = T_INFO_REQ; 609 mp = soallocproto1(&tir, sizeof (tir), 610 sizeof (struct T_info_req) + sizeof (struct T_info_ack), 611 _ALLOC_INTR); 612 if (mp == NULL) { 613 eprintsoline(so, ENOBUFS); 614 return (ENOBUFS); 615 } 616 /* T_INFO_REQ has to be M_PCPROTO */ 617 DB_TYPE(mp) = M_PCPROTO; 618 619 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 620 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 621 if (error) { 622 eprintsoline(so, error); 623 return (error); 624 } 625 mutex_enter(&so->so_lock); 626 /* Wait for T_INFO_ACK */ 627 if ((error = sowaitprim(so, T_INFO_REQ, T_INFO_ACK, 628 (t_uscalar_t)sizeof (struct T_info_ack), &mp, 0))) { 629 mutex_exit(&so->so_lock); 630 eprintsoline(so, error); 631 return (error); 632 } 633 634 ASSERT(mp); 635 copy_tinfo(so, (struct T_info_ack *)mp->b_rptr); 636 mutex_exit(&so->so_lock); 637 freemsg(mp); 638 return (check_tinfo(so)); 639 } 640 641 /* 642 * Send down T_capability_req and wait for the ack. 643 * Record interesting T_capability_ack values in the sonode. 
644 */ 645 static int 646 do_tcapability(struct sonode *so, t_uscalar_t cap_bits1) 647 { 648 struct T_capability_req tcr; 649 struct T_capability_ack *tca; 650 mblk_t *mp; 651 int error; 652 653 ASSERT(cap_bits1 != 0); 654 ASSERT((cap_bits1 & ~(TC1_ACCEPTOR_ID | TC1_INFO)) == 0); 655 ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 656 657 if (so->so_provinfo->tpi_capability == PI_NO) 658 return (do_tinfo(so)); 659 660 if (so_no_tinfo) { 661 so->so_addr_size = 0; 662 if ((cap_bits1 &= ~TC1_INFO) == 0) 663 return (0); 664 } 665 666 dprintso(so, 1, ("do_tcapability(%p)\n", so)); 667 668 /* Send T_CAPABILITY_REQ */ 669 tcr.PRIM_type = T_CAPABILITY_REQ; 670 tcr.CAP_bits1 = cap_bits1; 671 mp = soallocproto1(&tcr, sizeof (tcr), 672 sizeof (struct T_capability_req) + sizeof (struct T_capability_ack), 673 _ALLOC_INTR); 674 if (mp == NULL) { 675 eprintsoline(so, ENOBUFS); 676 return (ENOBUFS); 677 } 678 /* T_CAPABILITY_REQ should be M_PCPROTO here */ 679 DB_TYPE(mp) = M_PCPROTO; 680 681 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 682 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 683 if (error) { 684 eprintsoline(so, error); 685 return (error); 686 } 687 mutex_enter(&so->so_lock); 688 /* Wait for T_CAPABILITY_ACK */ 689 if ((error = sowaitprim(so, T_CAPABILITY_REQ, T_CAPABILITY_ACK, 690 (t_uscalar_t)sizeof (*tca), &mp, sock_capability_timeout * hz))) { 691 mutex_exit(&so->so_lock); 692 PI_PROVLOCK(so->so_provinfo); 693 if (so->so_provinfo->tpi_capability == PI_DONTKNOW) 694 so->so_provinfo->tpi_capability = PI_NO; 695 PI_PROVUNLOCK(so->so_provinfo); 696 ASSERT((so->so_mode & SM_ACCEPTOR_ID) == 0); 697 if (cap_bits1 & TC1_INFO) { 698 /* 699 * If the T_CAPABILITY_REQ timed out and then a 700 * T_INFO_REQ gets a protocol error, most likely 701 * the capability was slow (vs. unsupported). Return 702 * ENOSR for this case as a best guess. 703 */ 704 if (error == ETIME) { 705 return ((error = do_tinfo(so)) == EPROTO ? 706 ENOSR : error); 707 } 708 return (do_tinfo(so)); 709 } 710 return (0); 711 } 712 713 if (so->so_provinfo->tpi_capability == PI_DONTKNOW) { 714 PI_PROVLOCK(so->so_provinfo); 715 so->so_provinfo->tpi_capability = PI_YES; 716 PI_PROVUNLOCK(so->so_provinfo); 717 } 718 719 ASSERT(mp); 720 tca = (struct T_capability_ack *)mp->b_rptr; 721 722 ASSERT((cap_bits1 & TC1_INFO) == (tca->CAP_bits1 & TC1_INFO)); 723 724 cap_bits1 = tca->CAP_bits1; 725 726 if (cap_bits1 & TC1_ACCEPTOR_ID) { 727 so->so_acceptor_id = tca->ACCEPTOR_id; 728 so->so_mode |= SM_ACCEPTOR_ID; 729 } 730 731 if (cap_bits1 & TC1_INFO) 732 copy_tinfo(so, &tca->INFO_ack); 733 734 mutex_exit(&so->so_lock); 735 freemsg(mp); 736 737 if (cap_bits1 & TC1_INFO) 738 return (check_tinfo(so)); 739 740 return (0); 741 } 742 743 /* 744 * Retrieve and clear the socket error. 745 */ 746 int 747 sogeterr(struct sonode *so) 748 { 749 int error; 750 751 ASSERT(MUTEX_HELD(&so->so_lock)); 752 753 error = so->so_error; 754 so->so_error = 0; 755 756 return (error); 757 } 758 759 /* 760 * This routine is registered with the stream head to retrieve read 761 * side errors. 762 * It does not clear the socket error for a peeking read side operation. 763 * It the error is to be cleared it sets *clearerr. 
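 *
 * Hedged usage sketch (not part of the original source) of that
 * contract as seen from the stream head:
 *
 *	int clearerr, err;
 *
 *	err = sogetrderr(vp, 1, &clearerr);	// peek: error reported,
 *						// so_error left pending
 *	err = sogetrderr(vp, 0, &clearerr);	// consume: so_error is
 *						// cleared, clearerr == 1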
764 */ 765 int 766 sogetrderr(vnode_t *vp, int ispeek, int *clearerr) 767 { 768 struct sonode *so = VTOSO(vp); 769 int error; 770 771 mutex_enter(&so->so_lock); 772 if (ispeek) { 773 error = so->so_error; 774 *clearerr = 0; 775 } else { 776 error = so->so_error; 777 so->so_error = 0; 778 *clearerr = 1; 779 } 780 mutex_exit(&so->so_lock); 781 return (error); 782 } 783 784 /* 785 * This routine is registered with the stream head to retrieve write 786 * side errors. 787 * It does not clear the socket error for a peeking read side operation. 788 * It the error is to be cleared it sets *clearerr. 789 */ 790 int 791 sogetwrerr(vnode_t *vp, int ispeek, int *clearerr) 792 { 793 struct sonode *so = VTOSO(vp); 794 int error; 795 796 mutex_enter(&so->so_lock); 797 if (so->so_state & SS_CANTSENDMORE) { 798 error = EPIPE; 799 *clearerr = 0; 800 } else { 801 error = so->so_error; 802 if (ispeek) { 803 *clearerr = 0; 804 } else { 805 so->so_error = 0; 806 *clearerr = 1; 807 } 808 } 809 mutex_exit(&so->so_lock); 810 return (error); 811 } 812 813 /* 814 * Set a nonpersistent read and write error on the socket. 815 * Used when there is a T_uderror_ind for a connected socket. 816 * The caller also needs to call strsetrerror and strsetwerror 817 * after dropping the lock. 818 */ 819 void 820 soseterror(struct sonode *so, int error) 821 { 822 ASSERT(error != 0); 823 824 ASSERT(MUTEX_HELD(&so->so_lock)); 825 so->so_error = (ushort_t)error; 826 } 827 828 void 829 soisconnecting(struct sonode *so) 830 { 831 ASSERT(MUTEX_HELD(&so->so_lock)); 832 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); 833 so->so_state |= SS_ISCONNECTING; 834 cv_broadcast(&so->so_state_cv); 835 } 836 837 void 838 soisconnected(struct sonode *so) 839 { 840 ASSERT(MUTEX_HELD(&so->so_lock)); 841 so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING); 842 so->so_state |= SS_ISCONNECTED; 843 cv_broadcast(&so->so_state_cv); 844 } 845 846 /* 847 * The caller also needs to call strsetrerror, strsetwerror and strseteof. 848 */ 849 void 850 soisdisconnected(struct sonode *so, int error) 851 { 852 ASSERT(MUTEX_HELD(&so->so_lock)); 853 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING| 854 SS_LADDR_VALID|SS_FADDR_VALID); 855 so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE); 856 so->so_error = (ushort_t)error; 857 if (so->so_peercred != NULL) { 858 crfree(so->so_peercred); 859 so->so_peercred = NULL; 860 } 861 cv_broadcast(&so->so_state_cv); 862 } 863 864 /* 865 * For connected AF_UNIX SOCK_DGRAM sockets when the peer closes. 866 * Does not affect write side. 867 * The caller also has to call strsetrerror. 868 */ 869 static void 870 sobreakconn(struct sonode *so, int error) 871 { 872 ASSERT(MUTEX_HELD(&so->so_lock)); 873 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); 874 so->so_error = (ushort_t)error; 875 cv_broadcast(&so->so_state_cv); 876 } 877 878 /* 879 * Can no longer send. 880 * Caller must also call strsetwerror. 881 * 882 * We mark the peer address as no longer valid for getpeername, but 883 * leave it around for so_unix_close to notify the peer (that 884 * transport has no addressing held at that layer). 885 */ 886 void 887 socantsendmore(struct sonode *so) 888 { 889 ASSERT(MUTEX_HELD(&so->so_lock)); 890 so->so_state = so->so_state & ~SS_FADDR_VALID | SS_CANTSENDMORE; 891 cv_broadcast(&so->so_state_cv); 892 } 893 894 /* 895 * The caller must call strseteof(,1) as well as this routine 896 * to change the socket state. 
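 *
 * Illustrative calling pattern (a sketch, not part of the original
 * source) shared by these state helpers: the sonode flags change under
 * so_lock, and the stream head is notified only after the lock is
 * dropped, as strsock_discon_ind() does further down:
 *
 *	mutex_enter(&so->so_lock);
 *	soisdisconnected(so, error);
 *	mutex_exit(&so->so_lock);
 *	strsetrerror(SOTOV(so), 0, 0, sogetrderr);
 *	strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
 *	strseteof(SOTOV(so), 1);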
897 */ 898 void 899 socantrcvmore(struct sonode *so) 900 { 901 ASSERT(MUTEX_HELD(&so->so_lock)); 902 so->so_state |= SS_CANTRCVMORE; 903 cv_broadcast(&so->so_state_cv); 904 } 905 906 /* 907 * The caller has sent down a "request_prim" primitive and wants to wait for 908 * an ack ("ack_prim") or an T_ERROR_ACK for it. 909 * The specified "ack_prim" can be a T_OK_ACK. 910 * 911 * Assumes that all the TPI acks are M_PCPROTO messages. 912 * 913 * Note that the socket is single-threaded (using so_lock_single) 914 * for all operations that generate TPI ack messages. Since 915 * only TPI ack messages are M_PCPROTO we should never receive 916 * anything except either the ack we are expecting or a T_ERROR_ACK 917 * for the same primitive. 918 */ 919 int 920 sowaitprim(struct sonode *so, t_scalar_t request_prim, t_scalar_t ack_prim, 921 t_uscalar_t min_size, mblk_t **mpp, clock_t wait) 922 { 923 mblk_t *mp; 924 union T_primitives *tpr; 925 int error; 926 927 dprintso(so, 1, ("sowaitprim(%p, %d, %d, %d, %p, %lu)\n", 928 so, request_prim, ack_prim, min_size, mpp, wait)); 929 930 ASSERT(MUTEX_HELD(&so->so_lock)); 931 932 error = sowaitack(so, &mp, wait); 933 if (error) 934 return (error); 935 936 dprintso(so, 1, ("got msg %p\n", mp)); 937 if (DB_TYPE(mp) != M_PCPROTO || 938 MBLKL(mp) < sizeof (tpr->type)) { 939 freemsg(mp); 940 eprintsoline(so, EPROTO); 941 return (EPROTO); 942 } 943 tpr = (union T_primitives *)mp->b_rptr; 944 /* 945 * Did we get the primitive that we were asking for? 946 * For T_OK_ACK we also check that it matches the request primitive. 947 */ 948 if (tpr->type == ack_prim && 949 (ack_prim != T_OK_ACK || 950 tpr->ok_ack.CORRECT_prim == request_prim)) { 951 if (MBLKL(mp) >= (ssize_t)min_size) { 952 /* Found what we are looking for */ 953 *mpp = mp; 954 return (0); 955 } 956 /* Too short */ 957 freemsg(mp); 958 eprintsoline(so, EPROTO); 959 return (EPROTO); 960 } 961 962 if (tpr->type == T_ERROR_ACK && 963 tpr->error_ack.ERROR_prim == request_prim) { 964 /* Error to the primitive we were looking for */ 965 if (tpr->error_ack.TLI_error == TSYSERR) { 966 error = tpr->error_ack.UNIX_error; 967 } else { 968 error = tlitosyserr(tpr->error_ack.TLI_error); 969 } 970 dprintso(so, 0, ("error_ack for %d: %d/%d ->%d\n", 971 tpr->error_ack.ERROR_prim, 972 tpr->error_ack.TLI_error, 973 tpr->error_ack.UNIX_error, 974 error)); 975 freemsg(mp); 976 return (error); 977 } 978 /* 979 * Wrong primitive or T_ERROR_ACK for the wrong primitive 980 */ 981 #ifdef DEBUG 982 if (tpr->type == T_ERROR_ACK) { 983 dprintso(so, 0, ("error_ack for %d: %d/%d\n", 984 tpr->error_ack.ERROR_prim, 985 tpr->error_ack.TLI_error, 986 tpr->error_ack.UNIX_error)); 987 } else if (tpr->type == T_OK_ACK) { 988 dprintso(so, 0, ("ok_ack for %d, expected %d for %d\n", 989 tpr->ok_ack.CORRECT_prim, 990 ack_prim, request_prim)); 991 } else { 992 dprintso(so, 0, 993 ("unexpected primitive %d, expected %d for %d\n", 994 tpr->type, ack_prim, request_prim)); 995 } 996 #endif /* DEBUG */ 997 998 freemsg(mp); 999 eprintsoline(so, EPROTO); 1000 return (EPROTO); 1001 } 1002 1003 /* 1004 * Wait for a T_OK_ACK for the specified primitive. 1005 */ 1006 int 1007 sowaitokack(struct sonode *so, t_scalar_t request_prim) 1008 { 1009 mblk_t *mp; 1010 int error; 1011 1012 error = sowaitprim(so, request_prim, T_OK_ACK, 1013 (t_uscalar_t)sizeof (struct T_ok_ack), &mp, 0); 1014 if (error) 1015 return (error); 1016 freemsg(mp); 1017 return (0); 1018 } 1019 1020 /* 1021 * Queue a received TPI ack message on so_ack_mp. 
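 *
 * A hedged sketch (not part of the original source) of how the two
 * halves of the ack handshake meet. The requesting thread:
 *
 *	error = kstrputmsg(SOTOV(so), reqmp, NULL, 0, 0,
 *	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
 *	mutex_enter(&so->so_lock);
 *	error = sowaitprim(so, T_BIND_REQ, T_BIND_ACK,
 *	    (t_uscalar_t)sizeof (struct T_bind_ack), &ackmp, 0);
 *	mutex_exit(&so->so_lock);
 *
 * while the read-side hook (strsock_proto), on seeing the M_PCPROTO
 * ack, roughly does:
 *
 *	soqueueack(so, mp);	// stores it in so_ack_mp and broadcasts
 *	return (NULL);		// message consumed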
1022 */ 1023 void 1024 soqueueack(struct sonode *so, mblk_t *mp) 1025 { 1026 if (DB_TYPE(mp) != M_PCPROTO) { 1027 zcmn_err(getzoneid(), CE_WARN, 1028 "sockfs: received unexpected M_PROTO TPI ack. Prim %d\n", 1029 *(t_scalar_t *)mp->b_rptr); 1030 freemsg(mp); 1031 return; 1032 } 1033 1034 mutex_enter(&so->so_lock); 1035 if (so->so_ack_mp != NULL) { 1036 dprintso(so, 1, ("so_ack_mp already set\n")); 1037 freemsg(so->so_ack_mp); 1038 so->so_ack_mp = NULL; 1039 } 1040 so->so_ack_mp = mp; 1041 cv_broadcast(&so->so_ack_cv); 1042 mutex_exit(&so->so_lock); 1043 } 1044 1045 /* 1046 * Wait for a TPI ack ignoring signals and errors. 1047 */ 1048 int 1049 sowaitack(struct sonode *so, mblk_t **mpp, clock_t wait) 1050 { 1051 ASSERT(MUTEX_HELD(&so->so_lock)); 1052 1053 while (so->so_ack_mp == NULL) { 1054 #ifdef SOCK_TEST 1055 if (wait == 0 && sock_test_timelimit != 0) 1056 wait = sock_test_timelimit; 1057 #endif 1058 if (wait != 0) { 1059 /* 1060 * Only wait for the time limit. 1061 */ 1062 clock_t now; 1063 1064 time_to_wait(&now, wait); 1065 if (cv_timedwait(&so->so_ack_cv, &so->so_lock, 1066 now) == -1) { 1067 eprintsoline(so, ETIME); 1068 return (ETIME); 1069 } 1070 } 1071 else 1072 cv_wait(&so->so_ack_cv, &so->so_lock); 1073 } 1074 *mpp = so->so_ack_mp; 1075 #ifdef DEBUG 1076 { 1077 union T_primitives *tpr; 1078 mblk_t *mp = *mpp; 1079 1080 tpr = (union T_primitives *)mp->b_rptr; 1081 ASSERT(DB_TYPE(mp) == M_PCPROTO); 1082 ASSERT(tpr->type == T_OK_ACK || 1083 tpr->type == T_ERROR_ACK || 1084 tpr->type == T_BIND_ACK || 1085 tpr->type == T_CAPABILITY_ACK || 1086 tpr->type == T_INFO_ACK || 1087 tpr->type == T_OPTMGMT_ACK); 1088 } 1089 #endif /* DEBUG */ 1090 so->so_ack_mp = NULL; 1091 return (0); 1092 } 1093 1094 /* 1095 * Queue a received T_CONN_IND message on so_conn_ind_head/tail. 1096 */ 1097 void 1098 soqueueconnind(struct sonode *so, mblk_t *mp) 1099 { 1100 if (DB_TYPE(mp) != M_PROTO) { 1101 zcmn_err(getzoneid(), CE_WARN, 1102 "sockfs: received unexpected M_PCPROTO T_CONN_IND\n"); 1103 freemsg(mp); 1104 return; 1105 } 1106 1107 mutex_enter(&so->so_lock); 1108 ASSERT(mp->b_next == NULL); 1109 if (so->so_conn_ind_head == NULL) { 1110 so->so_conn_ind_head = mp; 1111 so->so_state |= SS_HASCONNIND; 1112 } else { 1113 ASSERT(so->so_state & SS_HASCONNIND); 1114 ASSERT(so->so_conn_ind_tail->b_next == NULL); 1115 so->so_conn_ind_tail->b_next = mp; 1116 } 1117 so->so_conn_ind_tail = mp; 1118 /* Wakeup a single consumer of the T_CONN_IND */ 1119 cv_signal(&so->so_connind_cv); 1120 mutex_exit(&so->so_lock); 1121 } 1122 1123 /* 1124 * Wait for a T_CONN_IND. 1125 * Don't wait if nonblocking. 1126 * Accept signals and socket errors. 
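 *
 * Hedged sketch (not part of the original source) of the accept-side
 * usage this supports: the listener dequeues one T_CONN_IND and picks
 * out the sequence number that a later T_CONN_RES must echo:
 *
 *	mblk_t *mp;
 *	struct T_conn_ind *tci;
 *	t_uscalar_t seqno;
 *
 *	if ((error = sowaitconnind(so, fmode, &mp)) != 0)
 *		return (error);
 *	tci = (struct T_conn_ind *)mp->b_rptr;
 *	seqno = tci->SEQ_number;	// echoed in the T_CONN_RES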
1127 */ 1128 int 1129 sowaitconnind(struct sonode *so, int fmode, mblk_t **mpp) 1130 { 1131 mblk_t *mp; 1132 int error = 0; 1133 1134 ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 1135 mutex_enter(&so->so_lock); 1136 check_error: 1137 if (so->so_error) { 1138 error = sogeterr(so); 1139 if (error) { 1140 mutex_exit(&so->so_lock); 1141 return (error); 1142 } 1143 } 1144 1145 if (so->so_conn_ind_head == NULL) { 1146 if (fmode & (FNDELAY|FNONBLOCK)) { 1147 error = EWOULDBLOCK; 1148 goto done; 1149 } 1150 if (!cv_wait_sig_swap(&so->so_connind_cv, &so->so_lock)) { 1151 error = EINTR; 1152 goto done; 1153 } 1154 goto check_error; 1155 } 1156 mp = so->so_conn_ind_head; 1157 so->so_conn_ind_head = mp->b_next; 1158 mp->b_next = NULL; 1159 if (so->so_conn_ind_head == NULL) { 1160 ASSERT(so->so_conn_ind_tail == mp); 1161 so->so_conn_ind_tail = NULL; 1162 so->so_state &= ~SS_HASCONNIND; 1163 } 1164 *mpp = mp; 1165 done: 1166 mutex_exit(&so->so_lock); 1167 return (error); 1168 } 1169 1170 /* 1171 * Flush a T_CONN_IND matching the sequence number from the list. 1172 * Return zero if found; non-zero otherwise. 1173 * This is called very infrequently thus it is ok to do a linear search. 1174 */ 1175 int 1176 soflushconnind(struct sonode *so, t_scalar_t seqno) 1177 { 1178 mblk_t *prevmp, *mp; 1179 struct T_conn_ind *tci; 1180 1181 mutex_enter(&so->so_lock); 1182 for (prevmp = NULL, mp = so->so_conn_ind_head; mp != NULL; 1183 prevmp = mp, mp = mp->b_next) { 1184 tci = (struct T_conn_ind *)mp->b_rptr; 1185 if (tci->SEQ_number == seqno) { 1186 dprintso(so, 1, 1187 ("t_discon_ind: found T_CONN_IND %d\n", seqno)); 1188 /* Deleting last? */ 1189 if (so->so_conn_ind_tail == mp) { 1190 so->so_conn_ind_tail = prevmp; 1191 } 1192 if (prevmp == NULL) { 1193 /* Deleting first */ 1194 so->so_conn_ind_head = mp->b_next; 1195 } else { 1196 prevmp->b_next = mp->b_next; 1197 } 1198 mp->b_next = NULL; 1199 if (so->so_conn_ind_head == NULL) { 1200 ASSERT(so->so_conn_ind_tail == NULL); 1201 so->so_state &= ~SS_HASCONNIND; 1202 } else { 1203 ASSERT(so->so_conn_ind_tail != NULL); 1204 } 1205 so->so_error = ECONNABORTED; 1206 mutex_exit(&so->so_lock); 1207 1208 /* 1209 * T_KSSL_PROXY_CONN_IND may carry a handle for 1210 * an SSL context, and needs to be released. 1211 */ 1212 if ((tci->PRIM_type == T_SSL_PROXY_CONN_IND) && 1213 (mp->b_cont != NULL)) { 1214 kssl_ctx_t kssl_ctx; 1215 1216 ASSERT(MBLKL(mp->b_cont) == 1217 sizeof (kssl_ctx_t)); 1218 kssl_ctx = *((kssl_ctx_t *)mp->b_cont->b_rptr); 1219 kssl_release_ctx(kssl_ctx); 1220 } 1221 freemsg(mp); 1222 return (0); 1223 } 1224 } 1225 mutex_exit(&so->so_lock); 1226 dprintso(so, 1, ("t_discon_ind: NOT found T_CONN_IND %d\n", seqno)); 1227 return (-1); 1228 } 1229 1230 /* 1231 * Wait until the socket is connected or there is an error. 1232 * fmode should contain any nonblocking flags. nosig should be 1233 * set if the caller does not want the wait to be interrupted by a signal. 
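 *
 * Hedged sketch (not part of the original source) of the connect-side
 * usage: once the T_CONN_REQ has been acknowledged, the caller holds
 * so_lock and parks here until the T_CONN_CON or T_DISCON_IND arrives:
 *
 *	mutex_enter(&so->so_lock);
 *	error = sowaitconnected(so, fmode, 0);
 *	mutex_exit(&so->so_lock);
 *	// 0 on success, EINPROGRESS for a pending nonblocking connect,
 *	// EINTR if interrupted, otherwise so_error (e.g. ECONNREFUSED)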
1234 */ 1235 int 1236 sowaitconnected(struct sonode *so, int fmode, int nosig) 1237 { 1238 int error; 1239 1240 ASSERT(MUTEX_HELD(&so->so_lock)); 1241 1242 while ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 1243 SS_ISCONNECTING && so->so_error == 0) { 1244 1245 dprintso(so, 1, ("waiting for SS_ISCONNECTED on %p\n", so)); 1246 if (fmode & (FNDELAY|FNONBLOCK)) 1247 return (EINPROGRESS); 1248 1249 if (nosig) 1250 cv_wait(&so->so_state_cv, &so->so_lock); 1251 else if (!cv_wait_sig_swap(&so->so_state_cv, &so->so_lock)) { 1252 /* 1253 * Return EINTR and let the application use 1254 * nonblocking techniques for detecting when 1255 * the connection has been established. 1256 */ 1257 return (EINTR); 1258 } 1259 dprintso(so, 1, ("awoken on %p\n", so)); 1260 } 1261 1262 if (so->so_error != 0) { 1263 error = sogeterr(so); 1264 ASSERT(error != 0); 1265 dprintso(so, 1, ("sowaitconnected: error %d\n", error)); 1266 return (error); 1267 } 1268 if (!(so->so_state & SS_ISCONNECTED)) { 1269 /* 1270 * Could have received a T_ORDREL_IND or a T_DISCON_IND with 1271 * zero errno. Or another thread could have consumed so_error 1272 * e.g. by calling read. 1273 */ 1274 error = ECONNREFUSED; 1275 dprintso(so, 1, ("sowaitconnected: error %d\n", error)); 1276 return (error); 1277 } 1278 return (0); 1279 } 1280 1281 1282 /* 1283 * Handle the signal generation aspect of urgent data. 1284 */ 1285 static void 1286 so_oob_sig(struct sonode *so, int extrasig, 1287 strsigset_t *signals, strpollset_t *pollwakeups) 1288 { 1289 ASSERT(MUTEX_HELD(&so->so_lock)); 1290 1291 ASSERT(so_verify_oobstate(so)); 1292 ASSERT(so->so_oobsigcnt >= so->so_oobcnt); 1293 if (so->so_oobsigcnt > so->so_oobcnt) { 1294 /* 1295 * Signal has already been generated once for this 1296 * urgent "event". However, since TCP can receive updated 1297 * urgent pointers we still generate a signal. 1298 */ 1299 ASSERT(so->so_state & SS_OOBPEND); 1300 if (extrasig) { 1301 *signals |= S_RDBAND; 1302 *pollwakeups |= POLLRDBAND; 1303 } 1304 return; 1305 } 1306 1307 so->so_oobsigcnt++; 1308 ASSERT(so->so_oobsigcnt > 0); /* Wraparound */ 1309 ASSERT(so->so_oobsigcnt > so->so_oobcnt); 1310 1311 /* 1312 * Record (for select/poll) that urgent data is pending. 1313 */ 1314 so->so_state |= SS_OOBPEND; 1315 /* 1316 * New urgent data on the way so forget about any old 1317 * urgent data. 1318 */ 1319 so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA); 1320 if (so->so_oobmsg != NULL) { 1321 dprintso(so, 1, ("sock: discarding old oob\n")); 1322 freemsg(so->so_oobmsg); 1323 so->so_oobmsg = NULL; 1324 } 1325 *signals |= S_RDBAND; 1326 *pollwakeups |= POLLRDBAND; 1327 ASSERT(so_verify_oobstate(so)); 1328 } 1329 1330 /* 1331 * Handle the processing of the T_EXDATA_IND with urgent data. 1332 * Returns the T_EXDATA_IND if it should be queued on the read queue. 1333 */ 1334 /* ARGSUSED2 */ 1335 static mblk_t * 1336 so_oob_exdata(struct sonode *so, mblk_t *mp, 1337 strsigset_t *signals, strpollset_t *pollwakeups) 1338 { 1339 ASSERT(MUTEX_HELD(&so->so_lock)); 1340 1341 ASSERT(so_verify_oobstate(so)); 1342 1343 ASSERT(so->so_oobsigcnt > so->so_oobcnt); 1344 1345 so->so_oobcnt++; 1346 ASSERT(so->so_oobcnt > 0); /* wraparound? */ 1347 ASSERT(so->so_oobsigcnt >= so->so_oobcnt); 1348 1349 /* 1350 * Set MSGMARK for SIOCATMARK. 1351 */ 1352 mp->b_flag |= MSGMARK; 1353 1354 ASSERT(so_verify_oobstate(so)); 1355 return (mp); 1356 } 1357 1358 /* 1359 * Handle the processing of the actual urgent data. 1360 * Returns the data mblk if it should be queued on the read queue. 
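 *
 * User-level view (an illustrative sketch, not part of the original
 * source): without SO_OOBINLINE the urgent byte is parked in so_oobmsg
 * and fetched out of band, with SO_OOBINLINE it stays in the normal
 * data stream:
 *
 *	char c;
 *	int on = 1;
 *
 *	// default: fetch the urgent byte separately
 *	(void) recv(fd, &c, 1, MSG_OOB);
 *
 *	// inline: the urgent byte is returned by ordinary recv()
 *	(void) setsockopt(fd, SOL_SOCKET, SO_OOBINLINE, &on, sizeof (on));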
1361 */ 1362 static mblk_t * 1363 so_oob_data(struct sonode *so, mblk_t *mp, 1364 strsigset_t *signals, strpollset_t *pollwakeups) 1365 { 1366 ASSERT(MUTEX_HELD(&so->so_lock)); 1367 1368 ASSERT(so_verify_oobstate(so)); 1369 1370 ASSERT(so->so_oobsigcnt >= so->so_oobcnt); 1371 ASSERT(mp != NULL); 1372 /* 1373 * For OOBINLINE we keep the data in the T_EXDATA_IND. 1374 * Otherwise we store it in so_oobmsg. 1375 */ 1376 ASSERT(so->so_oobmsg == NULL); 1377 if (so->so_options & SO_OOBINLINE) { 1378 *pollwakeups |= POLLIN | POLLRDNORM | POLLRDBAND; 1379 *signals |= S_INPUT | S_RDNORM; 1380 } else { 1381 *pollwakeups |= POLLRDBAND; 1382 so->so_state |= SS_HAVEOOBDATA; 1383 so->so_oobmsg = mp; 1384 mp = NULL; 1385 } 1386 ASSERT(so_verify_oobstate(so)); 1387 return (mp); 1388 } 1389 1390 /* 1391 * Caller must hold the mutex. 1392 * For delayed processing, save the T_DISCON_IND received 1393 * from below on so_discon_ind_mp. 1394 * When the message is processed the framework will call: 1395 * (*func)(so, mp); 1396 */ 1397 static void 1398 so_save_discon_ind(struct sonode *so, 1399 mblk_t *mp, 1400 void (*func)(struct sonode *so, mblk_t *)) 1401 { 1402 ASSERT(MUTEX_HELD(&so->so_lock)); 1403 1404 /* 1405 * Discard new T_DISCON_IND if we have already received another. 1406 * Currently the earlier message can either be on so_discon_ind_mp 1407 * or being processed. 1408 */ 1409 if (so->so_discon_ind_mp != NULL || (so->so_flag & SOASYNC_UNBIND)) { 1410 zcmn_err(getzoneid(), CE_WARN, 1411 "sockfs: received unexpected additional T_DISCON_IND\n"); 1412 freemsg(mp); 1413 return; 1414 } 1415 mp->b_prev = (mblk_t *)func; 1416 mp->b_next = NULL; 1417 so->so_discon_ind_mp = mp; 1418 } 1419 1420 /* 1421 * Caller must hold the mutex and make sure that either SOLOCKED 1422 * or SOASYNC_UNBIND is set. Called from so_unlock_single(). 1423 * Perform delayed processing of T_DISCON_IND message on so_discon_ind_mp. 1424 * Need to ensure that strsock_proto() will not end up sleeping for 1425 * SOASYNC_UNBIND, while executing this function. 1426 */ 1427 void 1428 so_drain_discon_ind(struct sonode *so) 1429 { 1430 mblk_t *bp; 1431 void (*func)(struct sonode *so, mblk_t *); 1432 1433 ASSERT(MUTEX_HELD(&so->so_lock)); 1434 ASSERT(so->so_flag & (SOLOCKED|SOASYNC_UNBIND)); 1435 1436 /* Process T_DISCON_IND on so_discon_ind_mp */ 1437 if ((bp = so->so_discon_ind_mp) != NULL) { 1438 so->so_discon_ind_mp = NULL; 1439 func = (void (*)())bp->b_prev; 1440 bp->b_prev = NULL; 1441 1442 /* 1443 * This (*func) is supposed to generate a message downstream 1444 * and we need to have a flag set until the corresponding 1445 * upstream message reaches stream head. 1446 * When processing T_DISCON_IND in strsock_discon_ind 1447 * we hold SOASYN_UNBIND when sending T_UNBIND_REQ down and 1448 * drop the flag after we get the ACK in strsock_proto. 1449 */ 1450 (void) (*func)(so, bp); 1451 } 1452 } 1453 1454 /* 1455 * Caller must hold the mutex. 1456 * Remove the T_DISCON_IND on so_discon_ind_mp. 1457 */ 1458 void 1459 so_flush_discon_ind(struct sonode *so) 1460 { 1461 mblk_t *bp; 1462 1463 ASSERT(MUTEX_HELD(&so->so_lock)); 1464 1465 /* 1466 * Remove T_DISCON_IND mblk at so_discon_ind_mp. 1467 */ 1468 if ((bp = so->so_discon_ind_mp) != NULL) { 1469 so->so_discon_ind_mp = NULL; 1470 bp->b_prev = NULL; 1471 freemsg(bp); 1472 } 1473 } 1474 1475 /* 1476 * Caller must hold the mutex. 1477 * 1478 * This function is used to process the T_DISCON_IND message. 
It does 1479 * immediate processing when called from strsock_proto and delayed 1480 * processing of discon_ind saved on so_discon_ind_mp when called from 1481 * so_drain_discon_ind. When a T_DISCON_IND message is saved in 1482 * so_discon_ind_mp for delayed processing, this function is registered 1483 * as the callback function to process the message. 1484 * 1485 * SOASYNC_UNBIND should be held in this function, during the non-blocking 1486 * unbind operation, and should be released only after we receive the ACK 1487 * in strsock_proto, for the T_UNBIND_REQ sent here. Since SOLOCKED is not set, 1488 * no TPI messages would be sent down at this time. This is to prevent M_FLUSH 1489 * sent from either this function or tcp_unbind(), flushing away any TPI 1490 * message that is being sent down and stays in a lower module's queue. 1491 * 1492 * This function drops so_lock and grabs it again. 1493 */ 1494 static void 1495 strsock_discon_ind(struct sonode *so, mblk_t *discon_mp) 1496 { 1497 struct vnode *vp; 1498 struct stdata *stp; 1499 union T_primitives *tpr; 1500 struct T_unbind_req *ubr; 1501 mblk_t *mp; 1502 int error; 1503 1504 ASSERT(MUTEX_HELD(&so->so_lock)); 1505 ASSERT(discon_mp); 1506 ASSERT(discon_mp->b_rptr); 1507 1508 tpr = (union T_primitives *)discon_mp->b_rptr; 1509 ASSERT(tpr->type == T_DISCON_IND); 1510 1511 vp = SOTOV(so); 1512 stp = vp->v_stream; 1513 ASSERT(stp); 1514 1515 /* 1516 * Not a listener 1517 */ 1518 ASSERT((so->so_state & SS_ACCEPTCONN) == 0); 1519 1520 /* 1521 * This assumes that the name space for DISCON_reason 1522 * is the errno name space. 1523 */ 1524 soisdisconnected(so, tpr->discon_ind.DISCON_reason); 1525 1526 /* 1527 * Unbind with the transport without blocking. 1528 * If we've already received a T_DISCON_IND do not unbind. 1529 * 1530 * If there is no preallocated unbind message, we have already 1531 * unbound with the transport 1532 * 1533 * If the socket is not bound, no need to unbind. 1534 */ 1535 mp = so->so_unbind_mp; 1536 if (mp == NULL) { 1537 ASSERT(!(so->so_state & SS_ISBOUND)); 1538 mutex_exit(&so->so_lock); 1539 } else if (!(so->so_state & SS_ISBOUND)) { 1540 mutex_exit(&so->so_lock); 1541 } else { 1542 so->so_unbind_mp = NULL; 1543 1544 /* 1545 * Is another T_DISCON_IND being processed. 1546 */ 1547 ASSERT((so->so_flag & SOASYNC_UNBIND) == 0); 1548 1549 /* 1550 * Make strsock_proto ignore T_OK_ACK and T_ERROR_ACK for 1551 * this unbind. Set SOASYNC_UNBIND. This should be cleared 1552 * only after we receive the ACK in strsock_proto. 1553 */ 1554 so->so_flag |= SOASYNC_UNBIND; 1555 ASSERT(!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING))); 1556 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN|SS_LADDR_VALID); 1557 mutex_exit(&so->so_lock); 1558 1559 /* 1560 * Send down T_UNBIND_REQ ignoring flow control. 1561 * XXX Assumes that MSG_IGNFLOW implies that this thread 1562 * does not run service procedures. 1563 */ 1564 ASSERT(DB_TYPE(mp) == M_PROTO); 1565 ubr = (struct T_unbind_req *)mp->b_rptr; 1566 mp->b_wptr += sizeof (*ubr); 1567 ubr->PRIM_type = T_UNBIND_REQ; 1568 1569 /* 1570 * Flush the read and write side (except stream head read queue) 1571 * and send down T_UNBIND_REQ. 
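 *
 * Editorial note (not in the original source): the T_UNBIND_REQ mblk
 * used here is the one preallocated in so_strinit(),
 *
 *	mp = soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP);
 *	so->so_unbind_mp = mp;
 *
 * so this disconnect path never has to allocate memory and cannot fail
 * for lack of it.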
1572 */ 1573 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW); 1574 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1575 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 1576 /* LINTED - warning: statement has no consequent: if */ 1577 if (error) { 1578 eprintsoline(so, error); 1579 } 1580 } 1581 1582 if (tpr->discon_ind.DISCON_reason != 0) 1583 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 1584 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 1585 strseteof(SOTOV(so), 1); 1586 /* 1587 * strseteof takes care of read side wakeups, 1588 * pollwakeups, and signals. 1589 */ 1590 dprintso(so, 1, ("T_DISCON_IND: error %d\n", so->so_error)); 1591 freemsg(discon_mp); 1592 1593 1594 pollwakeup(&stp->sd_pollist, POLLOUT); 1595 mutex_enter(&stp->sd_lock); 1596 1597 /* 1598 * Wake sleeping write 1599 */ 1600 if (stp->sd_flag & WSLEEP) { 1601 stp->sd_flag &= ~WSLEEP; 1602 cv_broadcast(&stp->sd_wrq->q_wait); 1603 } 1604 1605 /* 1606 * strsendsig can handle multiple signals with a 1607 * single call. Send SIGPOLL for S_OUTPUT event. 1608 */ 1609 if (stp->sd_sigflags & S_OUTPUT) 1610 strsendsig(stp->sd_siglist, S_OUTPUT, 0, 0); 1611 1612 mutex_exit(&stp->sd_lock); 1613 mutex_enter(&so->so_lock); 1614 } 1615 1616 /* 1617 * This routine is registered with the stream head to receive M_PROTO 1618 * and M_PCPROTO messages. 1619 * 1620 * Returns NULL if the message was consumed. 1621 * Returns an mblk to make that mblk be processed (and queued) by the stream 1622 * head. 1623 * 1624 * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and 1625 * *pollwakeups) for the stream head to take action on. Note that since 1626 * sockets always deliver SIGIO for every new piece of data this routine 1627 * never sets *firstmsgsigs; any signals are returned in *allmsgsigs. 1628 * 1629 * This routine handles all data related TPI messages independent of 1630 * the type of the socket i.e. it doesn't care if T_UNITDATA_IND message 1631 * arrive on a SOCK_STREAM. 1632 */ 1633 static mblk_t * 1634 strsock_proto(vnode_t *vp, mblk_t *mp, 1635 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 1636 strsigset_t *allmsgsigs, strpollset_t *pollwakeups) 1637 { 1638 union T_primitives *tpr; 1639 struct sonode *so; 1640 1641 so = VTOSO(vp); 1642 1643 dprintso(so, 1, ("strsock_proto(%p, %p)\n", vp, mp)); 1644 1645 /* Set default return values */ 1646 *firstmsgsigs = *wakeups = *allmsgsigs = *pollwakeups = 0; 1647 1648 ASSERT(DB_TYPE(mp) == M_PROTO || 1649 DB_TYPE(mp) == M_PCPROTO); 1650 1651 if (MBLKL(mp) < sizeof (tpr->type)) { 1652 /* The message is too short to even contain the primitive */ 1653 zcmn_err(getzoneid(), CE_WARN, 1654 "sockfs: Too short TPI message received. Len = %ld\n", 1655 (ptrdiff_t)(MBLKL(mp))); 1656 freemsg(mp); 1657 return (NULL); 1658 } 1659 if (!__TPI_PRIM_ISALIGNED(mp->b_rptr)) { 1660 /* The read pointer is not aligned correctly for TPI */ 1661 zcmn_err(getzoneid(), CE_WARN, 1662 "sockfs: Unaligned TPI message received. rptr = %p\n", 1663 (void *)mp->b_rptr); 1664 freemsg(mp); 1665 return (NULL); 1666 } 1667 tpr = (union T_primitives *)mp->b_rptr; 1668 dprintso(so, 1, ("strsock_proto: primitive %d\n", tpr->type)); 1669 1670 switch (tpr->type) { 1671 1672 case T_DATA_IND: 1673 if (MBLKL(mp) < sizeof (struct T_data_ind)) { 1674 zcmn_err(getzoneid(), CE_WARN, 1675 "sockfs: Too short T_DATA_IND. Len = %ld\n", 1676 (ptrdiff_t)(MBLKL(mp))); 1677 freemsg(mp); 1678 return (NULL); 1679 } 1680 /* 1681 * Ignore zero-length T_DATA_IND messages. These might be 1682 * generated by some transports. 
1683 * This is needed to prevent read (which skips the M_PROTO 1684 * part) to unexpectedly return 0 (or return EWOULDBLOCK 1685 * on a non-blocking socket after select/poll has indicated 1686 * that data is available). 1687 */ 1688 if (msgdsize(mp->b_cont) == 0) { 1689 dprintso(so, 0, 1690 ("strsock_proto: zero length T_DATA_IND\n")); 1691 freemsg(mp); 1692 return (NULL); 1693 } 1694 *allmsgsigs = S_INPUT | S_RDNORM; 1695 *pollwakeups = POLLIN | POLLRDNORM; 1696 *wakeups = RSLEEP; 1697 return (mp); 1698 1699 case T_UNITDATA_IND: { 1700 struct T_unitdata_ind *tudi = &tpr->unitdata_ind; 1701 void *addr; 1702 t_uscalar_t addrlen; 1703 1704 if (MBLKL(mp) < sizeof (struct T_unitdata_ind)) { 1705 zcmn_err(getzoneid(), CE_WARN, 1706 "sockfs: Too short T_UNITDATA_IND. Len = %ld\n", 1707 (ptrdiff_t)(MBLKL(mp))); 1708 freemsg(mp); 1709 return (NULL); 1710 } 1711 1712 /* Is this not a connected datagram socket? */ 1713 if ((so->so_mode & SM_CONNREQUIRED) || 1714 !(so->so_state & SS_ISCONNECTED)) { 1715 /* 1716 * Not a connected datagram socket. Look for 1717 * the SO_UNIX_CLOSE option. If such an option is found 1718 * discard the message (since it has no meaning 1719 * unless connected). 1720 */ 1721 if (so->so_family == AF_UNIX && msgdsize(mp) == 0 && 1722 tudi->OPT_length != 0) { 1723 void *opt; 1724 t_uscalar_t optlen = tudi->OPT_length; 1725 1726 opt = sogetoff(mp, tudi->OPT_offset, 1727 optlen, __TPI_ALIGN_SIZE); 1728 if (opt == NULL) { 1729 /* The len/off falls outside mp */ 1730 freemsg(mp); 1731 mutex_enter(&so->so_lock); 1732 soseterror(so, EPROTO); 1733 mutex_exit(&so->so_lock); 1734 zcmn_err(getzoneid(), CE_WARN, 1735 "sockfs: T_unidata_ind with " 1736 "invalid optlen/offset %u/%d\n", 1737 optlen, tudi->OPT_offset); 1738 return (NULL); 1739 } 1740 if (so_getopt_unix_close(opt, optlen)) { 1741 freemsg(mp); 1742 return (NULL); 1743 } 1744 } 1745 *allmsgsigs = S_INPUT | S_RDNORM; 1746 *pollwakeups = POLLIN | POLLRDNORM; 1747 *wakeups = RSLEEP; 1748 #ifdef C2_AUDIT 1749 if (audit_active) 1750 audit_sock(T_UNITDATA_IND, strvp2wq(vp), 1751 mp, 0); 1752 #endif /* C2_AUDIT */ 1753 return (mp); 1754 } 1755 1756 /* 1757 * A connected datagram socket. For AF_INET{,6} we verify that 1758 * the source address matches the "connected to" address. 1759 * The semantics of AF_UNIX sockets are to not verify 1760 * the source address. 1761 * Note that this source address verification is transport 1762 * specific. Thus the real fix would be to extend TPI 1763 * to allow T_CONN_REQ messages to be sent to connectionless 1764 * transport providers and always let the transport provider 1765 * do whatever filtering is needed. 1766 * 1767 * The verification/filtering semantics for transports 1768 * other than AF_INET and AF_UNIX are unknown. The choice 1769 * would be to either filter using bcmp or let all messages 1770 * get through. This code does not filter other address 1771 * families since this at least allows the application to 1772 * work around any missing filtering. 1773 * 1774 * XXX Should we move filtering to UDP/ICMP??? 1775 * That would require passing e.g. a T_DISCON_REQ to UDP 1776 * when the socket becomes unconnected. 1777 */ 1778 addrlen = tudi->SRC_length; 1779 /* 1780 * The alignment restriction is really too strict but 1781 * we want enough alignment to inspect the fields of 1782 * a sockaddr_in.
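 *
 * Worked example (illustrative, not part of the original source) of
 * the AF_INET filtering performed below: with the socket connected to
 * 192.0.2.1 port 5000, a T_UNITDATA_IND whose source is 192.0.2.1:5000
 * is passed up, one from 192.0.2.9:5000 is silently dropped, and all
 * sources are accepted when the connected address is INADDR_ANY or the
 * connected port is 0 (the wildcard cases).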
1783 */ 1784 addr = sogetoff(mp, tudi->SRC_offset, addrlen, 1785 __TPI_ALIGN_SIZE); 1786 if (addr == NULL) { 1787 freemsg(mp); 1788 mutex_enter(&so->so_lock); 1789 soseterror(so, EPROTO); 1790 mutex_exit(&so->so_lock); 1791 zcmn_err(getzoneid(), CE_WARN, 1792 "sockfs: T_unidata_ind with invalid " 1793 "addrlen/offset %u/%d\n", 1794 addrlen, tudi->SRC_offset); 1795 return (NULL); 1796 } 1797 1798 if (so->so_family == AF_INET) { 1799 /* 1800 * For AF_INET we allow wildcarding both sin_addr 1801 * and sin_port. 1802 */ 1803 struct sockaddr_in *faddr, *sin; 1804 1805 /* Prevent so_faddr_sa from changing while accessed */ 1806 mutex_enter(&so->so_lock); 1807 ASSERT(so->so_faddr_len == 1808 (socklen_t)sizeof (struct sockaddr_in)); 1809 faddr = (struct sockaddr_in *)so->so_faddr_sa; 1810 sin = (struct sockaddr_in *)addr; 1811 if (addrlen != 1812 (t_uscalar_t)sizeof (struct sockaddr_in) || 1813 (sin->sin_addr.s_addr != faddr->sin_addr.s_addr && 1814 faddr->sin_addr.s_addr != INADDR_ANY) || 1815 (so->so_type != SOCK_RAW && 1816 sin->sin_port != faddr->sin_port && 1817 faddr->sin_port != 0)) { 1818 #ifdef DEBUG 1819 dprintso(so, 0, 1820 ("sockfs: T_UNITDATA_IND mismatch: %s", 1821 pr_addr(so->so_family, 1822 (struct sockaddr *)addr, 1823 addrlen))); 1824 dprintso(so, 0, (" - %s\n", 1825 pr_addr(so->so_family, so->so_faddr_sa, 1826 (t_uscalar_t)so->so_faddr_len))); 1827 #endif /* DEBUG */ 1828 mutex_exit(&so->so_lock); 1829 freemsg(mp); 1830 return (NULL); 1831 } 1832 mutex_exit(&so->so_lock); 1833 } else if (so->so_family == AF_INET6) { 1834 /* 1835 * For AF_INET6 we allow wildcarding both sin6_addr 1836 * and sin6_port. 1837 */ 1838 struct sockaddr_in6 *faddr6, *sin6; 1839 static struct in6_addr zeroes; /* inits to all zeros */ 1840 1841 /* Prevent so_faddr_sa from changing while accessed */ 1842 mutex_enter(&so->so_lock); 1843 ASSERT(so->so_faddr_len == 1844 (socklen_t)sizeof (struct sockaddr_in6)); 1845 faddr6 = (struct sockaddr_in6 *)so->so_faddr_sa; 1846 sin6 = (struct sockaddr_in6 *)addr; 1847 /* XXX could we get a mapped address ::ffff:0.0.0.0 ? */ 1848 if (addrlen != 1849 (t_uscalar_t)sizeof (struct sockaddr_in6) || 1850 (!IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, 1851 &faddr6->sin6_addr) && 1852 !IN6_ARE_ADDR_EQUAL(&faddr6->sin6_addr, &zeroes)) || 1853 (so->so_type != SOCK_RAW && 1854 sin6->sin6_port != faddr6->sin6_port && 1855 faddr6->sin6_port != 0)) { 1856 #ifdef DEBUG 1857 dprintso(so, 0, 1858 ("sockfs: T_UNITDATA_IND mismatch: %s", 1859 pr_addr(so->so_family, 1860 (struct sockaddr *)addr, 1861 addrlen))); 1862 dprintso(so, 0, (" - %s\n", 1863 pr_addr(so->so_family, so->so_faddr_sa, 1864 (t_uscalar_t)so->so_faddr_len))); 1865 #endif /* DEBUG */ 1866 mutex_exit(&so->so_lock); 1867 freemsg(mp); 1868 return (NULL); 1869 } 1870 mutex_exit(&so->so_lock); 1871 } else if (so->so_family == AF_UNIX && 1872 msgdsize(mp->b_cont) == 0 && 1873 tudi->OPT_length != 0) { 1874 /* 1875 * Attempt to extract AF_UNIX 1876 * SO_UNIX_CLOSE indication from options. 
1877 */ 1878 void *opt; 1879 t_uscalar_t optlen = tudi->OPT_length; 1880 1881 opt = sogetoff(mp, tudi->OPT_offset, 1882 optlen, __TPI_ALIGN_SIZE); 1883 if (opt == NULL) { 1884 /* The len/off falls outside mp */ 1885 freemsg(mp); 1886 mutex_enter(&so->so_lock); 1887 soseterror(so, EPROTO); 1888 mutex_exit(&so->so_lock); 1889 zcmn_err(getzoneid(), CE_WARN, 1890 "sockfs: T_unidata_ind with invalid " 1891 "optlen/offset %u/%d\n", 1892 optlen, tudi->OPT_offset); 1893 return (NULL); 1894 } 1895 /* 1896 * If we received a unix close indication mark the 1897 * socket and discard this message. 1898 */ 1899 if (so_getopt_unix_close(opt, optlen)) { 1900 mutex_enter(&so->so_lock); 1901 sobreakconn(so, ECONNRESET); 1902 mutex_exit(&so->so_lock); 1903 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 1904 freemsg(mp); 1905 *pollwakeups = POLLIN | POLLRDNORM; 1906 *allmsgsigs = S_INPUT | S_RDNORM; 1907 *wakeups = RSLEEP; 1908 return (NULL); 1909 } 1910 } 1911 *allmsgsigs = S_INPUT | S_RDNORM; 1912 *pollwakeups = POLLIN | POLLRDNORM; 1913 *wakeups = RSLEEP; 1914 return (mp); 1915 } 1916 1917 case T_OPTDATA_IND: { 1918 struct T_optdata_ind *tdi = &tpr->optdata_ind; 1919 1920 if (MBLKL(mp) < sizeof (struct T_optdata_ind)) { 1921 zcmn_err(getzoneid(), CE_WARN, 1922 "sockfs: Too short T_OPTDATA_IND. Len = %ld\n", 1923 (ptrdiff_t)(MBLKL(mp))); 1924 freemsg(mp); 1925 return (NULL); 1926 } 1927 /* 1928 * Allow zero-length messages carrying options. 1929 * This is used when carrying the SO_UNIX_CLOSE option. 1930 */ 1931 if (so->so_family == AF_UNIX && msgdsize(mp->b_cont) == 0 && 1932 tdi->OPT_length != 0) { 1933 /* 1934 * Attempt to extract AF_UNIX close indication 1935 * from the options. Ignore any other options - 1936 * those are handled once the message is removed 1937 * from the queue. 1938 * The close indication message should not carry data. 1939 */ 1940 void *opt; 1941 t_uscalar_t optlen = tdi->OPT_length; 1942 1943 opt = sogetoff(mp, tdi->OPT_offset, 1944 optlen, __TPI_ALIGN_SIZE); 1945 if (opt == NULL) { 1946 /* The len/off falls outside mp */ 1947 freemsg(mp); 1948 mutex_enter(&so->so_lock); 1949 soseterror(so, EPROTO); 1950 mutex_exit(&so->so_lock); 1951 zcmn_err(getzoneid(), CE_WARN, 1952 "sockfs: T_optdata_ind with invalid " 1953 "optlen/offset %u/%d\n", 1954 optlen, tdi->OPT_offset); 1955 return (NULL); 1956 } 1957 /* 1958 * If we received a close indication mark the 1959 * socket and discard this message. 1960 */ 1961 if (so_getopt_unix_close(opt, optlen)) { 1962 mutex_enter(&so->so_lock); 1963 socantsendmore(so); 1964 mutex_exit(&so->so_lock); 1965 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 1966 freemsg(mp); 1967 return (NULL); 1968 } 1969 } 1970 *allmsgsigs = S_INPUT | S_RDNORM; 1971 *pollwakeups = POLLIN | POLLRDNORM; 1972 *wakeups = RSLEEP; 1973 return (mp); 1974 } 1975 1976 case T_EXDATA_IND: { 1977 mblk_t *mctl, *mdata; 1978 mblk_t *lbp; 1979 union T_primitives *tprp; 1980 struct stdata *stp; 1981 queue_t *qp; 1982 1983 if (MBLKL(mp) < sizeof (struct T_exdata_ind)) { 1984 zcmn_err(getzoneid(), CE_WARN, 1985 "sockfs: Too short T_EXDATA_IND. Len = %ld\n", 1986 (ptrdiff_t)(MBLKL(mp))); 1987 freemsg(mp); 1988 return (NULL); 1989 } 1990 /* 1991 * Ignore zero-length T_EXDATA_IND messages. These might be 1992 * generated by some transports. 1993 * 1994 * This is needed to prevent read (which skips the M_PROTO 1995 * part) to unexpectedly return 0 (or return EWOULDBLOCK 1996 * on a non-blocking socket after select/poll has indicated 1997 * that data is available). 
    case T_EXDATA_IND: {
        mblk_t *mctl, *mdata;
        mblk_t *lbp;
        union T_primitives *tprp;
        struct stdata *stp;
        queue_t *qp;

        if (MBLKL(mp) < sizeof (struct T_exdata_ind)) {
            zcmn_err(getzoneid(), CE_WARN,
                "sockfs: Too short T_EXDATA_IND. Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }
        /*
         * Ignore zero-length T_EXDATA_IND messages. These might be
         * generated by some transports.
         *
         * This is needed to prevent read (which skips the M_PROTO
         * part) from unexpectedly returning 0 (or returning
         * EWOULDBLOCK on a non-blocking socket after select/poll
         * has indicated that data is available).
         */
        dprintso(so, 1,
            ("T_EXDATA_IND(%p): counts %d/%d state %s\n",
            vp, so->so_oobsigcnt, so->so_oobcnt,
            pr_state(so->so_state, so->so_mode)));

        if (msgdsize(mp->b_cont) == 0) {
            dprintso(so, 0,
                ("strsock_proto: zero length T_EXDATA_IND\n"));
            freemsg(mp);
            return (NULL);
        }

        /*
         * Split into the T_EXDATA_IND and the M_DATA part.
         * We process these three pieces separately:
         *     signal generation
         *     handling T_EXDATA_IND
         *     handling M_DATA component
         */
        mctl = mp;
        mdata = mctl->b_cont;
        mctl->b_cont = NULL;
        mutex_enter(&so->so_lock);
        so_oob_sig(so, 0, allmsgsigs, pollwakeups);
        mctl = so_oob_exdata(so, mctl, allmsgsigs, pollwakeups);
        mdata = so_oob_data(so, mdata, allmsgsigs, pollwakeups);

        stp = vp->v_stream;
        ASSERT(stp != NULL);
        qp = _RD(stp->sd_wrq);

        mutex_enter(QLOCK(qp));
        lbp = qp->q_last;

        /*
         * We want to avoid queueing up a string of T_EXDATA_IND
         * messages with no intervening data messages at the stream
         * head. These messages contribute to the total message
         * count. Eventually this can lead to STREAMS flow control
         * and also cause TCP to advertise a zero window condition
         * to the peer. This can happen in the degenerate case where
         * the sender and receiver exchange only OOB data. The sender
         * only sends messages with MSG_OOB flag and the receiver
         * receives only MSG_OOB messages and does not use SO_OOBINLINE.
         * An example of this scenario has been reported in applications
         * that use OOB data to exchange heartbeats. Flow control
         * relief will never happen if the application only reads OOB
         * data which is done directly by sorecvoob() and the
         * T_EXDATA_IND messages at the streamhead won't be consumed.
         * Note that there is no correctness issue in compressing the
         * string of T_EXDATA_IND messages into a single T_EXDATA_IND
         * message. A single read that does not specify MSG_OOB will
         * read across all the marks in a loop in sotpi_recvmsg().
         * Each mark is individually distinguishable only if the
         * T_EXDATA_IND messages are separated by data messages.
         */
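        /*
         * Illustrative example added for the scenario described above:
         * with a heartbeat-style peer that sends nothing but MSG_OOB
         * bytes, and a receiver that fetches them only with
         * recv(..., MSG_OOB) and never sets SO_OOBINLINE, the stream
         * head read queue would otherwise keep growing:
         *
         *     [T_EXDATA_IND] -> [T_EXDATA_IND] -> [T_EXDATA_IND] -> ...
         *
         * sorecvoob() never dequeues these, so the check below collapses
         * a new T_EXDATA_IND into the one already at the tail of the
         * queue instead of adding another message.
         */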
        if ((qp->q_first != NULL) && (DB_TYPE(lbp) == M_PROTO)) {
            tprp = (union T_primitives *)lbp->b_rptr;
            if ((tprp->type == T_EXDATA_IND) &&
                !(so->so_options & SO_OOBINLINE)) {

                /*
                 * free the new M_PROTO message
                 */
                freemsg(mctl);

                /*
                 * adjust the OOB count and OOB signal count
                 * just incremented for the new OOB data.
                 */
                so->so_oobcnt--;
                so->so_oobsigcnt--;
                mutex_exit(QLOCK(qp));
                mutex_exit(&so->so_lock);
                return (NULL);
            }
        }
        mutex_exit(QLOCK(qp));

        /*
         * Pass the T_EXDATA_IND and the M_DATA back separately
         * by using b_next linkage. (The stream head will queue any
         * b_next linked messages separately.) This is needed
         * since MSGMARK applies to the last byte of the message,
         * hence we cannot have any M_DATA component attached
         * to the marked T_EXDATA_IND. Note that the stream head
         * will not consolidate M_DATA messages onto an MSGMARK'ed
         * message in order to preserve the constraint that
         * the T_EXDATA_IND always is a separate message.
         */
        ASSERT(mctl != NULL);
        mctl->b_next = mdata;
        mp = mctl;
#ifdef DEBUG
        if (mdata == NULL) {
            dprintso(so, 1,
                ("after outofline T_EXDATA_IND(%p): "
                "counts %d/%d poll 0x%x sig 0x%x state %s\n",
                vp, so->so_oobsigcnt,
                so->so_oobcnt, *pollwakeups, *allmsgsigs,
                pr_state(so->so_state, so->so_mode)));
        } else {
            dprintso(so, 1,
                ("after inline T_EXDATA_IND(%p): "
                "counts %d/%d poll 0x%x sig 0x%x state %s\n",
                vp, so->so_oobsigcnt,
                so->so_oobcnt, *pollwakeups, *allmsgsigs,
                pr_state(so->so_state, so->so_mode)));
        }
#endif /* DEBUG */
        mutex_exit(&so->so_lock);
        *wakeups = RSLEEP;
        return (mp);
    }
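
    /*
     * Note added for clarity: after the b_next split above the stream head
     * ends up with two back-to-back messages, e.g.
     *
     *     [ T_EXDATA_IND (MSGMARK) ] -> [ M_DATA with the urgent byte(s) ]
     *
     * so the mark always sits on the zero-length control message and never
     * on user data, which is presumably what keeps the SIOCATMARK/MSG_OOB
     * view consistent.  This only restates the comment above; the mark
     * itself is managed by the stream head and the so_oob_*() helpers.
     */
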
    case T_CONN_CON: {
        struct T_conn_con *conn_con;
        void *addr;
        t_uscalar_t addrlen;

        /*
         * Verify the state, update the state to ISCONNECTED,
         * record the potentially new address in the message,
         * and drop the message.
         */
        if (MBLKL(mp) < sizeof (struct T_conn_con)) {
            zcmn_err(getzoneid(), CE_WARN,
                "sockfs: Too short T_CONN_CON. Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }

        mutex_enter(&so->so_lock);
        if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) !=
            SS_ISCONNECTING) {
            mutex_exit(&so->so_lock);
            dprintso(so, 1,
                ("T_CONN_CON: state %x\n", so->so_state));
            freemsg(mp);
            return (NULL);
        }

        conn_con = &tpr->conn_con;
        addrlen = conn_con->RES_length;
        /*
         * Allow the address to be of different size than sent down
         * in the T_CONN_REQ as long as it doesn't exceed the maxlen.
         * For AF_UNIX require the identical length.
         */
        if (so->so_family == AF_UNIX ?
            addrlen != (t_uscalar_t)sizeof (so->so_ux_laddr) :
            addrlen > (t_uscalar_t)so->so_faddr_maxlen) {
            zcmn_err(getzoneid(), CE_WARN,
                "sockfs: T_conn_con with different "
                "length %u/%d\n",
                addrlen, conn_con->RES_length);
            soisdisconnected(so, EPROTO);
            mutex_exit(&so->so_lock);
            strsetrerror(SOTOV(so), 0, 0, sogetrderr);
            strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
            strseteof(SOTOV(so), 1);
            freemsg(mp);
            /*
             * strseteof takes care of read side wakeups,
             * pollwakeups, and signals.
             */
            *wakeups = WSLEEP;
            *allmsgsigs = S_OUTPUT;
            *pollwakeups = POLLOUT;
            return (NULL);
        }
        addr = sogetoff(mp, conn_con->RES_offset, addrlen, 1);
        if (addr == NULL) {
            zcmn_err(getzoneid(), CE_WARN,
                "sockfs: T_conn_con with invalid "
                "addrlen/offset %u/%d\n",
                addrlen, conn_con->RES_offset);
            mutex_exit(&so->so_lock);
            strsetrerror(SOTOV(so), 0, 0, sogetrderr);
            strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
            strseteof(SOTOV(so), 1);
            freemsg(mp);
            /*
             * strseteof takes care of read side wakeups,
             * pollwakeups, and signals.
             */
            *wakeups = WSLEEP;
            *allmsgsigs = S_OUTPUT;
            *pollwakeups = POLLOUT;
            return (NULL);
        }

        /*
         * Save for getpeername.
         */
        if (so->so_family != AF_UNIX) {
            so->so_faddr_len = (socklen_t)addrlen;
            ASSERT(so->so_faddr_len <= so->so_faddr_maxlen);
            bcopy(addr, so->so_faddr_sa, addrlen);
            so->so_state |= SS_FADDR_VALID;
        }

        if (so->so_peercred != NULL)
            crfree(so->so_peercred);
        so->so_peercred = DB_CRED(mp);
        so->so_cpid = DB_CPID(mp);
        if (so->so_peercred != NULL)
            crhold(so->so_peercred);

        /* Wakeup anybody sleeping in sowaitconnected */
        soisconnected(so);
        mutex_exit(&so->so_lock);

        /*
         * The socket is now available for sending data.
         */
        *wakeups = WSLEEP;
        *allmsgsigs = S_OUTPUT;
        *pollwakeups = POLLOUT;
        freemsg(mp);
        return (NULL);
    }
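
    /*
     * Note added for clarity: the WSLEEP/S_OUTPUT/POLLOUT values set in
     * the T_CONN_CON case are what make a non-blocking connect()
     * completion observable from user level, which typically waits with
     * something along these lines (an assumed user-level pattern, not
     * code from this file):
     *
     *     struct pollfd pfd = { .fd = sock, .events = POLLOUT };
     *     (void) poll(&pfd, 1, -1);
     *     ... then check SO_ERROR via getsockopt() ...
     */
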
    /*
     * Extra processing in case of an SSL proxy, before queuing or
     * forwarding to the fallback endpoint
     */
    case T_SSL_PROXY_CONN_IND:
    case T_CONN_IND:
        /*
         * Verify the min size and queue the message on
         * the so_conn_ind_head/tail list.
         */
        if (MBLKL(mp) < sizeof (struct T_conn_ind)) {
            zcmn_err(getzoneid(), CE_WARN,
                "sockfs: Too short T_CONN_IND. Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }

#ifdef C2_AUDIT
        if (audit_active)
            audit_sock(T_CONN_IND, strvp2wq(vp), mp, 0);
#endif /* C2_AUDIT */
        if (!(so->so_state & SS_ACCEPTCONN)) {
            zcmn_err(getzoneid(), CE_WARN,
                "sockfs: T_conn_ind on non-listening socket\n");
            freemsg(mp);
            return (NULL);
        }

        if (tpr->type == T_SSL_PROXY_CONN_IND && mp->b_cont == NULL) {
            /* No context: need to fall back */
            struct sonode *fbso;
            stdata_t *fbstp;

            tpr->type = T_CONN_IND;

            fbso = kssl_find_fallback(so->so_kssl_ent);

            /*
             * No fallback: the remote will timeout and
             * disconnect.
             */
            if (fbso == NULL) {
                freemsg(mp);
                return (NULL);
            }
            fbstp = SOTOV(fbso)->v_stream;
            qreply(fbstp->sd_wrq->q_next, mp);
            return (NULL);
        }
        soqueueconnind(so, mp);
        *allmsgsigs = S_INPUT | S_RDNORM;
        *pollwakeups = POLLIN | POLLRDNORM;
        *wakeups = RSLEEP;
        return (NULL);

    case T_ORDREL_IND:
        if (MBLKL(mp) < sizeof (struct T_ordrel_ind)) {
            zcmn_err(getzoneid(), CE_WARN,
                "sockfs: Too short T_ORDREL_IND. Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }

        /*
         * Some providers send this when not fully connected.
         * SunLink X.25 needs to retrieve disconnect reason after
         * disconnect for compatibility. It uses T_ORDREL_IND
         * instead of T_DISCON_IND so that it may use the
         * endpoint after a connect failure to retrieve the
         * reason using an ioctl. Thus we explicitly clear
         * SS_ISCONNECTING here for SunLink X.25.
         * This is a needed TPI violation.
         */
        mutex_enter(&so->so_lock);
        so->so_state &= ~SS_ISCONNECTING;
        socantrcvmore(so);
        mutex_exit(&so->so_lock);
        strseteof(SOTOV(so), 1);
        /*
         * strseteof takes care of read side wakeups,
         * pollwakeups, and signals.
         */
        freemsg(mp);
        return (NULL);

    case T_DISCON_IND:
        if (MBLKL(mp) < sizeof (struct T_discon_ind)) {
            zcmn_err(getzoneid(), CE_WARN,
                "sockfs: Too short T_DISCON_IND. Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }
        if (so->so_state & SS_ACCEPTCONN) {
            /*
             * This is a listener. Look for a queued T_CONN_IND
             * with a matching sequence number and remove it
             * from the list.
             * It is normal not to find the sequence number since
             * the soaccept might have already dequeued it
             * (in which case the T_CONN_RES will fail with
             * TBADSEQ).
             */
            (void) soflushconnind(so, tpr->discon_ind.SEQ_number);
            freemsg(mp);
            return (NULL);
        }

        /*
         * Not a listener
         *
         * If SS_CANTRCVMORE for AF_UNIX ignore the discon_reason.
         * Such a discon_ind appears when the peer has first done
         * a shutdown() followed by a close() in which case we just
         * want to record socantsendmore.
         * In this case sockfs first receives a T_ORDREL_IND followed
         * by a T_DISCON_IND.
         * Note that for other transports (e.g. TCP) we need to handle
         * the discon_ind in this case since it signals an error.
         */
        mutex_enter(&so->so_lock);
        if ((so->so_state & SS_CANTRCVMORE) &&
            (so->so_family == AF_UNIX)) {
            socantsendmore(so);
            mutex_exit(&so->so_lock);
            strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
            dprintso(so, 1,
                ("T_DISCON_IND: error %d\n", so->so_error));
            freemsg(mp);
            /*
             * Set these variables for caller to process them.
             * For the else part where T_DISCON_IND is processed,
             * this will be done in the function being called
             * (strsock_discon_ind())
             */
            *wakeups = WSLEEP;
            *allmsgsigs = S_OUTPUT;
            *pollwakeups = POLLOUT;
        } else if (so->so_flag & (SOASYNC_UNBIND | SOLOCKED)) {
            /*
             * Deferred processing of T_DISCON_IND
             */
            so_save_discon_ind(so, mp, strsock_discon_ind);
            mutex_exit(&so->so_lock);
        } else {
            /*
             * Process T_DISCON_IND now
             */
            (void) strsock_discon_ind(so, mp);
            mutex_exit(&so->so_lock);
        }
        return (NULL);
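
    /*
     * Illustrative timeline for the AF_UNIX special case above, derived
     * from the comments in the T_ORDREL_IND and T_DISCON_IND cases: when
     * the peer does shutdown() followed by close(), this socket sees
     *
     *     T_ORDREL_IND  -> socantrcvmore(), EOF on the read side
     *     T_DISCON_IND  -> socantsendmore(), discon_reason ignored
     *
     * whereas for transports such as TCP the same T_DISCON_IND signals a
     * real error and goes through strsock_discon_ind().
     */
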
    case T_UDERROR_IND: {
        struct T_uderror_ind *tudi = &tpr->uderror_ind;
        void *addr;
        t_uscalar_t addrlen;
        int error;

        dprintso(so, 0,
            ("T_UDERROR_IND: error %d\n", tudi->ERROR_type));

        if (MBLKL(mp) < sizeof (struct T_uderror_ind)) {
            zcmn_err(getzoneid(), CE_WARN,
                "sockfs: Too short T_UDERROR_IND. Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }
        /* Ignore on connection-oriented transports */
        if (so->so_mode & SM_CONNREQUIRED) {
            freemsg(mp);
            eprintsoline(so, 0);
            zcmn_err(getzoneid(), CE_WARN,
                "sockfs: T_uderror_ind on connection-oriented "
                "transport\n");
            return (NULL);
        }
        addrlen = tudi->DEST_length;
        addr = sogetoff(mp, tudi->DEST_offset, addrlen, 1);
        if (addr == NULL) {
            zcmn_err(getzoneid(), CE_WARN,
                "sockfs: T_uderror_ind with invalid "
                "addrlen/offset %u/%d\n",
                addrlen, tudi->DEST_offset);
            freemsg(mp);
            return (NULL);
        }

        /* Verify source address for connected socket. */
        mutex_enter(&so->so_lock);
        if (so->so_state & SS_ISCONNECTED) {
            void *faddr;
            t_uscalar_t faddr_len;
            boolean_t match = B_FALSE;

            switch (so->so_family) {
            case AF_INET: {
                /* Compare just IP address and port */
                struct sockaddr_in *sin1, *sin2;

                sin1 = (struct sockaddr_in *)so->so_faddr_sa;
                sin2 = (struct sockaddr_in *)addr;
                if (addrlen == sizeof (struct sockaddr_in) &&
                    sin1->sin_port == sin2->sin_port &&
                    sin1->sin_addr.s_addr ==
                    sin2->sin_addr.s_addr)
                    match = B_TRUE;
                break;
            }
            case AF_INET6: {
                /* Compare just IP address and port; not the flowinfo */
                struct sockaddr_in6 *sin1, *sin2;

                sin1 = (struct sockaddr_in6 *)so->so_faddr_sa;
                sin2 = (struct sockaddr_in6 *)addr;
                if (addrlen == sizeof (struct sockaddr_in6) &&
                    sin1->sin6_port == sin2->sin6_port &&
                    IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
                    &sin2->sin6_addr))
                    match = B_TRUE;
                break;
            }
            case AF_UNIX:
                faddr = &so->so_ux_faddr;
                faddr_len =
                    (t_uscalar_t)sizeof (so->so_ux_faddr);
                if (faddr_len == addrlen &&
                    bcmp(addr, faddr, addrlen) == 0)
                    match = B_TRUE;
                break;
            default:
                faddr = so->so_faddr_sa;
                faddr_len = (t_uscalar_t)so->so_faddr_len;
                if (faddr_len == addrlen &&
                    bcmp(addr, faddr, addrlen) == 0)
                    match = B_TRUE;
                break;
            }

            if (!match) {
#ifdef DEBUG
                dprintso(so, 0,
                    ("sockfs: T_UDERR_IND mismatch: %s - ",
                    pr_addr(so->so_family,
                    (struct sockaddr *)addr,
                    addrlen)));
                dprintso(so, 0, ("%s\n",
                    pr_addr(so->so_family, so->so_faddr_sa,
                    so->so_faddr_len)));
#endif /* DEBUG */
                mutex_exit(&so->so_lock);
                freemsg(mp);
                return (NULL);
            }
            /*
             * Make the write error nonpersistent. If the error
             * is zero we use ECONNRESET.
             * This assumes that the name space for ERROR_type
             * is the errno name space.
             */
            if (tudi->ERROR_type != 0)
                error = tudi->ERROR_type;
            else
                error = ECONNRESET;

            soseterror(so, error);
            mutex_exit(&so->so_lock);
            strsetrerror(SOTOV(so), 0, 0, sogetrderr);
            strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
            *wakeups = RSLEEP | WSLEEP;
            *allmsgsigs = S_INPUT | S_RDNORM | S_OUTPUT;
            *pollwakeups = POLLIN | POLLRDNORM | POLLOUT;
            freemsg(mp);
            return (NULL);
        }
        /*
         * If the application asked for delayed errors
         * record the T_UDERROR_IND in so_eaddr_mp and the reason in
         * so_delayed_error for delayed error posting. If the reason
         * is zero use ECONNRESET.
         * Note that delayed error indications do not make sense for
         * AF_UNIX sockets since sendto checks that the destination
         * address is valid at the time of the sendto.
         */
        if (!(so->so_options & SO_DGRAM_ERRIND)) {
            mutex_exit(&so->so_lock);
            freemsg(mp);
            return (NULL);
        }
        if (so->so_eaddr_mp != NULL)
            freemsg(so->so_eaddr_mp);

        so->so_eaddr_mp = mp;
        if (tudi->ERROR_type != 0)
            error = tudi->ERROR_type;
        else
            error = ECONNRESET;
        so->so_delayed_error = (ushort_t)error;
        mutex_exit(&so->so_lock);
        return (NULL);
    }
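
    /*
     * Note added for clarity: when SO_DGRAM_ERRIND is set, nothing is
     * delivered to the application at the time the T_UDERROR_IND arrives;
     * the saved so_eaddr_mp/so_delayed_error pair is presumably consumed
     * by a later send path that matches the recorded destination address.
     * See the consumers of so_delayed_error elsewhere in sockfs for the
     * exact behavior.
     */
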
    case T_ERROR_ACK:
        dprintso(so, 0,
            ("strsock_proto: T_ERROR_ACK for %d, error %d/%d\n",
            tpr->error_ack.ERROR_prim,
            tpr->error_ack.TLI_error,
            tpr->error_ack.UNIX_error));

        if (MBLKL(mp) < sizeof (struct T_error_ack)) {
            zcmn_err(getzoneid(), CE_WARN,
                "sockfs: Too short T_ERROR_ACK. Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }
        /*
         * Check if we were waiting for the async message
         */
        mutex_enter(&so->so_lock);
        if ((so->so_flag & SOASYNC_UNBIND) &&
            tpr->error_ack.ERROR_prim == T_UNBIND_REQ) {
            so_unlock_single(so, SOASYNC_UNBIND);
            mutex_exit(&so->so_lock);
            freemsg(mp);
            return (NULL);
        }
        mutex_exit(&so->so_lock);
        soqueueack(so, mp);
        return (NULL);

    case T_OK_ACK:
        if (MBLKL(mp) < sizeof (struct T_ok_ack)) {
            zcmn_err(getzoneid(), CE_WARN,
                "sockfs: Too short T_OK_ACK. Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }
        /*
         * Check if we were waiting for the async message
         */
        mutex_enter(&so->so_lock);
        if ((so->so_flag & SOASYNC_UNBIND) &&
            tpr->ok_ack.CORRECT_prim == T_UNBIND_REQ) {
            dprintso(so, 1,
                ("strsock_proto: T_OK_ACK async unbind\n"));
            so_unlock_single(so, SOASYNC_UNBIND);
            mutex_exit(&so->so_lock);
            freemsg(mp);
            return (NULL);
        }
        mutex_exit(&so->so_lock);
        soqueueack(so, mp);
        return (NULL);

    case T_INFO_ACK:
        if (MBLKL(mp) < sizeof (struct T_info_ack)) {
            zcmn_err(getzoneid(), CE_WARN,
                "sockfs: Too short T_INFO_ACK. Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }
        soqueueack(so, mp);
        return (NULL);

    case T_CAPABILITY_ACK:
        /*
         * A T_capability_ack need only be large enough to hold
         * the PRIM_type and CAP_bits1 fields; checking for anything
         * larger might reject a correct response from an older
         * provider.
         */
        if (MBLKL(mp) < 2 * sizeof (t_uscalar_t)) {
            zcmn_err(getzoneid(), CE_WARN,
                "sockfs: Too short T_CAPABILITY_ACK. Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }
        soqueueack(so, mp);
        return (NULL);

    case T_BIND_ACK:
        if (MBLKL(mp) < sizeof (struct T_bind_ack)) {
            zcmn_err(getzoneid(), CE_WARN,
                "sockfs: Too short T_BIND_ACK. Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }
        soqueueack(so, mp);
        return (NULL);

    case T_OPTMGMT_ACK:
        if (MBLKL(mp) < sizeof (struct T_optmgmt_ack)) {
            zcmn_err(getzoneid(), CE_WARN,
                "sockfs: Too short T_OPTMGMT_ACK. Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }
        soqueueack(so, mp);
        return (NULL);
    default:
#ifdef DEBUG
        zcmn_err(getzoneid(), CE_WARN,
            "sockfs: unknown TPI primitive %d received\n",
            tpr->type);
#endif /* DEBUG */
        freemsg(mp);
        return (NULL);
    }
}
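
/*
 * Note added for clarity: the T_*_ACK messages handed to soqueueack() above
 * are presumably retrieved by the thread that issued the corresponding
 * request (the sowait*() paths elsewhere in sockfs).  The only acks consumed
 * directly in strsock_proto() are those for an asynchronous T_UNBIND_REQ,
 * which simply release the SOASYNC_UNBIND single-threading flag.
 */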

/*
 * This routine is registered with the stream head to receive other
 * (non-data, and non-proto) messages.
 *
 * Returns NULL if the message was consumed.
 * Returns an mblk when that mblk should be processed by the stream head.
 *
 * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and
 * *pollwakeups) for the stream head to take action on.
 */
static mblk_t *
strsock_misc(vnode_t *vp, mblk_t *mp,
    strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
    strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
{
    struct sonode *so;

    so = VTOSO(vp);

    dprintso(so, 1, ("strsock_misc(%p, %p, 0x%x)\n",
        vp, mp, DB_TYPE(mp)));

    /* Set default return values */
    *wakeups = *allmsgsigs = *firstmsgsigs = *pollwakeups = 0;

    switch (DB_TYPE(mp)) {
    case M_PCSIG:
        /*
         * This assumes that an M_PCSIG for the urgent data arrives
         * before the corresponding T_EXDATA_IND.
         *
         * Note: Just like in SunOS 4.X and 4.4BSD a poll will be
         * awoken before the urgent data shows up.
         * For OOBINLINE this can result in select returning
         * only exceptions as opposed to except|read.
         */
        if (*mp->b_rptr == SIGURG) {
            mutex_enter(&so->so_lock);
            dprintso(so, 1,
                ("SIGURG(%p): counts %d/%d state %s\n",
                vp, so->so_oobsigcnt,
                so->so_oobcnt,
                pr_state(so->so_state, so->so_mode)));
            so_oob_sig(so, 1, allmsgsigs, pollwakeups);
            dprintso(so, 1,
                ("after SIGURG(%p): counts %d/%d "
                " poll 0x%x sig 0x%x state %s\n",
                vp, so->so_oobsigcnt,
                so->so_oobcnt, *pollwakeups, *allmsgsigs,
                pr_state(so->so_state, so->so_mode)));
            mutex_exit(&so->so_lock);
        }
        freemsg(mp);
        return (NULL);

    case M_SIG:
    case M_HANGUP:
    case M_UNHANGUP:
    case M_ERROR:
        /* M_ERRORs etc. are ignored */
        freemsg(mp);
        return (NULL);

    case M_FLUSH:
        /*
         * Do not flush read queue. If the M_FLUSH
         * arrives because of an impending T_discon_ind
         * we still have to keep any queued data - this is part of
         * socket semantics.
         */
        if (*mp->b_rptr & FLUSHW) {
            *mp->b_rptr &= ~FLUSHR;
            return (mp);
        }
        freemsg(mp);
        return (NULL);

    default:
        return (mp);
    }
}


/* Register to receive signals for certain events */
int
so_set_asyncsigs(vnode_t *vp, pid_t pgrp, int events, int mode, cred_t *cr)
{
    struct strsigset ss;
    int32_t rval;

    /*
     * Note that SOLOCKED will be set except for the call from soaccept().
     */
    ASSERT(!mutex_owned(&VTOSO(vp)->so_lock));
    ss.ss_pid = pgrp;
    ss.ss_events = events;
    return (strioctl(vp, I_ESETSIG, (intptr_t)&ss, mode, K_TO_K, cr,
        &rval));
}
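
/*
 * Note added for clarity: I_ESETSIG (see streamio(7I)) registers the given
 * pid for the events in ss_events at the stream head (a negative ss_pid
 * presumably naming a process group), and ss_events == 0 removes an
 * existing registration - which is how so_set_siggrp() below clears the
 * previous owner.  A sketch of the call made above, with hypothetical
 * values:
 *
 *     struct strsigset ss = { .ss_pid = pgrp, .ss_events = S_RDNORM };
 *     (void) strioctl(vp, I_ESETSIG, (intptr_t)&ss, 0, K_TO_K, cr, &rval);
 */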

/* Register for events matching the SS_ASYNC flag */
int
so_set_events(struct sonode *so, vnode_t *vp, cred_t *cr)
{
    int events = so->so_state & SS_ASYNC ?
        S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT :
        S_RDBAND | S_BANDURG;

    return (so_set_asyncsigs(vp, so->so_pgrp, events, 0, cr));
}


/* Change the SS_ASYNC flag, and update signal delivery if needed */
int
so_flip_async(struct sonode *so, vnode_t *vp, int mode, cred_t *cr)
{
    ASSERT(mutex_owned(&so->so_lock));
    if (so->so_pgrp != 0) {
        int error;
        int events = so->so_state & SS_ASYNC ?  /* Old flag */
            S_RDBAND | S_BANDURG :              /* New sigs */
            S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT;

        so_lock_single(so);
        mutex_exit(&so->so_lock);

        error = so_set_asyncsigs(vp, so->so_pgrp, events, mode, cr);

        mutex_enter(&so->so_lock);
        so_unlock_single(so, SOLOCKED);
        if (error)
            return (error);
    }
    so->so_state ^= SS_ASYNC;
    return (0);
}

/*
 * Set new pid/pgrp for SIGPOLL (or SIGIO for FIOASYNC mode), replacing
 * any existing one. If passed zero, just clear the existing one.
 */
int
so_set_siggrp(struct sonode *so, vnode_t *vp, pid_t pgrp, int mode, cred_t *cr)
{
    int events = so->so_state & SS_ASYNC ?
        S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT :
        S_RDBAND | S_BANDURG;
    int error;

    ASSERT(mutex_owned(&so->so_lock));

    /*
     * Change socket process (group).
     *
     * strioctl (via so_set_asyncsigs) will perform permission check and
     * also keep a PID_HOLD to prevent the pid from being reused.
     */
    so_lock_single(so);
    mutex_exit(&so->so_lock);

    if (pgrp != 0) {
        dprintso(so, 1, ("setown: adding pgrp %d ev 0x%x\n",
            pgrp, events));
        error = so_set_asyncsigs(vp, pgrp, events, mode, cr);
        if (error != 0) {
            eprintsoline(so, error);
            goto bad;
        }
    }
    /* Remove the previously registered process/group */
    if (so->so_pgrp != 0) {
        dprintso(so, 1, ("setown: removing pgrp %d\n", so->so_pgrp));
        error = so_set_asyncsigs(vp, so->so_pgrp, 0, mode, cr);
        if (error != 0) {
            eprintsoline(so, error);
            error = 0;
        }
    }
    mutex_enter(&so->so_lock);
    so_unlock_single(so, SOLOCKED);
    so->so_pgrp = pgrp;
    return (0);
bad:
    mutex_enter(&so->so_lock);
    so_unlock_single(so, SOLOCKED);
    return (error);
}
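
/*
 * Note added for clarity: these routines back the usual user-level async
 * I/O controls.  For example, an application doing roughly
 *
 *     (void) fcntl(sock, F_SETOWN, getpid());
 *     (void) fcntl(sock, F_SETFL, FASYNC);
 *
 * presumably ends up in so_set_siggrp() and so_flip_async() via the
 * SIOCSPGRP/FIOASYNC ioctl paths of sockfs, after which SIGPOLL/SIGIO
 * delivery follows the I_ESETSIG registrations made above.  The snippet is
 * an assumed user-level pattern, not code from this file.
 */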
2835 */ 2836 static const int tli_errs[] = { 2837 0, /* no error */ 2838 EADDRNOTAVAIL, /* TBADADDR */ 2839 ENOPROTOOPT, /* TBADOPT */ 2840 EACCES, /* TACCES */ 2841 EBADF, /* TBADF */ 2842 EADDRNOTAVAIL, /* TNOADDR */ 2843 EPROTO, /* TOUTSTATE */ 2844 ECONNABORTED, /* TBADSEQ */ 2845 0, /* TSYSERR - will never get */ 2846 EPROTO, /* TLOOK - should never be sent by transport */ 2847 EMSGSIZE, /* TBADDATA */ 2848 EMSGSIZE, /* TBUFOVFLW */ 2849 EPROTO, /* TFLOW */ 2850 EWOULDBLOCK, /* TNODATA */ 2851 EPROTO, /* TNODIS */ 2852 EPROTO, /* TNOUDERR */ 2853 EINVAL, /* TBADFLAG */ 2854 EPROTO, /* TNOREL */ 2855 EOPNOTSUPP, /* TNOTSUPPORT */ 2856 EPROTO, /* TSTATECHNG */ 2857 /* following represent error namespace expansion with XTI */ 2858 EPROTO, /* TNOSTRUCTYPE - never sent by transport */ 2859 EPROTO, /* TBADNAME - never sent by transport */ 2860 EPROTO, /* TBADQLEN - never sent by transport */ 2861 EADDRINUSE, /* TADDRBUSY */ 2862 EBADF, /* TINDOUT */ 2863 EBADF, /* TPROVMISMATCH */ 2864 EBADF, /* TRESQLEN */ 2865 EBADF, /* TRESADDR */ 2866 EPROTO, /* TQFULL - never sent by transport */ 2867 EPROTO, /* TPROTO */ 2868 }; 2869 2870 static int 2871 tlitosyserr(int terr) 2872 { 2873 ASSERT(terr != TSYSERR); 2874 if (terr >= (sizeof (tli_errs) / sizeof (tli_errs[0]))) 2875 return (EPROTO); 2876 else 2877 return (tli_errs[terr]); 2878 } 2879