1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/inttypes.h> 29 #include <sys/t_lock.h> 30 #include <sys/param.h> 31 #include <sys/systm.h> 32 #include <sys/buf.h> 33 #include <sys/conf.h> 34 #include <sys/cred.h> 35 #include <sys/kmem.h> 36 #include <sys/sysmacros.h> 37 #include <sys/vfs.h> 38 #include <sys/vnode.h> 39 #include <sys/debug.h> 40 #include <sys/errno.h> 41 #include <sys/time.h> 42 #include <sys/file.h> 43 #include <sys/user.h> 44 #include <sys/stream.h> 45 #include <sys/strsubr.h> 46 #include <sys/esunddi.h> 47 #include <sys/flock.h> 48 #include <sys/modctl.h> 49 #include <sys/vtrace.h> 50 #include <sys/strsun.h> 51 #include <sys/cmn_err.h> 52 #include <sys/proc.h> 53 #include <sys/ddi.h> 54 55 #include <sys/suntpi.h> 56 #include <sys/socket.h> 57 #include <sys/sockio.h> 58 #include <sys/socketvar.h> 59 #include <sys/sodirect.h> 60 #include <netinet/in.h> 61 #include <inet/common.h> 62 #include <inet/proto_set.h> 63 64 #include <sys/tiuser.h> 65 #define _SUN_TPI_VERSION 2 66 #include <sys/tihdr.h> 67 68 
#include <inet/kssl/ksslapi.h>

#include <c2/audit.h>

#include <fs/sockfs/socktpi.h>
#include <fs/sockfs/socktpi_impl.h>
#include <sys/dcopy.h>

/* Default socket version; initialized to SOV_SOCKSTREAM. */
int so_default_version = SOV_SOCKSTREAM;

#ifdef DEBUG
/* Set sockdebug to print debug messages when SO_DEBUG is set */
int sockdebug = 0;

/* Set sockprinterr to print error messages when SO_DEBUG is set */
int sockprinterr = 0;

/*
 * Set so_default_options to SO_DEBUG if all sockets should be created
 * with SO_DEBUG set. This is needed to get debug printouts from the
 * socket() call itself.
 */
int so_default_options = 0;
#endif /* DEBUG */

#ifdef SOCK_TEST
/*
 * Set to number of ticks to limit cv_waits for code coverage testing.
 * Set to 1000 when SO_DEBUG is set to 2.
 */
clock_t sock_test_timelimit = 0;
#endif /* SOCK_TEST */

/*
 * For concurrency testing of e.g. opening /dev/ip which does not
 * handle T_INFO_REQ messages.
 */
int so_no_tinfo = 0;

/*
 * Timeout for getting a T_CAPABILITY_ACK - it is possible for a provider
 * to simply ignore the T_CAPABILITY_REQ.
 * do_tcapability() multiplies this value by hz to obtain ticks.
 */
clock_t sock_capability_timeout = 2;	/* seconds */

static int	do_tcapability(struct sonode *so, t_uscalar_t cap_bits1);
static void	so_removehooks(struct sonode *so);

/*
 * Stream head read-side hooks; so_installhooks() registers both of them
 * through strsetrputhooks().
 */
static mblk_t *strsock_proto(vnode_t *vp, mblk_t *mp,
		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
		strsigset_t *allmsgsigs, strpollset_t *pollwakeups);
static mblk_t *strsock_misc(vnode_t *vp, mblk_t *mp,
		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
		strsigset_t *allmsgsigs, strpollset_t *pollwakeups);
/*
 * STREAMS based sodirect put/wakeup functions.
 */
static int sodput(sodirect_t *, mblk_t *);
static void sodwakeup(sodirect_t *);

/*
 * Called by sockinit() when sockfs is loaded.
130 */ 131 int 132 sostr_init() 133 { 134 sod_init(); 135 return (0); 136 } 137 138 /* 139 * Convert a socket to a stream. Invoked when the illusory sockmod 140 * is popped from the stream. 141 * Change the stream head back to default operation without losing 142 * any messages (T_conn_ind's are moved to the stream head queue). 143 */ 144 int 145 so_sock2stream(struct sonode *so) 146 { 147 struct vnode *vp = SOTOV(so); 148 queue_t *rq; 149 mblk_t *mp; 150 int error = 0; 151 sotpi_info_t *sti = SOTOTPI(so); 152 153 ASSERT(MUTEX_HELD(&sti->sti_plumb_lock)); 154 155 mutex_enter(&so->so_lock); 156 so_lock_single(so); 157 158 ASSERT(so->so_version != SOV_STREAM); 159 160 if (sti->sti_direct) { 161 mblk_t **mpp; 162 int rval; 163 164 /* 165 * Tell the transport below that sockmod is being popped 166 */ 167 mutex_exit(&so->so_lock); 168 error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K, CRED(), 169 &rval); 170 mutex_enter(&so->so_lock); 171 if (error != 0) { 172 dprintso(so, 0, ("so_sock2stream(%p): " 173 "_SIOCSOCKFALLBACK failed\n", (void *)so)); 174 goto exit; 175 } 176 sti->sti_direct = 0; 177 178 for (mpp = &sti->sti_conn_ind_head; (mp = *mpp) != NULL; 179 mpp = &mp->b_next) { 180 struct T_conn_ind *conn_ind; 181 182 /* 183 * strsock_proto() has already verified the length of 184 * this message block. 185 */ 186 ASSERT(MBLKL(mp) >= sizeof (struct T_conn_ind)); 187 188 conn_ind = (struct T_conn_ind *)mp->b_rptr; 189 if (conn_ind->OPT_length == 0 && 190 conn_ind->OPT_offset == 0) 191 continue; 192 193 if (DB_REF(mp) > 1) { 194 mblk_t *newmp; 195 size_t length; 196 cred_t *cr; 197 198 /* 199 * Copy the message block because it is used 200 * elsewhere, too. 
201 */ 202 length = MBLKL(mp); 203 newmp = soallocproto(length, _ALLOC_INTR); 204 if (newmp == NULL) { 205 error = EINTR; 206 goto exit; 207 } 208 bcopy(mp->b_rptr, newmp->b_wptr, length); 209 newmp->b_wptr += length; 210 newmp->b_next = mp->b_next; 211 cr = DB_CRED(mp); 212 if (cr != NULL) 213 mblk_setcred(newmp, cr); 214 DB_CPID(newmp) = DB_CPID(mp); 215 216 /* 217 * Link the new message block into the queue 218 * and free the old one. 219 */ 220 *mpp = newmp; 221 mp->b_next = NULL; 222 freemsg(mp); 223 224 mp = newmp; 225 conn_ind = (struct T_conn_ind *)mp->b_rptr; 226 } 227 228 /* 229 * Remove options added by TCP for accept fast-path. 230 */ 231 conn_ind->OPT_length = 0; 232 conn_ind->OPT_offset = 0; 233 } 234 } 235 236 so->so_version = SOV_STREAM; 237 so->so_proto_handle = NULL; 238 239 /* 240 * Remove the hooks in the stream head to avoid queuing more 241 * packets in sockfs. 242 */ 243 mutex_exit(&so->so_lock); 244 so_removehooks(so); 245 mutex_enter(&so->so_lock); 246 247 /* 248 * Clear any state related to urgent data. Leave any T_EXDATA_IND 249 * on the queue - the behavior of urgent data after a switch is 250 * left undefined. 251 */ 252 so->so_error = sti->sti_delayed_error = 0; 253 freemsg(so->so_oobmsg); 254 so->so_oobmsg = NULL; 255 sti->sti_oobsigcnt = sti->sti_oobcnt = 0; 256 257 so->so_state &= ~(SS_RCVATMARK|SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA| 258 SS_SAVEDEOR); 259 ASSERT(so_verify_oobstate(so)); 260 261 freemsg(sti->sti_ack_mp); 262 sti->sti_ack_mp = NULL; 263 264 /* 265 * Flush the T_DISCON_IND on sti_discon_ind_mp. 266 */ 267 so_flush_discon_ind(so); 268 269 /* 270 * Move any queued T_CONN_IND messages to stream head queue. 
271 */ 272 rq = RD(strvp2wq(vp)); 273 while ((mp = sti->sti_conn_ind_head) != NULL) { 274 sti->sti_conn_ind_head = mp->b_next; 275 mp->b_next = NULL; 276 if (sti->sti_conn_ind_head == NULL) { 277 ASSERT(sti->sti_conn_ind_tail == mp); 278 sti->sti_conn_ind_tail = NULL; 279 } 280 dprintso(so, 0, 281 ("so_sock2stream(%p): moving T_CONN_IND\n", (void *)so)); 282 283 /* Drop lock across put() */ 284 mutex_exit(&so->so_lock); 285 put(rq, mp); 286 mutex_enter(&so->so_lock); 287 } 288 289 exit: 290 ASSERT(MUTEX_HELD(&so->so_lock)); 291 so_unlock_single(so, SOLOCKED); 292 mutex_exit(&so->so_lock); 293 return (error); 294 } 295 296 /* 297 * Covert a stream back to a socket. This is invoked when the illusory 298 * sockmod is pushed on a stream (where the stream was "created" by 299 * popping the illusory sockmod). 300 * This routine can not recreate the socket state (certain aspects of 301 * it like urgent data state and the bound/connected addresses for AF_UNIX 302 * sockets can not be recreated by asking the transport for information). 303 * Thus this routine implicitly assumes that the socket is in an initial 304 * state (as if it was just created). It flushes any messages queued on the 305 * read queue to avoid dealing with e.g. TPI acks or T_exdata_ind messages. 306 */ 307 void 308 so_stream2sock(struct sonode *so) 309 { 310 struct vnode *vp = SOTOV(so); 311 sotpi_info_t *sti = SOTOTPI(so); 312 313 ASSERT(MUTEX_HELD(&sti->sti_plumb_lock)); 314 315 mutex_enter(&so->so_lock); 316 so_lock_single(so); 317 ASSERT(so->so_version == SOV_STREAM); 318 so->so_version = SOV_SOCKSTREAM; 319 sti->sti_pushcnt = 0; 320 mutex_exit(&so->so_lock); 321 322 /* 323 * Set a permenent error to force any thread in sorecvmsg to 324 * return (and drop SOREADLOCKED). Clear the error once 325 * we have SOREADLOCKED. 326 * This makes a read sleeping during the I_PUSH of sockmod return 327 * EIO. 
328 */ 329 strsetrerror(SOTOV(so), EIO, 1, NULL); 330 331 /* 332 * Get the read lock before flushing data to avoid 333 * problems with the T_EXDATA_IND MSG_PEEK code in sorecvmsg. 334 */ 335 mutex_enter(&so->so_lock); 336 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 337 mutex_exit(&so->so_lock); 338 339 strsetrerror(SOTOV(so), 0, 0, NULL); 340 so_installhooks(so); 341 342 /* 343 * Flush everything on the read queue. 344 * This ensures that no T_CONN_IND remain and that no T_EXDATA_IND 345 * remain; those types of messages would confuse sockfs. 346 */ 347 strflushrq(vp, FLUSHALL); 348 mutex_enter(&so->so_lock); 349 350 /* 351 * Flush the T_DISCON_IND on sti_discon_ind_mp. 352 */ 353 so_flush_discon_ind(so); 354 so_unlock_read(so); /* Clear SOREADLOCKED */ 355 356 so_unlock_single(so, SOLOCKED); 357 mutex_exit(&so->so_lock); 358 } 359 360 /* 361 * Install the hooks in the stream head. 362 */ 363 void 364 so_installhooks(struct sonode *so) 365 { 366 struct vnode *vp = SOTOV(so); 367 368 strsetrputhooks(vp, SH_SIGALLDATA | SH_IGN_ZEROLEN | SH_CONSOL_DATA, 369 strsock_proto, strsock_misc); 370 strsetwputhooks(vp, SH_SIGPIPE | SH_RECHECK_ERR, 0); 371 } 372 373 /* 374 * Remove the hooks in the stream head. 375 */ 376 static void 377 so_removehooks(struct sonode *so) 378 { 379 struct vnode *vp = SOTOV(so); 380 381 strsetrputhooks(vp, 0, NULL, NULL); 382 strsetwputhooks(vp, 0, STRTIMOUT); 383 /* 384 * Leave read behavior as it would have been for a normal 385 * stream i.e. a read of an M_PROTO will fail. 
386 */ 387 } 388 389 void 390 so_basic_strinit(struct sonode *so) 391 { 392 struct vnode *vp = SOTOV(so); 393 struct stdata *stp; 394 mblk_t *mp; 395 sotpi_info_t *sti = SOTOTPI(so); 396 397 /* Preallocate an unbind_req message */ 398 mp = soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP); 399 mutex_enter(&so->so_lock); 400 sti->sti_unbind_mp = mp; 401 #ifdef DEBUG 402 so->so_options = so_default_options; 403 #endif /* DEBUG */ 404 mutex_exit(&so->so_lock); 405 406 so_installhooks(so); 407 408 stp = vp->v_stream; 409 /* 410 * Have to keep minpsz at zero in order to allow write/send of zero 411 * bytes. 412 */ 413 mutex_enter(&stp->sd_lock); 414 if (stp->sd_qn_minpsz == 1) 415 stp->sd_qn_minpsz = 0; 416 mutex_exit(&stp->sd_lock); 417 418 /* 419 * If sodirect capable allocate and initialize sodirect_t. 420 * Note, SS_SODIRECT is set in socktpi_open(). 421 */ 422 if ((so->so_state & SS_SODIRECT) && 423 !(so->so_state & SS_FALLBACK_PENDING)) { 424 sod_sock_init(so, stp, sodput, sodwakeup, &stp->sd_lock); 425 } 426 } 427 428 /* 429 * Initialize the streams side of a socket including 430 * T_info_req/ack processing. If tso is not NULL its values are used thereby 431 * avoiding the T_INFO_REQ. 432 */ 433 int 434 so_strinit(struct sonode *so, struct sonode *tso) 435 { 436 sotpi_info_t *sti = SOTOTPI(so); 437 sotpi_info_t *tsti; 438 int error; 439 440 so_basic_strinit(so); 441 442 /* 443 * The T_CAPABILITY_REQ should be the first message sent down because 444 * at least TCP has a fast-path for this which avoids timeouts while 445 * waiting for the T_CAPABILITY_ACK under high system load. 
446 */ 447 if (tso == NULL) { 448 error = do_tcapability(so, TC1_ACCEPTOR_ID | TC1_INFO); 449 if (error) 450 return (error); 451 } else { 452 tsti = SOTOTPI(tso); 453 454 mutex_enter(&so->so_lock); 455 sti->sti_tsdu_size = tsti->sti_tsdu_size; 456 sti->sti_etsdu_size = tsti->sti_etsdu_size; 457 sti->sti_addr_size = tsti->sti_addr_size; 458 sti->sti_opt_size = tsti->sti_opt_size; 459 sti->sti_tidu_size = tsti->sti_tidu_size; 460 sti->sti_serv_type = tsti->sti_serv_type; 461 so->so_mode = tso->so_mode & ~SM_ACCEPTOR_ID; 462 mutex_exit(&so->so_lock); 463 464 /* the following do_tcapability may update so->so_mode */ 465 if ((tsti->sti_serv_type != T_CLTS) && 466 (sti->sti_direct == 0)) { 467 error = do_tcapability(so, TC1_ACCEPTOR_ID); 468 if (error) 469 return (error); 470 } 471 } 472 /* 473 * If the addr_size is 0 we treat it as already bound 474 * and connected. This is used by the routing socket. 475 * We set the addr_size to something to allocate a the address 476 * structures. 477 */ 478 if (sti->sti_addr_size == 0) { 479 so->so_state |= SS_ISBOUND | SS_ISCONNECTED; 480 /* Address size can vary with address families. 
*/ 481 if (so->so_family == AF_INET6) 482 sti->sti_addr_size = 483 (t_scalar_t)sizeof (struct sockaddr_in6); 484 else 485 sti->sti_addr_size = 486 (t_scalar_t)sizeof (struct sockaddr_in); 487 ASSERT(sti->sti_unbind_mp); 488 } 489 490 so_alloc_addr(so, sti->sti_addr_size); 491 492 return (0); 493 } 494 495 static void 496 copy_tinfo(struct sonode *so, struct T_info_ack *tia) 497 { 498 sotpi_info_t *sti = SOTOTPI(so); 499 500 sti->sti_tsdu_size = tia->TSDU_size; 501 sti->sti_etsdu_size = tia->ETSDU_size; 502 sti->sti_addr_size = tia->ADDR_size; 503 sti->sti_opt_size = tia->OPT_size; 504 sti->sti_tidu_size = tia->TIDU_size; 505 sti->sti_serv_type = tia->SERV_type; 506 switch (tia->CURRENT_state) { 507 case TS_UNBND: 508 break; 509 case TS_IDLE: 510 so->so_state |= SS_ISBOUND; 511 sti->sti_laddr_len = 0; 512 sti->sti_laddr_valid = 0; 513 break; 514 case TS_DATA_XFER: 515 so->so_state |= SS_ISBOUND|SS_ISCONNECTED; 516 sti->sti_laddr_len = 0; 517 sti->sti_faddr_len = 0; 518 sti->sti_laddr_valid = 0; 519 sti->sti_faddr_valid = 0; 520 break; 521 } 522 523 /* 524 * Heuristics for determining the socket mode flags 525 * (SM_ATOMIC, SM_CONNREQUIRED, SM_ADDR, SM_FDPASSING, 526 * and SM_EXDATA, SM_OPTDATA, and SM_BYTESTREAM) 527 * from the info ack. 528 */ 529 if (sti->sti_serv_type == T_CLTS) { 530 so->so_mode |= SM_ATOMIC | SM_ADDR; 531 } else { 532 so->so_mode |= SM_CONNREQUIRED; 533 if (sti->sti_etsdu_size != 0 && sti->sti_etsdu_size != -2) 534 so->so_mode |= SM_EXDATA; 535 } 536 if (so->so_type == SOCK_SEQPACKET || so->so_type == SOCK_RAW) { 537 /* Semantics are to discard tail end of messages */ 538 so->so_mode |= SM_ATOMIC; 539 } 540 if (so->so_family == AF_UNIX) { 541 so->so_mode |= SM_FDPASSING | SM_OPTDATA; 542 if (sti->sti_addr_size == -1) { 543 /* MAXPATHLEN + soun_family + nul termination */ 544 sti->sti_addr_size = (t_scalar_t)(MAXPATHLEN + 545 sizeof (short) + 1); 546 } 547 if (so->so_type == SOCK_STREAM) { 548 /* 549 * Make it into a byte-stream transport. 
550 * SOCK_SEQPACKET sockets are unchanged. 551 */ 552 sti->sti_tsdu_size = 0; 553 } 554 } else if (sti->sti_addr_size == -1) { 555 /* 556 * Logic extracted from sockmod - have to pick some max address 557 * length in order to preallocate the addresses. 558 */ 559 sti->sti_addr_size = SOA_DEFSIZE; 560 } 561 if (sti->sti_tsdu_size == 0) 562 so->so_mode |= SM_BYTESTREAM; 563 } 564 565 static int 566 check_tinfo(struct sonode *so) 567 { 568 sotpi_info_t *sti = SOTOTPI(so); 569 570 /* Consistency checks */ 571 if (so->so_type == SOCK_DGRAM && sti->sti_serv_type != T_CLTS) { 572 eprintso(so, ("service type and socket type mismatch\n")); 573 eprintsoline(so, EPROTO); 574 return (EPROTO); 575 } 576 if (so->so_type == SOCK_STREAM && sti->sti_serv_type == T_CLTS) { 577 eprintso(so, ("service type and socket type mismatch\n")); 578 eprintsoline(so, EPROTO); 579 return (EPROTO); 580 } 581 if (so->so_type == SOCK_SEQPACKET && sti->sti_serv_type == T_CLTS) { 582 eprintso(so, ("service type and socket type mismatch\n")); 583 eprintsoline(so, EPROTO); 584 return (EPROTO); 585 } 586 if (so->so_family == AF_INET && 587 sti->sti_addr_size != (t_scalar_t)sizeof (struct sockaddr_in)) { 588 eprintso(so, 589 ("AF_INET must have sockaddr_in address length. Got %d\n", 590 sti->sti_addr_size)); 591 eprintsoline(so, EMSGSIZE); 592 return (EMSGSIZE); 593 } 594 if (so->so_family == AF_INET6 && 595 sti->sti_addr_size != (t_scalar_t)sizeof (struct sockaddr_in6)) { 596 eprintso(so, 597 ("AF_INET6 must have sockaddr_in6 address length. 
Got %d\n", 598 sti->sti_addr_size)); 599 eprintsoline(so, EMSGSIZE); 600 return (EMSGSIZE); 601 } 602 603 dprintso(so, 1, ( 604 "tinfo: serv %d tsdu %d, etsdu %d, addr %d, opt %d, tidu %d\n", 605 sti->sti_serv_type, sti->sti_tsdu_size, sti->sti_etsdu_size, 606 sti->sti_addr_size, sti->sti_opt_size, 607 sti->sti_tidu_size)); 608 dprintso(so, 1, ("tinfo: so_state %s\n", 609 pr_state(so->so_state, so->so_mode))); 610 return (0); 611 } 612 613 /* 614 * Send down T_info_req and wait for the ack. 615 * Record interesting T_info_ack values in the sonode. 616 */ 617 static int 618 do_tinfo(struct sonode *so) 619 { 620 struct T_info_req tir; 621 mblk_t *mp; 622 int error; 623 624 ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 625 626 if (so_no_tinfo) { 627 SOTOTPI(so)->sti_addr_size = 0; 628 return (0); 629 } 630 631 dprintso(so, 1, ("do_tinfo(%p)\n", (void *)so)); 632 633 /* Send T_INFO_REQ */ 634 tir.PRIM_type = T_INFO_REQ; 635 mp = soallocproto1(&tir, sizeof (tir), 636 sizeof (struct T_info_req) + sizeof (struct T_info_ack), 637 _ALLOC_INTR); 638 if (mp == NULL) { 639 eprintsoline(so, ENOBUFS); 640 return (ENOBUFS); 641 } 642 /* T_INFO_REQ has to be M_PCPROTO */ 643 DB_TYPE(mp) = M_PCPROTO; 644 645 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 646 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 647 if (error) { 648 eprintsoline(so, error); 649 return (error); 650 } 651 mutex_enter(&so->so_lock); 652 /* Wait for T_INFO_ACK */ 653 if ((error = sowaitprim(so, T_INFO_REQ, T_INFO_ACK, 654 (t_uscalar_t)sizeof (struct T_info_ack), &mp, 0))) { 655 mutex_exit(&so->so_lock); 656 eprintsoline(so, error); 657 return (error); 658 } 659 660 ASSERT(mp); 661 copy_tinfo(so, (struct T_info_ack *)mp->b_rptr); 662 mutex_exit(&so->so_lock); 663 freemsg(mp); 664 return (check_tinfo(so)); 665 } 666 667 /* 668 * Send down T_capability_req and wait for the ack. 669 * Record interesting T_capability_ack values in the sonode. 
670 */ 671 static int 672 do_tcapability(struct sonode *so, t_uscalar_t cap_bits1) 673 { 674 struct T_capability_req tcr; 675 struct T_capability_ack *tca; 676 mblk_t *mp; 677 int error; 678 sotpi_info_t *sti = SOTOTPI(so); 679 680 ASSERT(cap_bits1 != 0); 681 ASSERT((cap_bits1 & ~(TC1_ACCEPTOR_ID | TC1_INFO)) == 0); 682 ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 683 684 if (sti->sti_provinfo->tpi_capability == PI_NO) 685 return (do_tinfo(so)); 686 687 if (so_no_tinfo) { 688 sti->sti_addr_size = 0; 689 if ((cap_bits1 &= ~TC1_INFO) == 0) 690 return (0); 691 } 692 693 dprintso(so, 1, ("do_tcapability(%p)\n", (void *)so)); 694 695 /* Send T_CAPABILITY_REQ */ 696 tcr.PRIM_type = T_CAPABILITY_REQ; 697 tcr.CAP_bits1 = cap_bits1; 698 mp = soallocproto1(&tcr, sizeof (tcr), 699 sizeof (struct T_capability_req) + sizeof (struct T_capability_ack), 700 _ALLOC_INTR); 701 if (mp == NULL) { 702 eprintsoline(so, ENOBUFS); 703 return (ENOBUFS); 704 } 705 /* T_CAPABILITY_REQ should be M_PCPROTO here */ 706 DB_TYPE(mp) = M_PCPROTO; 707 708 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 709 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 710 if (error) { 711 eprintsoline(so, error); 712 return (error); 713 } 714 mutex_enter(&so->so_lock); 715 /* Wait for T_CAPABILITY_ACK */ 716 if ((error = sowaitprim(so, T_CAPABILITY_REQ, T_CAPABILITY_ACK, 717 (t_uscalar_t)sizeof (*tca), &mp, sock_capability_timeout * hz))) { 718 mutex_exit(&so->so_lock); 719 PI_PROVLOCK(sti->sti_provinfo); 720 if (sti->sti_provinfo->tpi_capability == PI_DONTKNOW) 721 sti->sti_provinfo->tpi_capability = PI_NO; 722 PI_PROVUNLOCK(sti->sti_provinfo); 723 ASSERT((so->so_mode & SM_ACCEPTOR_ID) == 0); 724 if (cap_bits1 & TC1_INFO) { 725 /* 726 * If the T_CAPABILITY_REQ timed out and then a 727 * T_INFO_REQ gets a protocol error, most likely 728 * the capability was slow (vs. unsupported). Return 729 * ENOSR for this case as a best guess. 730 */ 731 if (error == ETIME) { 732 return ((error = do_tinfo(so)) == EPROTO ? 
733 ENOSR : error); 734 } 735 return (do_tinfo(so)); 736 } 737 return (0); 738 } 739 740 ASSERT(mp); 741 tca = (struct T_capability_ack *)mp->b_rptr; 742 743 ASSERT((cap_bits1 & TC1_INFO) == (tca->CAP_bits1 & TC1_INFO)); 744 so_proc_tcapability_ack(so, tca); 745 746 cap_bits1 = tca->CAP_bits1; 747 748 mutex_exit(&so->so_lock); 749 freemsg(mp); 750 751 if (cap_bits1 & TC1_INFO) 752 return (check_tinfo(so)); 753 754 return (0); 755 } 756 757 /* 758 * Process a T_CAPABILITY_ACK 759 */ 760 void 761 so_proc_tcapability_ack(struct sonode *so, struct T_capability_ack *tca) 762 { 763 sotpi_info_t *sti = SOTOTPI(so); 764 765 if (sti->sti_provinfo->tpi_capability == PI_DONTKNOW) { 766 PI_PROVLOCK(sti->sti_provinfo); 767 sti->sti_provinfo->tpi_capability = PI_YES; 768 PI_PROVUNLOCK(sti->sti_provinfo); 769 } 770 771 if (tca->CAP_bits1 & TC1_ACCEPTOR_ID) { 772 sti->sti_acceptor_id = tca->ACCEPTOR_id; 773 so->so_mode |= SM_ACCEPTOR_ID; 774 } 775 776 if (tca->CAP_bits1 & TC1_INFO) 777 copy_tinfo(so, &tca->INFO_ack); 778 } 779 780 /* 781 * Retrieve socket error, clear error if not peek. 782 */ 783 int 784 sogeterr(struct sonode *so, boolean_t clear_err) 785 { 786 int error; 787 788 ASSERT(MUTEX_HELD(&so->so_lock)); 789 790 error = so->so_error; 791 if (clear_err) 792 so->so_error = 0; 793 794 return (error); 795 } 796 797 /* 798 * This routine is registered with the stream head to retrieve read 799 * side errors. 800 * It does not clear the socket error for a peeking read side operation. 801 * It the error is to be cleared it sets *clearerr. 
802 */ 803 int 804 sogetrderr(vnode_t *vp, int ispeek, int *clearerr) 805 { 806 struct sonode *so = VTOSO(vp); 807 int error; 808 809 mutex_enter(&so->so_lock); 810 if (ispeek) { 811 error = so->so_error; 812 *clearerr = 0; 813 } else { 814 error = so->so_error; 815 so->so_error = 0; 816 *clearerr = 1; 817 } 818 mutex_exit(&so->so_lock); 819 return (error); 820 } 821 822 /* 823 * This routine is registered with the stream head to retrieve write 824 * side errors. 825 * It does not clear the socket error for a peeking read side operation. 826 * It the error is to be cleared it sets *clearerr. 827 */ 828 int 829 sogetwrerr(vnode_t *vp, int ispeek, int *clearerr) 830 { 831 struct sonode *so = VTOSO(vp); 832 int error; 833 834 mutex_enter(&so->so_lock); 835 if (so->so_state & SS_CANTSENDMORE) { 836 error = EPIPE; 837 *clearerr = 0; 838 } else { 839 error = so->so_error; 840 if (ispeek) { 841 *clearerr = 0; 842 } else { 843 so->so_error = 0; 844 *clearerr = 1; 845 } 846 } 847 mutex_exit(&so->so_lock); 848 return (error); 849 } 850 851 /* 852 * Set a nonpersistent read and write error on the socket. 853 * Used when there is a T_uderror_ind for a connected socket. 854 * The caller also needs to call strsetrerror and strsetwerror 855 * after dropping the lock. 856 */ 857 void 858 soseterror(struct sonode *so, int error) 859 { 860 ASSERT(error != 0); 861 862 ASSERT(MUTEX_HELD(&so->so_lock)); 863 so->so_error = (ushort_t)error; 864 } 865 866 void 867 soisconnecting(struct sonode *so) 868 { 869 ASSERT(MUTEX_HELD(&so->so_lock)); 870 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); 871 so->so_state |= SS_ISCONNECTING; 872 cv_broadcast(&so->so_state_cv); 873 } 874 875 void 876 soisconnected(struct sonode *so) 877 { 878 ASSERT(MUTEX_HELD(&so->so_lock)); 879 so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING); 880 so->so_state |= SS_ISCONNECTED; 881 cv_broadcast(&so->so_state_cv); 882 } 883 884 /* 885 * The caller also needs to call strsetrerror, strsetwerror and strseteof. 
886 */ 887 void 888 soisdisconnected(struct sonode *so, int error) 889 { 890 ASSERT(MUTEX_HELD(&so->so_lock)); 891 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); 892 so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE); 893 so->so_error = (ushort_t)error; 894 if (so->so_peercred != NULL) { 895 crfree(so->so_peercred); 896 so->so_peercred = NULL; 897 } 898 cv_broadcast(&so->so_state_cv); 899 } 900 901 /* 902 * For connected AF_UNIX SOCK_DGRAM sockets when the peer closes. 903 * Does not affect write side. 904 * The caller also has to call strsetrerror. 905 */ 906 static void 907 sobreakconn(struct sonode *so, int error) 908 { 909 ASSERT(MUTEX_HELD(&so->so_lock)); 910 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); 911 so->so_error = (ushort_t)error; 912 cv_broadcast(&so->so_state_cv); 913 } 914 915 /* 916 * Can no longer send. 917 * Caller must also call strsetwerror. 918 * 919 * We mark the peer address as no longer valid for getpeername, but 920 * leave it around for so_unix_close to notify the peer (that 921 * transport has no addressing held at that layer). 922 */ 923 void 924 socantsendmore(struct sonode *so) 925 { 926 ASSERT(MUTEX_HELD(&so->so_lock)); 927 so->so_state |= SS_CANTSENDMORE; 928 cv_broadcast(&so->so_state_cv); 929 } 930 931 /* 932 * The caller must call strseteof(,1) as well as this routine 933 * to change the socket state. 934 */ 935 void 936 socantrcvmore(struct sonode *so) 937 { 938 ASSERT(MUTEX_HELD(&so->so_lock)); 939 so->so_state |= SS_CANTRCVMORE; 940 cv_broadcast(&so->so_state_cv); 941 } 942 943 /* 944 * The caller has sent down a "request_prim" primitive and wants to wait for 945 * an ack ("ack_prim") or an T_ERROR_ACK for it. 946 * The specified "ack_prim" can be a T_OK_ACK. 947 * 948 * Assumes that all the TPI acks are M_PCPROTO messages. 949 * 950 * Note that the socket is single-threaded (using so_lock_single) 951 * for all operations that generate TPI ack messages. 
Since 952 * only TPI ack messages are M_PCPROTO we should never receive 953 * anything except either the ack we are expecting or a T_ERROR_ACK 954 * for the same primitive. 955 */ 956 int 957 sowaitprim(struct sonode *so, t_scalar_t request_prim, t_scalar_t ack_prim, 958 t_uscalar_t min_size, mblk_t **mpp, clock_t wait) 959 { 960 mblk_t *mp; 961 union T_primitives *tpr; 962 int error; 963 964 dprintso(so, 1, ("sowaitprim(%p, %d, %d, %d, %p, %lu)\n", 965 (void *)so, request_prim, ack_prim, min_size, (void *)mpp, wait)); 966 967 ASSERT(MUTEX_HELD(&so->so_lock)); 968 969 error = sowaitack(so, &mp, wait); 970 if (error) 971 return (error); 972 973 dprintso(so, 1, ("got msg %p\n", (void *)mp)); 974 if (DB_TYPE(mp) != M_PCPROTO || 975 MBLKL(mp) < sizeof (tpr->type)) { 976 freemsg(mp); 977 eprintsoline(so, EPROTO); 978 return (EPROTO); 979 } 980 tpr = (union T_primitives *)mp->b_rptr; 981 /* 982 * Did we get the primitive that we were asking for? 983 * For T_OK_ACK we also check that it matches the request primitive. 
984 */ 985 if (tpr->type == ack_prim && 986 (ack_prim != T_OK_ACK || 987 tpr->ok_ack.CORRECT_prim == request_prim)) { 988 if (MBLKL(mp) >= (ssize_t)min_size) { 989 /* Found what we are looking for */ 990 *mpp = mp; 991 return (0); 992 } 993 /* Too short */ 994 freemsg(mp); 995 eprintsoline(so, EPROTO); 996 return (EPROTO); 997 } 998 999 if (tpr->type == T_ERROR_ACK && 1000 tpr->error_ack.ERROR_prim == request_prim) { 1001 /* Error to the primitive we were looking for */ 1002 if (tpr->error_ack.TLI_error == TSYSERR) { 1003 error = tpr->error_ack.UNIX_error; 1004 } else { 1005 error = proto_tlitosyserr(tpr->error_ack.TLI_error); 1006 } 1007 dprintso(so, 0, ("error_ack for %d: %d/%d ->%d\n", 1008 tpr->error_ack.ERROR_prim, tpr->error_ack.TLI_error, 1009 tpr->error_ack.UNIX_error, error)); 1010 freemsg(mp); 1011 return (error); 1012 } 1013 /* 1014 * Wrong primitive or T_ERROR_ACK for the wrong primitive 1015 */ 1016 #ifdef DEBUG 1017 if (tpr->type == T_ERROR_ACK) { 1018 dprintso(so, 0, ("error_ack for %d: %d/%d\n", 1019 tpr->error_ack.ERROR_prim, tpr->error_ack.TLI_error, 1020 tpr->error_ack.UNIX_error)); 1021 } else if (tpr->type == T_OK_ACK) { 1022 dprintso(so, 0, ("ok_ack for %d, expected %d for %d\n", 1023 tpr->ok_ack.CORRECT_prim, ack_prim, request_prim)); 1024 } else { 1025 dprintso(so, 0, 1026 ("unexpected primitive %d, expected %d for %d\n", 1027 tpr->type, ack_prim, request_prim)); 1028 } 1029 #endif /* DEBUG */ 1030 1031 freemsg(mp); 1032 eprintsoline(so, EPROTO); 1033 return (EPROTO); 1034 } 1035 1036 /* 1037 * Wait for a T_OK_ACK for the specified primitive. 1038 */ 1039 int 1040 sowaitokack(struct sonode *so, t_scalar_t request_prim) 1041 { 1042 mblk_t *mp; 1043 int error; 1044 1045 error = sowaitprim(so, request_prim, T_OK_ACK, 1046 (t_uscalar_t)sizeof (struct T_ok_ack), &mp, 0); 1047 if (error) 1048 return (error); 1049 freemsg(mp); 1050 return (0); 1051 } 1052 1053 /* 1054 * Queue a received TPI ack message on sti_ack_mp. 
1055 */ 1056 void 1057 soqueueack(struct sonode *so, mblk_t *mp) 1058 { 1059 sotpi_info_t *sti = SOTOTPI(so); 1060 1061 if (DB_TYPE(mp) != M_PCPROTO) { 1062 zcmn_err(getzoneid(), CE_WARN, 1063 "sockfs: received unexpected M_PROTO TPI ack. Prim %d\n", 1064 *(t_scalar_t *)mp->b_rptr); 1065 freemsg(mp); 1066 return; 1067 } 1068 1069 mutex_enter(&so->so_lock); 1070 if (sti->sti_ack_mp != NULL) { 1071 dprintso(so, 1, ("sti_ack_mp already set\n")); 1072 freemsg(sti->sti_ack_mp); 1073 sti->sti_ack_mp = NULL; 1074 } 1075 sti->sti_ack_mp = mp; 1076 cv_broadcast(&sti->sti_ack_cv); 1077 mutex_exit(&so->so_lock); 1078 } 1079 1080 /* 1081 * Wait for a TPI ack ignoring signals and errors. 1082 */ 1083 int 1084 sowaitack(struct sonode *so, mblk_t **mpp, clock_t wait) 1085 { 1086 sotpi_info_t *sti = SOTOTPI(so); 1087 1088 ASSERT(MUTEX_HELD(&so->so_lock)); 1089 1090 while (sti->sti_ack_mp == NULL) { 1091 #ifdef SOCK_TEST 1092 if (wait == 0 && sock_test_timelimit != 0) 1093 wait = sock_test_timelimit; 1094 #endif 1095 if (wait != 0) { 1096 /* 1097 * Only wait for the time limit. 1098 */ 1099 clock_t now; 1100 1101 time_to_wait(&now, wait); 1102 if (cv_timedwait(&sti->sti_ack_cv, &so->so_lock, 1103 now) == -1) { 1104 eprintsoline(so, ETIME); 1105 return (ETIME); 1106 } 1107 } 1108 else 1109 cv_wait(&sti->sti_ack_cv, &so->so_lock); 1110 } 1111 *mpp = sti->sti_ack_mp; 1112 #ifdef DEBUG 1113 { 1114 union T_primitives *tpr; 1115 mblk_t *mp = *mpp; 1116 1117 tpr = (union T_primitives *)mp->b_rptr; 1118 ASSERT(DB_TYPE(mp) == M_PCPROTO); 1119 ASSERT(tpr->type == T_OK_ACK || 1120 tpr->type == T_ERROR_ACK || 1121 tpr->type == T_BIND_ACK || 1122 tpr->type == T_CAPABILITY_ACK || 1123 tpr->type == T_INFO_ACK || 1124 tpr->type == T_OPTMGMT_ACK); 1125 } 1126 #endif /* DEBUG */ 1127 sti->sti_ack_mp = NULL; 1128 return (0); 1129 } 1130 1131 /* 1132 * Queue a received T_CONN_IND message on sti_conn_ind_head/tail. 
1133 */ 1134 void 1135 soqueueconnind(struct sonode *so, mblk_t *mp) 1136 { 1137 sotpi_info_t *sti = SOTOTPI(so); 1138 1139 if (DB_TYPE(mp) != M_PROTO) { 1140 zcmn_err(getzoneid(), CE_WARN, 1141 "sockfs: received unexpected M_PCPROTO T_CONN_IND\n"); 1142 freemsg(mp); 1143 return; 1144 } 1145 1146 mutex_enter(&so->so_lock); 1147 ASSERT(mp->b_next == NULL); 1148 if (sti->sti_conn_ind_head == NULL) { 1149 sti->sti_conn_ind_head = mp; 1150 } else { 1151 ASSERT(sti->sti_conn_ind_tail->b_next == NULL); 1152 sti->sti_conn_ind_tail->b_next = mp; 1153 } 1154 sti->sti_conn_ind_tail = mp; 1155 /* Wakeup a single consumer of the T_CONN_IND */ 1156 cv_signal(&so->so_acceptq_cv); 1157 mutex_exit(&so->so_lock); 1158 } 1159 1160 /* 1161 * Wait for a T_CONN_IND. 1162 * Don't wait if nonblocking. 1163 * Accept signals and socket errors. 1164 */ 1165 int 1166 sowaitconnind(struct sonode *so, int fmode, mblk_t **mpp) 1167 { 1168 mblk_t *mp; 1169 sotpi_info_t *sti = SOTOTPI(so); 1170 int error = 0; 1171 1172 ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 1173 mutex_enter(&so->so_lock); 1174 check_error: 1175 if (so->so_error) { 1176 error = sogeterr(so, B_TRUE); 1177 if (error) { 1178 mutex_exit(&so->so_lock); 1179 return (error); 1180 } 1181 } 1182 1183 if (sti->sti_conn_ind_head == NULL) { 1184 if (fmode & (FNDELAY|FNONBLOCK)) { 1185 error = EWOULDBLOCK; 1186 goto done; 1187 } 1188 1189 if (so->so_state & SS_CLOSING) { 1190 error = EINTR; 1191 goto done; 1192 } 1193 1194 if (!cv_wait_sig_swap(&so->so_acceptq_cv, &so->so_lock)) { 1195 error = EINTR; 1196 goto done; 1197 } 1198 goto check_error; 1199 } 1200 mp = sti->sti_conn_ind_head; 1201 sti->sti_conn_ind_head = mp->b_next; 1202 mp->b_next = NULL; 1203 if (sti->sti_conn_ind_head == NULL) { 1204 ASSERT(sti->sti_conn_ind_tail == mp); 1205 sti->sti_conn_ind_tail = NULL; 1206 } 1207 *mpp = mp; 1208 done: 1209 mutex_exit(&so->so_lock); 1210 return (error); 1211 } 1212 1213 /* 1214 * Flush a T_CONN_IND matching the sequence number from the list. 
1215 * Return zero if found; non-zero otherwise. 1216 * This is called very infrequently thus it is ok to do a linear search. 1217 */ 1218 int 1219 soflushconnind(struct sonode *so, t_scalar_t seqno) 1220 { 1221 mblk_t *prevmp, *mp; 1222 struct T_conn_ind *tci; 1223 sotpi_info_t *sti = SOTOTPI(so); 1224 1225 mutex_enter(&so->so_lock); 1226 for (prevmp = NULL, mp = sti->sti_conn_ind_head; mp != NULL; 1227 prevmp = mp, mp = mp->b_next) { 1228 tci = (struct T_conn_ind *)mp->b_rptr; 1229 if (tci->SEQ_number == seqno) { 1230 dprintso(so, 1, 1231 ("t_discon_ind: found T_CONN_IND %d\n", seqno)); 1232 /* Deleting last? */ 1233 if (sti->sti_conn_ind_tail == mp) { 1234 sti->sti_conn_ind_tail = prevmp; 1235 } 1236 if (prevmp == NULL) { 1237 /* Deleting first */ 1238 sti->sti_conn_ind_head = mp->b_next; 1239 } else { 1240 prevmp->b_next = mp->b_next; 1241 } 1242 mp->b_next = NULL; 1243 1244 ASSERT((sti->sti_conn_ind_head == NULL && 1245 sti->sti_conn_ind_tail == NULL) || 1246 (sti->sti_conn_ind_head != NULL && 1247 sti->sti_conn_ind_tail != NULL)); 1248 1249 so->so_error = ECONNABORTED; 1250 mutex_exit(&so->so_lock); 1251 1252 /* 1253 * T_KSSL_PROXY_CONN_IND may carry a handle for 1254 * an SSL context, and needs to be released. 1255 */ 1256 if ((tci->PRIM_type == T_SSL_PROXY_CONN_IND) && 1257 (mp->b_cont != NULL)) { 1258 kssl_ctx_t kssl_ctx; 1259 1260 ASSERT(MBLKL(mp->b_cont) == 1261 sizeof (kssl_ctx_t)); 1262 kssl_ctx = *((kssl_ctx_t *)mp->b_cont->b_rptr); 1263 kssl_release_ctx(kssl_ctx); 1264 } 1265 freemsg(mp); 1266 return (0); 1267 } 1268 } 1269 mutex_exit(&so->so_lock); 1270 dprintso(so, 1, ("t_discon_ind: NOT found T_CONN_IND %d\n", seqno)); 1271 return (-1); 1272 } 1273 1274 /* 1275 * Wait until the socket is connected or there is an error. 1276 * fmode should contain any nonblocking flags. nosig should be 1277 * set if the caller does not want the wait to be interrupted by a signal. 
1278 */ 1279 int 1280 sowaitconnected(struct sonode *so, int fmode, int nosig) 1281 { 1282 int error; 1283 1284 ASSERT(MUTEX_HELD(&so->so_lock)); 1285 1286 while ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 1287 SS_ISCONNECTING && so->so_error == 0) { 1288 1289 dprintso(so, 1, ("waiting for SS_ISCONNECTED on %p\n", 1290 (void *)so)); 1291 if (fmode & (FNDELAY|FNONBLOCK)) 1292 return (EINPROGRESS); 1293 1294 if (so->so_state & SS_CLOSING) 1295 return (EINTR); 1296 1297 if (nosig) 1298 cv_wait(&so->so_state_cv, &so->so_lock); 1299 else if (!cv_wait_sig_swap(&so->so_state_cv, &so->so_lock)) { 1300 /* 1301 * Return EINTR and let the application use 1302 * nonblocking techniques for detecting when 1303 * the connection has been established. 1304 */ 1305 return (EINTR); 1306 } 1307 dprintso(so, 1, ("awoken on %p\n", (void *)so)); 1308 } 1309 1310 if (so->so_error != 0) { 1311 error = sogeterr(so, B_TRUE); 1312 ASSERT(error != 0); 1313 dprintso(so, 1, ("sowaitconnected: error %d\n", error)); 1314 return (error); 1315 } 1316 if (!(so->so_state & SS_ISCONNECTED)) { 1317 /* 1318 * Could have received a T_ORDREL_IND or a T_DISCON_IND with 1319 * zero errno. Or another thread could have consumed so_error 1320 * e.g. by calling read. 1321 */ 1322 error = ECONNREFUSED; 1323 dprintso(so, 1, ("sowaitconnected: error %d\n", error)); 1324 return (error); 1325 } 1326 return (0); 1327 } 1328 1329 1330 /* 1331 * Handle the signal generation aspect of urgent data. 1332 */ 1333 static void 1334 so_oob_sig(struct sonode *so, int extrasig, 1335 strsigset_t *signals, strpollset_t *pollwakeups) 1336 { 1337 sotpi_info_t *sti = SOTOTPI(so); 1338 1339 ASSERT(MUTEX_HELD(&so->so_lock)); 1340 1341 ASSERT(so_verify_oobstate(so)); 1342 ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt); 1343 if (sti->sti_oobsigcnt > sti->sti_oobcnt) { 1344 /* 1345 * Signal has already been generated once for this 1346 * urgent "event". 
However, since TCP can receive updated 1347 * urgent pointers we still generate a signal. 1348 */ 1349 ASSERT(so->so_state & SS_OOBPEND); 1350 if (extrasig) { 1351 *signals |= S_RDBAND; 1352 *pollwakeups |= POLLRDBAND; 1353 } 1354 return; 1355 } 1356 1357 sti->sti_oobsigcnt++; 1358 ASSERT(sti->sti_oobsigcnt > 0); /* Wraparound */ 1359 ASSERT(sti->sti_oobsigcnt > sti->sti_oobcnt); 1360 1361 /* 1362 * Record (for select/poll) that urgent data is pending. 1363 */ 1364 so->so_state |= SS_OOBPEND; 1365 /* 1366 * New urgent data on the way so forget about any old 1367 * urgent data. 1368 */ 1369 so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA); 1370 if (so->so_oobmsg != NULL) { 1371 dprintso(so, 1, ("sock: discarding old oob\n")); 1372 freemsg(so->so_oobmsg); 1373 so->so_oobmsg = NULL; 1374 } 1375 *signals |= S_RDBAND; 1376 *pollwakeups |= POLLRDBAND; 1377 ASSERT(so_verify_oobstate(so)); 1378 } 1379 1380 /* 1381 * Handle the processing of the T_EXDATA_IND with urgent data. 1382 * Returns the T_EXDATA_IND if it should be queued on the read queue. 1383 */ 1384 /* ARGSUSED2 */ 1385 static mblk_t * 1386 so_oob_exdata(struct sonode *so, mblk_t *mp, 1387 strsigset_t *signals, strpollset_t *pollwakeups) 1388 { 1389 sotpi_info_t *sti = SOTOTPI(so); 1390 1391 ASSERT(MUTEX_HELD(&so->so_lock)); 1392 1393 ASSERT(so_verify_oobstate(so)); 1394 1395 ASSERT(sti->sti_oobsigcnt > sti->sti_oobcnt); 1396 1397 sti->sti_oobcnt++; 1398 ASSERT(sti->sti_oobcnt > 0); /* wraparound? */ 1399 ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt); 1400 1401 /* 1402 * Set MSGMARK for SIOCATMARK. 1403 */ 1404 mp->b_flag |= MSGMARK; 1405 1406 ASSERT(so_verify_oobstate(so)); 1407 return (mp); 1408 } 1409 1410 /* 1411 * Handle the processing of the actual urgent data. 1412 * Returns the data mblk if it should be queued on the read queue. 
1413 */ 1414 static mblk_t * 1415 so_oob_data(struct sonode *so, mblk_t *mp, 1416 strsigset_t *signals, strpollset_t *pollwakeups) 1417 { 1418 sotpi_info_t *sti = SOTOTPI(so); 1419 1420 ASSERT(MUTEX_HELD(&so->so_lock)); 1421 1422 ASSERT(so_verify_oobstate(so)); 1423 1424 ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt); 1425 ASSERT(mp != NULL); 1426 /* 1427 * For OOBINLINE we keep the data in the T_EXDATA_IND. 1428 * Otherwise we store it in so_oobmsg. 1429 */ 1430 ASSERT(so->so_oobmsg == NULL); 1431 if (so->so_options & SO_OOBINLINE) { 1432 *pollwakeups |= POLLIN | POLLRDNORM | POLLRDBAND; 1433 *signals |= S_INPUT | S_RDNORM; 1434 } else { 1435 *pollwakeups |= POLLRDBAND; 1436 so->so_state |= SS_HAVEOOBDATA; 1437 so->so_oobmsg = mp; 1438 mp = NULL; 1439 } 1440 ASSERT(so_verify_oobstate(so)); 1441 return (mp); 1442 } 1443 1444 /* 1445 * Caller must hold the mutex. 1446 * For delayed processing, save the T_DISCON_IND received 1447 * from below on sti_discon_ind_mp. 1448 * When the message is processed the framework will call: 1449 * (*func)(so, mp); 1450 */ 1451 static void 1452 so_save_discon_ind(struct sonode *so, 1453 mblk_t *mp, 1454 void (*func)(struct sonode *so, mblk_t *)) 1455 { 1456 sotpi_info_t *sti = SOTOTPI(so); 1457 1458 ASSERT(MUTEX_HELD(&so->so_lock)); 1459 1460 /* 1461 * Discard new T_DISCON_IND if we have already received another. 1462 * Currently the earlier message can either be on sti_discon_ind_mp 1463 * or being processed. 1464 */ 1465 if (sti->sti_discon_ind_mp != NULL || (so->so_flag & SOASYNC_UNBIND)) { 1466 zcmn_err(getzoneid(), CE_WARN, 1467 "sockfs: received unexpected additional T_DISCON_IND\n"); 1468 freemsg(mp); 1469 return; 1470 } 1471 mp->b_prev = (mblk_t *)func; 1472 mp->b_next = NULL; 1473 sti->sti_discon_ind_mp = mp; 1474 } 1475 1476 /* 1477 * Caller must hold the mutex and make sure that either SOLOCKED 1478 * or SOASYNC_UNBIND is set. Called from so_unlock_single(). 
1479 * Perform delayed processing of T_DISCON_IND message on sti_discon_ind_mp. 1480 * Need to ensure that strsock_proto() will not end up sleeping for 1481 * SOASYNC_UNBIND, while executing this function. 1482 */ 1483 void 1484 so_drain_discon_ind(struct sonode *so) 1485 { 1486 mblk_t *bp; 1487 void (*func)(struct sonode *so, mblk_t *); 1488 sotpi_info_t *sti = SOTOTPI(so); 1489 1490 ASSERT(MUTEX_HELD(&so->so_lock)); 1491 ASSERT(so->so_flag & (SOLOCKED|SOASYNC_UNBIND)); 1492 1493 /* Process T_DISCON_IND on sti_discon_ind_mp */ 1494 if ((bp = sti->sti_discon_ind_mp) != NULL) { 1495 sti->sti_discon_ind_mp = NULL; 1496 func = (void (*)())bp->b_prev; 1497 bp->b_prev = NULL; 1498 1499 /* 1500 * This (*func) is supposed to generate a message downstream 1501 * and we need to have a flag set until the corresponding 1502 * upstream message reaches stream head. 1503 * When processing T_DISCON_IND in strsock_discon_ind 1504 * we hold SOASYN_UNBIND when sending T_UNBIND_REQ down and 1505 * drop the flag after we get the ACK in strsock_proto. 1506 */ 1507 (void) (*func)(so, bp); 1508 } 1509 } 1510 1511 /* 1512 * Caller must hold the mutex. 1513 * Remove the T_DISCON_IND on sti_discon_ind_mp. 1514 */ 1515 void 1516 so_flush_discon_ind(struct sonode *so) 1517 { 1518 mblk_t *bp; 1519 sotpi_info_t *sti = SOTOTPI(so); 1520 1521 ASSERT(MUTEX_HELD(&so->so_lock)); 1522 1523 /* 1524 * Remove T_DISCON_IND mblk at sti_discon_ind_mp. 1525 */ 1526 if ((bp = sti->sti_discon_ind_mp) != NULL) { 1527 sti->sti_discon_ind_mp = NULL; 1528 bp->b_prev = NULL; 1529 freemsg(bp); 1530 } 1531 } 1532 1533 /* 1534 * Caller must hold the mutex. 1535 * 1536 * This function is used to process the T_DISCON_IND message. It does 1537 * immediate processing when called from strsock_proto and delayed 1538 * processing of discon_ind saved on sti_discon_ind_mp when called from 1539 * so_drain_discon_ind. 
When a T_DISCON_IND message is saved in 1540 * sti_discon_ind_mp for delayed processing, this function is registered 1541 * as the callback function to process the message. 1542 * 1543 * SOASYNC_UNBIND should be held in this function, during the non-blocking 1544 * unbind operation, and should be released only after we receive the ACK 1545 * in strsock_proto, for the T_UNBIND_REQ sent here. Since SOLOCKED is not set, 1546 * no TPI messages would be sent down at this time. This is to prevent M_FLUSH 1547 * sent from either this function or tcp_unbind(), flushing away any TPI 1548 * message that is being sent down and stays in a lower module's queue. 1549 * 1550 * This function drops so_lock and grabs it again. 1551 */ 1552 static void 1553 strsock_discon_ind(struct sonode *so, mblk_t *discon_mp) 1554 { 1555 struct vnode *vp; 1556 struct stdata *stp; 1557 union T_primitives *tpr; 1558 struct T_unbind_req *ubr; 1559 mblk_t *mp; 1560 int error; 1561 sotpi_info_t *sti = SOTOTPI(so); 1562 1563 ASSERT(MUTEX_HELD(&so->so_lock)); 1564 ASSERT(discon_mp); 1565 ASSERT(discon_mp->b_rptr); 1566 1567 tpr = (union T_primitives *)discon_mp->b_rptr; 1568 ASSERT(tpr->type == T_DISCON_IND); 1569 1570 vp = SOTOV(so); 1571 stp = vp->v_stream; 1572 ASSERT(stp); 1573 1574 /* 1575 * Not a listener 1576 */ 1577 ASSERT((so->so_state & SS_ACCEPTCONN) == 0); 1578 1579 /* 1580 * This assumes that the name space for DISCON_reason 1581 * is the errno name space. 1582 */ 1583 soisdisconnected(so, tpr->discon_ind.DISCON_reason); 1584 sti->sti_laddr_valid = 0; 1585 sti->sti_faddr_valid = 0; 1586 1587 /* 1588 * Unbind with the transport without blocking. 1589 * If we've already received a T_DISCON_IND do not unbind. 1590 * 1591 * If there is no preallocated unbind message, we have already 1592 * unbound with the transport 1593 * 1594 * If the socket is not bound, no need to unbind. 
1595 */ 1596 mp = sti->sti_unbind_mp; 1597 if (mp == NULL) { 1598 ASSERT(!(so->so_state & SS_ISBOUND)); 1599 mutex_exit(&so->so_lock); 1600 } else if (!(so->so_state & SS_ISBOUND)) { 1601 mutex_exit(&so->so_lock); 1602 } else { 1603 sti->sti_unbind_mp = NULL; 1604 1605 /* 1606 * Is another T_DISCON_IND being processed. 1607 */ 1608 ASSERT((so->so_flag & SOASYNC_UNBIND) == 0); 1609 1610 /* 1611 * Make strsock_proto ignore T_OK_ACK and T_ERROR_ACK for 1612 * this unbind. Set SOASYNC_UNBIND. This should be cleared 1613 * only after we receive the ACK in strsock_proto. 1614 */ 1615 so->so_flag |= SOASYNC_UNBIND; 1616 ASSERT(!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING))); 1617 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN); 1618 sti->sti_laddr_valid = 0; 1619 mutex_exit(&so->so_lock); 1620 1621 /* 1622 * Send down T_UNBIND_REQ ignoring flow control. 1623 * XXX Assumes that MSG_IGNFLOW implies that this thread 1624 * does not run service procedures. 1625 */ 1626 ASSERT(DB_TYPE(mp) == M_PROTO); 1627 ubr = (struct T_unbind_req *)mp->b_rptr; 1628 mp->b_wptr += sizeof (*ubr); 1629 ubr->PRIM_type = T_UNBIND_REQ; 1630 1631 /* 1632 * Flush the read and write side (except stream head read queue) 1633 * and send down T_UNBIND_REQ. 1634 */ 1635 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW); 1636 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1637 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 1638 /* LINTED - warning: statement has no consequent: if */ 1639 if (error) { 1640 eprintsoline(so, error); 1641 } 1642 } 1643 1644 if (tpr->discon_ind.DISCON_reason != 0) 1645 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 1646 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 1647 strseteof(SOTOV(so), 1); 1648 /* 1649 * strseteof takes care of read side wakeups, 1650 * pollwakeups, and signals. 
1651 */ 1652 dprintso(so, 1, ("T_DISCON_IND: error %d\n", so->so_error)); 1653 freemsg(discon_mp); 1654 1655 1656 pollwakeup(&stp->sd_pollist, POLLOUT); 1657 mutex_enter(&stp->sd_lock); 1658 1659 /* 1660 * Wake sleeping write 1661 */ 1662 if (stp->sd_flag & WSLEEP) { 1663 stp->sd_flag &= ~WSLEEP; 1664 cv_broadcast(&stp->sd_wrq->q_wait); 1665 } 1666 1667 /* 1668 * strsendsig can handle multiple signals with a 1669 * single call. Send SIGPOLL for S_OUTPUT event. 1670 */ 1671 if (stp->sd_sigflags & S_OUTPUT) 1672 strsendsig(stp->sd_siglist, S_OUTPUT, 0, 0); 1673 1674 mutex_exit(&stp->sd_lock); 1675 mutex_enter(&so->so_lock); 1676 } 1677 1678 /* 1679 * This routine is registered with the stream head to receive M_PROTO 1680 * and M_PCPROTO messages. 1681 * 1682 * Returns NULL if the message was consumed. 1683 * Returns an mblk to make that mblk be processed (and queued) by the stream 1684 * head. 1685 * 1686 * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and 1687 * *pollwakeups) for the stream head to take action on. Note that since 1688 * sockets always deliver SIGIO for every new piece of data this routine 1689 * never sets *firstmsgsigs; any signals are returned in *allmsgsigs. 1690 * 1691 * This routine handles all data related TPI messages independent of 1692 * the type of the socket i.e. it doesn't care if T_UNITDATA_IND message 1693 * arrive on a SOCK_STREAM. 
1694 */ 1695 static mblk_t * 1696 strsock_proto(vnode_t *vp, mblk_t *mp, 1697 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 1698 strsigset_t *allmsgsigs, strpollset_t *pollwakeups) 1699 { 1700 union T_primitives *tpr; 1701 struct sonode *so; 1702 sotpi_info_t *sti; 1703 1704 so = VTOSO(vp); 1705 sti = SOTOTPI(so); 1706 1707 dprintso(so, 1, ("strsock_proto(%p, %p)\n", (void *)vp, (void *)mp)); 1708 1709 /* Set default return values */ 1710 *firstmsgsigs = *wakeups = *allmsgsigs = *pollwakeups = 0; 1711 1712 ASSERT(DB_TYPE(mp) == M_PROTO || 1713 DB_TYPE(mp) == M_PCPROTO); 1714 1715 if (MBLKL(mp) < sizeof (tpr->type)) { 1716 /* The message is too short to even contain the primitive */ 1717 zcmn_err(getzoneid(), CE_WARN, 1718 "sockfs: Too short TPI message received. Len = %ld\n", 1719 (ptrdiff_t)(MBLKL(mp))); 1720 freemsg(mp); 1721 return (NULL); 1722 } 1723 if (!__TPI_PRIM_ISALIGNED(mp->b_rptr)) { 1724 /* The read pointer is not aligned correctly for TPI */ 1725 zcmn_err(getzoneid(), CE_WARN, 1726 "sockfs: Unaligned TPI message received. rptr = %p\n", 1727 (void *)mp->b_rptr); 1728 freemsg(mp); 1729 return (NULL); 1730 } 1731 tpr = (union T_primitives *)mp->b_rptr; 1732 dprintso(so, 1, ("strsock_proto: primitive %d\n", tpr->type)); 1733 1734 switch (tpr->type) { 1735 1736 case T_DATA_IND: 1737 if (MBLKL(mp) < sizeof (struct T_data_ind)) { 1738 zcmn_err(getzoneid(), CE_WARN, 1739 "sockfs: Too short T_DATA_IND. Len = %ld\n", 1740 (ptrdiff_t)(MBLKL(mp))); 1741 freemsg(mp); 1742 return (NULL); 1743 } 1744 /* 1745 * Ignore zero-length T_DATA_IND messages. These might be 1746 * generated by some transports. 1747 * This is needed to prevent read (which skips the M_PROTO 1748 * part) to unexpectedly return 0 (or return EWOULDBLOCK 1749 * on a non-blocking socket after select/poll has indicated 1750 * that data is available). 
1751 */ 1752 if (msgdsize(mp->b_cont) == 0) { 1753 dprintso(so, 0, 1754 ("strsock_proto: zero length T_DATA_IND\n")); 1755 freemsg(mp); 1756 return (NULL); 1757 } 1758 *allmsgsigs = S_INPUT | S_RDNORM; 1759 *pollwakeups = POLLIN | POLLRDNORM; 1760 *wakeups = RSLEEP; 1761 return (mp); 1762 1763 case T_UNITDATA_IND: { 1764 struct T_unitdata_ind *tudi = &tpr->unitdata_ind; 1765 void *addr; 1766 t_uscalar_t addrlen; 1767 1768 if (MBLKL(mp) < sizeof (struct T_unitdata_ind)) { 1769 zcmn_err(getzoneid(), CE_WARN, 1770 "sockfs: Too short T_UNITDATA_IND. Len = %ld\n", 1771 (ptrdiff_t)(MBLKL(mp))); 1772 freemsg(mp); 1773 return (NULL); 1774 } 1775 1776 /* Is this is not a connected datagram socket? */ 1777 if ((so->so_mode & SM_CONNREQUIRED) || 1778 !(so->so_state & SS_ISCONNECTED)) { 1779 /* 1780 * Not a connected datagram socket. Look for 1781 * the SO_UNIX_CLOSE option. If such an option is found 1782 * discard the message (since it has no meaning 1783 * unless connected). 1784 */ 1785 if (so->so_family == AF_UNIX && msgdsize(mp) == 0 && 1786 tudi->OPT_length != 0) { 1787 void *opt; 1788 t_uscalar_t optlen = tudi->OPT_length; 1789 1790 opt = sogetoff(mp, tudi->OPT_offset, 1791 optlen, __TPI_ALIGN_SIZE); 1792 if (opt == NULL) { 1793 /* The len/off falls outside mp */ 1794 freemsg(mp); 1795 mutex_enter(&so->so_lock); 1796 soseterror(so, EPROTO); 1797 mutex_exit(&so->so_lock); 1798 zcmn_err(getzoneid(), CE_WARN, 1799 "sockfs: T_unidata_ind with " 1800 "invalid optlen/offset %u/%d\n", 1801 optlen, tudi->OPT_offset); 1802 return (NULL); 1803 } 1804 if (so_getopt_unix_close(opt, optlen)) { 1805 freemsg(mp); 1806 return (NULL); 1807 } 1808 } 1809 *allmsgsigs = S_INPUT | S_RDNORM; 1810 *pollwakeups = POLLIN | POLLRDNORM; 1811 *wakeups = RSLEEP; 1812 if (audit_active) 1813 audit_sock(T_UNITDATA_IND, strvp2wq(vp), 1814 mp, 0); 1815 return (mp); 1816 } 1817 1818 /* 1819 * A connect datagram socket. 
For AF_INET{,6} we verify that 1820 * the source address matches the "connected to" address. 1821 * The semantics of AF_UNIX sockets is to not verify 1822 * the source address. 1823 * Note that this source address verification is transport 1824 * specific. Thus the real fix would be to extent TPI 1825 * to allow T_CONN_REQ messages to be send to connectionless 1826 * transport providers and always let the transport provider 1827 * do whatever filtering is needed. 1828 * 1829 * The verification/filtering semantics for transports 1830 * other than AF_INET and AF_UNIX are unknown. The choice 1831 * would be to either filter using bcmp or let all messages 1832 * get through. This code does not filter other address 1833 * families since this at least allows the application to 1834 * work around any missing filtering. 1835 * 1836 * XXX Should we move filtering to UDP/ICMP??? 1837 * That would require passing e.g. a T_DISCON_REQ to UDP 1838 * when the socket becomes unconnected. 1839 */ 1840 addrlen = tudi->SRC_length; 1841 /* 1842 * The alignment restriction is really to strict but 1843 * we want enough alignment to inspect the fields of 1844 * a sockaddr_in. 1845 */ 1846 addr = sogetoff(mp, tudi->SRC_offset, addrlen, 1847 __TPI_ALIGN_SIZE); 1848 if (addr == NULL) { 1849 freemsg(mp); 1850 mutex_enter(&so->so_lock); 1851 soseterror(so, EPROTO); 1852 mutex_exit(&so->so_lock); 1853 zcmn_err(getzoneid(), CE_WARN, 1854 "sockfs: T_unidata_ind with invalid " 1855 "addrlen/offset %u/%d\n", 1856 addrlen, tudi->SRC_offset); 1857 return (NULL); 1858 } 1859 1860 if (so->so_family == AF_INET) { 1861 /* 1862 * For AF_INET we allow wildcarding both sin_addr 1863 * and sin_port. 
1864 */ 1865 struct sockaddr_in *faddr, *sin; 1866 1867 /* Prevent sti_faddr_sa from changing while accessed */ 1868 mutex_enter(&so->so_lock); 1869 ASSERT(sti->sti_faddr_len == 1870 (socklen_t)sizeof (struct sockaddr_in)); 1871 faddr = (struct sockaddr_in *)sti->sti_faddr_sa; 1872 sin = (struct sockaddr_in *)addr; 1873 if (addrlen != 1874 (t_uscalar_t)sizeof (struct sockaddr_in) || 1875 (sin->sin_addr.s_addr != faddr->sin_addr.s_addr && 1876 faddr->sin_addr.s_addr != INADDR_ANY) || 1877 (so->so_type != SOCK_RAW && 1878 sin->sin_port != faddr->sin_port && 1879 faddr->sin_port != 0)) { 1880 #ifdef DEBUG 1881 dprintso(so, 0, 1882 ("sockfs: T_UNITDATA_IND mismatch: %s", 1883 pr_addr(so->so_family, 1884 (struct sockaddr *)addr, addrlen))); 1885 dprintso(so, 0, (" - %s\n", 1886 pr_addr(so->so_family, sti->sti_faddr_sa, 1887 (t_uscalar_t)sti->sti_faddr_len))); 1888 #endif /* DEBUG */ 1889 mutex_exit(&so->so_lock); 1890 freemsg(mp); 1891 return (NULL); 1892 } 1893 mutex_exit(&so->so_lock); 1894 } else if (so->so_family == AF_INET6) { 1895 /* 1896 * For AF_INET6 we allow wildcarding both sin6_addr 1897 * and sin6_port. 1898 */ 1899 struct sockaddr_in6 *faddr6, *sin6; 1900 static struct in6_addr zeroes; /* inits to all zeros */ 1901 1902 /* Prevent sti_faddr_sa from changing while accessed */ 1903 mutex_enter(&so->so_lock); 1904 ASSERT(sti->sti_faddr_len == 1905 (socklen_t)sizeof (struct sockaddr_in6)); 1906 faddr6 = (struct sockaddr_in6 *)sti->sti_faddr_sa; 1907 sin6 = (struct sockaddr_in6 *)addr; 1908 /* XXX could we get a mapped address ::ffff:0.0.0.0 ? 
*/ 1909 if (addrlen != 1910 (t_uscalar_t)sizeof (struct sockaddr_in6) || 1911 (!IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, 1912 &faddr6->sin6_addr) && 1913 !IN6_ARE_ADDR_EQUAL(&faddr6->sin6_addr, &zeroes)) || 1914 (so->so_type != SOCK_RAW && 1915 sin6->sin6_port != faddr6->sin6_port && 1916 faddr6->sin6_port != 0)) { 1917 #ifdef DEBUG 1918 dprintso(so, 0, 1919 ("sockfs: T_UNITDATA_IND mismatch: %s", 1920 pr_addr(so->so_family, 1921 (struct sockaddr *)addr, addrlen))); 1922 dprintso(so, 0, (" - %s\n", 1923 pr_addr(so->so_family, sti->sti_faddr_sa, 1924 (t_uscalar_t)sti->sti_faddr_len))); 1925 #endif /* DEBUG */ 1926 mutex_exit(&so->so_lock); 1927 freemsg(mp); 1928 return (NULL); 1929 } 1930 mutex_exit(&so->so_lock); 1931 } else if (so->so_family == AF_UNIX && 1932 msgdsize(mp->b_cont) == 0 && 1933 tudi->OPT_length != 0) { 1934 /* 1935 * Attempt to extract AF_UNIX 1936 * SO_UNIX_CLOSE indication from options. 1937 */ 1938 void *opt; 1939 t_uscalar_t optlen = tudi->OPT_length; 1940 1941 opt = sogetoff(mp, tudi->OPT_offset, 1942 optlen, __TPI_ALIGN_SIZE); 1943 if (opt == NULL) { 1944 /* The len/off falls outside mp */ 1945 freemsg(mp); 1946 mutex_enter(&so->so_lock); 1947 soseterror(so, EPROTO); 1948 mutex_exit(&so->so_lock); 1949 zcmn_err(getzoneid(), CE_WARN, 1950 "sockfs: T_unidata_ind with invalid " 1951 "optlen/offset %u/%d\n", 1952 optlen, tudi->OPT_offset); 1953 return (NULL); 1954 } 1955 /* 1956 * If we received a unix close indication mark the 1957 * socket and discard this message. 
1958 */ 1959 if (so_getopt_unix_close(opt, optlen)) { 1960 mutex_enter(&so->so_lock); 1961 sobreakconn(so, ECONNRESET); 1962 mutex_exit(&so->so_lock); 1963 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 1964 freemsg(mp); 1965 *pollwakeups = POLLIN | POLLRDNORM; 1966 *allmsgsigs = S_INPUT | S_RDNORM; 1967 *wakeups = RSLEEP; 1968 return (NULL); 1969 } 1970 } 1971 *allmsgsigs = S_INPUT | S_RDNORM; 1972 *pollwakeups = POLLIN | POLLRDNORM; 1973 *wakeups = RSLEEP; 1974 return (mp); 1975 } 1976 1977 case T_OPTDATA_IND: { 1978 struct T_optdata_ind *tdi = &tpr->optdata_ind; 1979 1980 if (MBLKL(mp) < sizeof (struct T_optdata_ind)) { 1981 zcmn_err(getzoneid(), CE_WARN, 1982 "sockfs: Too short T_OPTDATA_IND. Len = %ld\n", 1983 (ptrdiff_t)(MBLKL(mp))); 1984 freemsg(mp); 1985 return (NULL); 1986 } 1987 /* 1988 * Allow zero-length messages carrying options. 1989 * This is used when carrying the SO_UNIX_CLOSE option. 1990 */ 1991 if (so->so_family == AF_UNIX && msgdsize(mp->b_cont) == 0 && 1992 tdi->OPT_length != 0) { 1993 /* 1994 * Attempt to extract AF_UNIX close indication 1995 * from the options. Ignore any other options - 1996 * those are handled once the message is removed 1997 * from the queue. 1998 * The close indication message should not carry data. 1999 */ 2000 void *opt; 2001 t_uscalar_t optlen = tdi->OPT_length; 2002 2003 opt = sogetoff(mp, tdi->OPT_offset, 2004 optlen, __TPI_ALIGN_SIZE); 2005 if (opt == NULL) { 2006 /* The len/off falls outside mp */ 2007 freemsg(mp); 2008 mutex_enter(&so->so_lock); 2009 soseterror(so, EPROTO); 2010 mutex_exit(&so->so_lock); 2011 zcmn_err(getzoneid(), CE_WARN, 2012 "sockfs: T_optdata_ind with invalid " 2013 "optlen/offset %u/%d\n", 2014 optlen, tdi->OPT_offset); 2015 return (NULL); 2016 } 2017 /* 2018 * If we received a close indication mark the 2019 * socket and discard this message. 
2020 */ 2021 if (so_getopt_unix_close(opt, optlen)) { 2022 mutex_enter(&so->so_lock); 2023 socantsendmore(so); 2024 sti->sti_faddr_valid = 0; 2025 mutex_exit(&so->so_lock); 2026 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2027 freemsg(mp); 2028 return (NULL); 2029 } 2030 } 2031 *allmsgsigs = S_INPUT | S_RDNORM; 2032 *pollwakeups = POLLIN | POLLRDNORM; 2033 *wakeups = RSLEEP; 2034 return (mp); 2035 } 2036 2037 case T_EXDATA_IND: { 2038 mblk_t *mctl, *mdata; 2039 mblk_t *lbp; 2040 union T_primitives *tprp; 2041 struct stdata *stp; 2042 queue_t *qp; 2043 2044 if (MBLKL(mp) < sizeof (struct T_exdata_ind)) { 2045 zcmn_err(getzoneid(), CE_WARN, 2046 "sockfs: Too short T_EXDATA_IND. Len = %ld\n", 2047 (ptrdiff_t)(MBLKL(mp))); 2048 freemsg(mp); 2049 return (NULL); 2050 } 2051 /* 2052 * Ignore zero-length T_EXDATA_IND messages. These might be 2053 * generated by some transports. 2054 * 2055 * This is needed to prevent read (which skips the M_PROTO 2056 * part) to unexpectedly return 0 (or return EWOULDBLOCK 2057 * on a non-blocking socket after select/poll has indicated 2058 * that data is available). 2059 */ 2060 dprintso(so, 1, 2061 ("T_EXDATA_IND(%p): counts %d/%d state %s\n", 2062 (void *)vp, sti->sti_oobsigcnt, sti->sti_oobcnt, 2063 pr_state(so->so_state, so->so_mode))); 2064 2065 if (msgdsize(mp->b_cont) == 0) { 2066 dprintso(so, 0, 2067 ("strsock_proto: zero length T_EXDATA_IND\n")); 2068 freemsg(mp); 2069 return (NULL); 2070 } 2071 2072 /* 2073 * Split into the T_EXDATA_IND and the M_DATA part. 
2074 * We process these three pieces separately: 2075 * signal generation 2076 * handling T_EXDATA_IND 2077 * handling M_DATA component 2078 */ 2079 mctl = mp; 2080 mdata = mctl->b_cont; 2081 mctl->b_cont = NULL; 2082 mutex_enter(&so->so_lock); 2083 so_oob_sig(so, 0, allmsgsigs, pollwakeups); 2084 mctl = so_oob_exdata(so, mctl, allmsgsigs, pollwakeups); 2085 mdata = so_oob_data(so, mdata, allmsgsigs, pollwakeups); 2086 2087 stp = vp->v_stream; 2088 ASSERT(stp != NULL); 2089 qp = _RD(stp->sd_wrq); 2090 2091 mutex_enter(QLOCK(qp)); 2092 lbp = qp->q_last; 2093 2094 /* 2095 * We want to avoid queueing up a string of T_EXDATA_IND 2096 * messages with no intervening data messages at the stream 2097 * head. These messages contribute to the total message 2098 * count. Eventually this can lead to STREAMS flow contol 2099 * and also cause TCP to advertise a zero window condition 2100 * to the peer. This can happen in the degenerate case where 2101 * the sender and receiver exchange only OOB data. The sender 2102 * only sends messages with MSG_OOB flag and the receiver 2103 * receives only MSG_OOB messages and does not use SO_OOBINLINE. 2104 * An example of this scenario has been reported in applications 2105 * that use OOB data to exchange heart beats. Flow control 2106 * relief will never happen if the application only reads OOB 2107 * data which is done directly by sorecvoob() and the 2108 * T_EXDATA_IND messages at the streamhead won't be consumed. 2109 * Note that there is no correctness issue in compressing the 2110 * string of T_EXDATA_IND messages into a single T_EXDATA_IND 2111 * message. A single read that does not specify MSG_OOB will 2112 * read across all the marks in a loop in sotpi_recvmsg(). 2113 * Each mark is individually distinguishable only if the 2114 * T_EXDATA_IND messages are separated by data messages. 
2115 */ 2116 if ((qp->q_first != NULL) && (DB_TYPE(lbp) == M_PROTO)) { 2117 tprp = (union T_primitives *)lbp->b_rptr; 2118 if ((tprp->type == T_EXDATA_IND) && 2119 !(so->so_options & SO_OOBINLINE)) { 2120 2121 /* 2122 * free the new M_PROTO message 2123 */ 2124 freemsg(mctl); 2125 2126 /* 2127 * adjust the OOB count and OOB signal count 2128 * just incremented for the new OOB data. 2129 */ 2130 sti->sti_oobcnt--; 2131 sti->sti_oobsigcnt--; 2132 mutex_exit(QLOCK(qp)); 2133 mutex_exit(&so->so_lock); 2134 return (NULL); 2135 } 2136 } 2137 mutex_exit(QLOCK(qp)); 2138 2139 /* 2140 * Pass the T_EXDATA_IND and the M_DATA back separately 2141 * by using b_next linkage. (The stream head will queue any 2142 * b_next linked messages separately.) This is needed 2143 * since MSGMARK applies to the last by of the message 2144 * hence we can not have any M_DATA component attached 2145 * to the marked T_EXDATA_IND. Note that the stream head 2146 * will not consolidate M_DATA messages onto an MSGMARK'ed 2147 * message in order to preserve the constraint that 2148 * the T_EXDATA_IND always is a separate message. 
2149 */ 2150 ASSERT(mctl != NULL); 2151 mctl->b_next = mdata; 2152 mp = mctl; 2153 #ifdef DEBUG 2154 if (mdata == NULL) { 2155 dprintso(so, 1, 2156 ("after outofline T_EXDATA_IND(%p): " 2157 "counts %d/%d poll 0x%x sig 0x%x state %s\n", 2158 (void *)vp, sti->sti_oobsigcnt, 2159 sti->sti_oobcnt, *pollwakeups, *allmsgsigs, 2160 pr_state(so->so_state, so->so_mode))); 2161 } else { 2162 dprintso(so, 1, 2163 ("after inline T_EXDATA_IND(%p): " 2164 "counts %d/%d poll 0x%x sig 0x%x state %s\n", 2165 (void *)vp, sti->sti_oobsigcnt, 2166 sti->sti_oobcnt, *pollwakeups, *allmsgsigs, 2167 pr_state(so->so_state, so->so_mode))); 2168 } 2169 #endif /* DEBUG */ 2170 mutex_exit(&so->so_lock); 2171 *wakeups = RSLEEP; 2172 return (mp); 2173 } 2174 2175 case T_CONN_CON: { 2176 struct T_conn_con *conn_con; 2177 void *addr; 2178 t_uscalar_t addrlen; 2179 2180 /* 2181 * Verify the state, update the state to ISCONNECTED, 2182 * record the potentially new address in the message, 2183 * and drop the message. 2184 */ 2185 if (MBLKL(mp) < sizeof (struct T_conn_con)) { 2186 zcmn_err(getzoneid(), CE_WARN, 2187 "sockfs: Too short T_CONN_CON. Len = %ld\n", 2188 (ptrdiff_t)(MBLKL(mp))); 2189 freemsg(mp); 2190 return (NULL); 2191 } 2192 2193 mutex_enter(&so->so_lock); 2194 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 2195 SS_ISCONNECTING) { 2196 mutex_exit(&so->so_lock); 2197 dprintso(so, 1, 2198 ("T_CONN_CON: state %x\n", so->so_state)); 2199 freemsg(mp); 2200 return (NULL); 2201 } 2202 2203 conn_con = &tpr->conn_con; 2204 addrlen = conn_con->RES_length; 2205 /* 2206 * Allow the address to be of different size than sent down 2207 * in the T_CONN_REQ as long as it doesn't exceed the maxlen. 2208 * For AF_UNIX require the identical length. 2209 */ 2210 if (so->so_family == AF_UNIX ? 
2211 addrlen != (t_uscalar_t)sizeof (sti->sti_ux_laddr) : 2212 addrlen > (t_uscalar_t)sti->sti_faddr_maxlen) { 2213 zcmn_err(getzoneid(), CE_WARN, 2214 "sockfs: T_conn_con with different " 2215 "length %u/%d\n", 2216 addrlen, conn_con->RES_length); 2217 soisdisconnected(so, EPROTO); 2218 sti->sti_laddr_valid = 0; 2219 sti->sti_faddr_valid = 0; 2220 mutex_exit(&so->so_lock); 2221 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 2222 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2223 strseteof(SOTOV(so), 1); 2224 freemsg(mp); 2225 /* 2226 * strseteof takes care of read side wakeups, 2227 * pollwakeups, and signals. 2228 */ 2229 *wakeups = WSLEEP; 2230 *allmsgsigs = S_OUTPUT; 2231 *pollwakeups = POLLOUT; 2232 return (NULL); 2233 } 2234 addr = sogetoff(mp, conn_con->RES_offset, addrlen, 1); 2235 if (addr == NULL) { 2236 zcmn_err(getzoneid(), CE_WARN, 2237 "sockfs: T_conn_con with invalid " 2238 "addrlen/offset %u/%d\n", 2239 addrlen, conn_con->RES_offset); 2240 mutex_exit(&so->so_lock); 2241 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 2242 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2243 strseteof(SOTOV(so), 1); 2244 freemsg(mp); 2245 /* 2246 * strseteof takes care of read side wakeups, 2247 * pollwakeups, and signals. 2248 */ 2249 *wakeups = WSLEEP; 2250 *allmsgsigs = S_OUTPUT; 2251 *pollwakeups = POLLOUT; 2252 return (NULL); 2253 } 2254 2255 /* 2256 * Save for getpeername. 2257 */ 2258 if (so->so_family != AF_UNIX) { 2259 sti->sti_faddr_len = (socklen_t)addrlen; 2260 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen); 2261 bcopy(addr, sti->sti_faddr_sa, addrlen); 2262 sti->sti_faddr_valid = 1; 2263 } 2264 2265 if (so->so_peercred != NULL) 2266 crfree(so->so_peercred); 2267 so->so_peercred = DB_CRED(mp); 2268 so->so_cpid = DB_CPID(mp); 2269 if (so->so_peercred != NULL) 2270 crhold(so->so_peercred); 2271 2272 /* Wakeup anybody sleeping in sowaitconnected */ 2273 soisconnected(so); 2274 mutex_exit(&so->so_lock); 2275 2276 /* 2277 * The socket is now available for sending data. 
2278 */ 2279 *wakeups = WSLEEP; 2280 *allmsgsigs = S_OUTPUT; 2281 *pollwakeups = POLLOUT; 2282 freemsg(mp); 2283 return (NULL); 2284 } 2285 2286 /* 2287 * Extra processing in case of an SSL proxy, before queuing or 2288 * forwarding to the fallback endpoint 2289 */ 2290 case T_SSL_PROXY_CONN_IND: 2291 case T_CONN_IND: 2292 /* 2293 * Verify the min size and queue the message on 2294 * the sti_conn_ind_head/tail list. 2295 */ 2296 if (MBLKL(mp) < sizeof (struct T_conn_ind)) { 2297 zcmn_err(getzoneid(), CE_WARN, 2298 "sockfs: Too short T_CONN_IND. Len = %ld\n", 2299 (ptrdiff_t)(MBLKL(mp))); 2300 freemsg(mp); 2301 return (NULL); 2302 } 2303 2304 if (audit_active) 2305 audit_sock(T_CONN_IND, strvp2wq(vp), mp, 0); 2306 if (!(so->so_state & SS_ACCEPTCONN)) { 2307 zcmn_err(getzoneid(), CE_WARN, 2308 "sockfs: T_conn_ind on non-listening socket\n"); 2309 freemsg(mp); 2310 return (NULL); 2311 } 2312 2313 if (tpr->type == T_SSL_PROXY_CONN_IND && mp->b_cont == NULL) { 2314 /* No context: need to fall back */ 2315 struct sonode *fbso; 2316 stdata_t *fbstp; 2317 2318 tpr->type = T_CONN_IND; 2319 2320 fbso = kssl_find_fallback(sti->sti_kssl_ent); 2321 2322 /* 2323 * No fallback: the remote will timeout and 2324 * disconnect. 2325 */ 2326 if (fbso == NULL) { 2327 freemsg(mp); 2328 return (NULL); 2329 } 2330 fbstp = SOTOV(fbso)->v_stream; 2331 qreply(fbstp->sd_wrq->q_next, mp); 2332 return (NULL); 2333 } 2334 soqueueconnind(so, mp); 2335 *allmsgsigs = S_INPUT | S_RDNORM; 2336 *pollwakeups = POLLIN | POLLRDNORM; 2337 *wakeups = RSLEEP; 2338 return (NULL); 2339 2340 case T_ORDREL_IND: 2341 if (MBLKL(mp) < sizeof (struct T_ordrel_ind)) { 2342 zcmn_err(getzoneid(), CE_WARN, 2343 "sockfs: Too short T_ORDREL_IND. Len = %ld\n", 2344 (ptrdiff_t)(MBLKL(mp))); 2345 freemsg(mp); 2346 return (NULL); 2347 } 2348 2349 /* 2350 * Some providers send this when not fully connected. 2351 * SunLink X.25 needs to retrieve disconnect reason after 2352 * disconnect for compatibility. 
It uses T_ORDREL_IND 2353 * instead of T_DISCON_IND so that it may use the 2354 * endpoint after a connect failure to retrieve the 2355 * reason using an ioctl. Thus we explicitly clear 2356 * SS_ISCONNECTING here for SunLink X.25. 2357 * This is a needed TPI violation. 2358 */ 2359 mutex_enter(&so->so_lock); 2360 so->so_state &= ~SS_ISCONNECTING; 2361 socantrcvmore(so); 2362 mutex_exit(&so->so_lock); 2363 strseteof(SOTOV(so), 1); 2364 /* 2365 * strseteof takes care of read side wakeups, 2366 * pollwakeups, and signals. 2367 */ 2368 freemsg(mp); 2369 return (NULL); 2370 2371 case T_DISCON_IND: 2372 if (MBLKL(mp) < sizeof (struct T_discon_ind)) { 2373 zcmn_err(getzoneid(), CE_WARN, 2374 "sockfs: Too short T_DISCON_IND. Len = %ld\n", 2375 (ptrdiff_t)(MBLKL(mp))); 2376 freemsg(mp); 2377 return (NULL); 2378 } 2379 if (so->so_state & SS_ACCEPTCONN) { 2380 /* 2381 * This is a listener. Look for a queued T_CONN_IND 2382 * with a matching sequence number and remove it 2383 * from the list. 2384 * It is normal to not find the sequence number since 2385 * the soaccept might have already dequeued it 2386 * (in which case the T_CONN_RES will fail with 2387 * TBADSEQ). 2388 */ 2389 (void) soflushconnind(so, tpr->discon_ind.SEQ_number); 2390 freemsg(mp); 2391 return (0); 2392 } 2393 2394 /* 2395 * Not a listener 2396 * 2397 * If SS_CANTRCVMORE for AF_UNIX ignore the discon_reason. 2398 * Such a discon_ind appears when the peer has first done 2399 * a shutdown() followed by a close() in which case we just 2400 * want to record socantsendmore. 2401 * In this case sockfs first receives a T_ORDREL_IND followed 2402 * by a T_DISCON_IND. 2403 * Note that for other transports (e.g. TCP) we need to handle 2404 * the discon_ind in this case since it signals an error. 
2405 */ 2406 mutex_enter(&so->so_lock); 2407 if ((so->so_state & SS_CANTRCVMORE) && 2408 (so->so_family == AF_UNIX)) { 2409 socantsendmore(so); 2410 sti->sti_faddr_valid = 0; 2411 mutex_exit(&so->so_lock); 2412 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2413 dprintso(so, 1, 2414 ("T_DISCON_IND: error %d\n", so->so_error)); 2415 freemsg(mp); 2416 /* 2417 * Set these variables for caller to process them. 2418 * For the else part where T_DISCON_IND is processed, 2419 * this will be done in the function being called 2420 * (strsock_discon_ind()) 2421 */ 2422 *wakeups = WSLEEP; 2423 *allmsgsigs = S_OUTPUT; 2424 *pollwakeups = POLLOUT; 2425 } else if (so->so_flag & (SOASYNC_UNBIND | SOLOCKED)) { 2426 /* 2427 * Deferred processing of T_DISCON_IND 2428 */ 2429 so_save_discon_ind(so, mp, strsock_discon_ind); 2430 mutex_exit(&so->so_lock); 2431 } else { 2432 /* 2433 * Process T_DISCON_IND now 2434 */ 2435 (void) strsock_discon_ind(so, mp); 2436 mutex_exit(&so->so_lock); 2437 } 2438 return (NULL); 2439 2440 case T_UDERROR_IND: { 2441 struct T_uderror_ind *tudi = &tpr->uderror_ind; 2442 void *addr; 2443 t_uscalar_t addrlen; 2444 int error; 2445 2446 dprintso(so, 0, 2447 ("T_UDERROR_IND: error %d\n", tudi->ERROR_type)); 2448 2449 if (MBLKL(mp) < sizeof (struct T_uderror_ind)) { 2450 zcmn_err(getzoneid(), CE_WARN, 2451 "sockfs: Too short T_UDERROR_IND. 
Len = %ld\n", 2452 (ptrdiff_t)(MBLKL(mp))); 2453 freemsg(mp); 2454 return (NULL); 2455 } 2456 /* Ignore on connection-oriented transports */ 2457 if (so->so_mode & SM_CONNREQUIRED) { 2458 freemsg(mp); 2459 eprintsoline(so, 0); 2460 zcmn_err(getzoneid(), CE_WARN, 2461 "sockfs: T_uderror_ind on connection-oriented " 2462 "transport\n"); 2463 return (NULL); 2464 } 2465 addrlen = tudi->DEST_length; 2466 addr = sogetoff(mp, tudi->DEST_offset, addrlen, 1); 2467 if (addr == NULL) { 2468 zcmn_err(getzoneid(), CE_WARN, 2469 "sockfs: T_uderror_ind with invalid " 2470 "addrlen/offset %u/%d\n", 2471 addrlen, tudi->DEST_offset); 2472 freemsg(mp); 2473 return (NULL); 2474 } 2475 2476 /* Verify source address for connected socket. */ 2477 mutex_enter(&so->so_lock); 2478 if (so->so_state & SS_ISCONNECTED) { 2479 void *faddr; 2480 t_uscalar_t faddr_len; 2481 boolean_t match = B_FALSE; 2482 2483 switch (so->so_family) { 2484 case AF_INET: { 2485 /* Compare just IP address and port */ 2486 struct sockaddr_in *sin1, *sin2; 2487 2488 sin1 = (struct sockaddr_in *)sti->sti_faddr_sa; 2489 sin2 = (struct sockaddr_in *)addr; 2490 if (addrlen == sizeof (struct sockaddr_in) && 2491 sin1->sin_port == sin2->sin_port && 2492 sin1->sin_addr.s_addr == 2493 sin2->sin_addr.s_addr) 2494 match = B_TRUE; 2495 break; 2496 } 2497 case AF_INET6: { 2498 /* Compare just IP address and port. 
Not flow */ 2499 struct sockaddr_in6 *sin1, *sin2; 2500 2501 sin1 = (struct sockaddr_in6 *)sti->sti_faddr_sa; 2502 sin2 = (struct sockaddr_in6 *)addr; 2503 if (addrlen == sizeof (struct sockaddr_in6) && 2504 sin1->sin6_port == sin2->sin6_port && 2505 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 2506 &sin2->sin6_addr)) 2507 match = B_TRUE; 2508 break; 2509 } 2510 case AF_UNIX: 2511 faddr = &sti->sti_ux_faddr; 2512 faddr_len = 2513 (t_uscalar_t)sizeof (sti->sti_ux_faddr); 2514 if (faddr_len == addrlen && 2515 bcmp(addr, faddr, addrlen) == 0) 2516 match = B_TRUE; 2517 break; 2518 default: 2519 faddr = sti->sti_faddr_sa; 2520 faddr_len = (t_uscalar_t)sti->sti_faddr_len; 2521 if (faddr_len == addrlen && 2522 bcmp(addr, faddr, addrlen) == 0) 2523 match = B_TRUE; 2524 break; 2525 } 2526 2527 if (!match) { 2528 #ifdef DEBUG 2529 dprintso(so, 0, 2530 ("sockfs: T_UDERR_IND mismatch: %s - ", 2531 pr_addr(so->so_family, 2532 (struct sockaddr *)addr, addrlen))); 2533 dprintso(so, 0, ("%s\n", 2534 pr_addr(so->so_family, sti->sti_faddr_sa, 2535 sti->sti_faddr_len))); 2536 #endif /* DEBUG */ 2537 mutex_exit(&so->so_lock); 2538 freemsg(mp); 2539 return (NULL); 2540 } 2541 /* 2542 * Make the write error nonpersistent. If the error 2543 * is zero we use ECONNRESET. 2544 * This assumes that the name space for ERROR_type 2545 * is the errno name space. 2546 */ 2547 if (tudi->ERROR_type != 0) 2548 error = tudi->ERROR_type; 2549 else 2550 error = ECONNRESET; 2551 2552 soseterror(so, error); 2553 mutex_exit(&so->so_lock); 2554 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 2555 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2556 *wakeups = RSLEEP | WSLEEP; 2557 *allmsgsigs = S_INPUT | S_RDNORM | S_OUTPUT; 2558 *pollwakeups = POLLIN | POLLRDNORM | POLLOUT; 2559 freemsg(mp); 2560 return (NULL); 2561 } 2562 /* 2563 * If the application asked for delayed errors 2564 * record the T_UDERROR_IND sti_eaddr_mp and the reason in 2565 * sti_delayed_error for delayed error posting. 
If the reason 2566 * is zero use ECONNRESET. 2567 * Note that delayed error indications do not make sense for 2568 * AF_UNIX sockets since sendto checks that the destination 2569 * address is valid at the time of the sendto. 2570 */ 2571 if (!(so->so_options & SO_DGRAM_ERRIND)) { 2572 mutex_exit(&so->so_lock); 2573 freemsg(mp); 2574 return (NULL); 2575 } 2576 if (sti->sti_eaddr_mp != NULL) 2577 freemsg(sti->sti_eaddr_mp); 2578 2579 sti->sti_eaddr_mp = mp; 2580 if (tudi->ERROR_type != 0) 2581 error = tudi->ERROR_type; 2582 else 2583 error = ECONNRESET; 2584 sti->sti_delayed_error = (ushort_t)error; 2585 mutex_exit(&so->so_lock); 2586 return (NULL); 2587 } 2588 2589 case T_ERROR_ACK: 2590 dprintso(so, 0, 2591 ("strsock_proto: T_ERROR_ACK for %d, error %d/%d\n", 2592 tpr->error_ack.ERROR_prim, 2593 tpr->error_ack.TLI_error, 2594 tpr->error_ack.UNIX_error)); 2595 2596 if (MBLKL(mp) < sizeof (struct T_error_ack)) { 2597 zcmn_err(getzoneid(), CE_WARN, 2598 "sockfs: Too short T_ERROR_ACK. Len = %ld\n", 2599 (ptrdiff_t)(MBLKL(mp))); 2600 freemsg(mp); 2601 return (NULL); 2602 } 2603 /* 2604 * Check if we were waiting for the async message 2605 */ 2606 mutex_enter(&so->so_lock); 2607 if ((so->so_flag & SOASYNC_UNBIND) && 2608 tpr->error_ack.ERROR_prim == T_UNBIND_REQ) { 2609 so_unlock_single(so, SOASYNC_UNBIND); 2610 mutex_exit(&so->so_lock); 2611 freemsg(mp); 2612 return (NULL); 2613 } 2614 mutex_exit(&so->so_lock); 2615 soqueueack(so, mp); 2616 return (NULL); 2617 2618 case T_OK_ACK: 2619 if (MBLKL(mp) < sizeof (struct T_ok_ack)) { 2620 zcmn_err(getzoneid(), CE_WARN, 2621 "sockfs: Too short T_OK_ACK. 
Len = %ld\n", 2622 (ptrdiff_t)(MBLKL(mp))); 2623 freemsg(mp); 2624 return (NULL); 2625 } 2626 /* 2627 * Check if we were waiting for the async message 2628 */ 2629 mutex_enter(&so->so_lock); 2630 if ((so->so_flag & SOASYNC_UNBIND) && 2631 tpr->ok_ack.CORRECT_prim == T_UNBIND_REQ) { 2632 dprintso(so, 1, 2633 ("strsock_proto: T_OK_ACK async unbind\n")); 2634 so_unlock_single(so, SOASYNC_UNBIND); 2635 mutex_exit(&so->so_lock); 2636 freemsg(mp); 2637 return (NULL); 2638 } 2639 mutex_exit(&so->so_lock); 2640 soqueueack(so, mp); 2641 return (NULL); 2642 2643 case T_INFO_ACK: 2644 if (MBLKL(mp) < sizeof (struct T_info_ack)) { 2645 zcmn_err(getzoneid(), CE_WARN, 2646 "sockfs: Too short T_INFO_ACK. Len = %ld\n", 2647 (ptrdiff_t)(MBLKL(mp))); 2648 freemsg(mp); 2649 return (NULL); 2650 } 2651 soqueueack(so, mp); 2652 return (NULL); 2653 2654 case T_CAPABILITY_ACK: 2655 /* 2656 * A T_capability_ack need only be large enough to hold 2657 * the PRIM_type and CAP_bits1 fields; checking for anything 2658 * larger might reject a correct response from an older 2659 * provider. 2660 */ 2661 if (MBLKL(mp) < 2 * sizeof (t_uscalar_t)) { 2662 zcmn_err(getzoneid(), CE_WARN, 2663 "sockfs: Too short T_CAPABILITY_ACK. Len = %ld\n", 2664 (ptrdiff_t)(MBLKL(mp))); 2665 freemsg(mp); 2666 return (NULL); 2667 } 2668 soqueueack(so, mp); 2669 return (NULL); 2670 2671 case T_BIND_ACK: 2672 if (MBLKL(mp) < sizeof (struct T_bind_ack)) { 2673 zcmn_err(getzoneid(), CE_WARN, 2674 "sockfs: Too short T_BIND_ACK. Len = %ld\n", 2675 (ptrdiff_t)(MBLKL(mp))); 2676 freemsg(mp); 2677 return (NULL); 2678 } 2679 soqueueack(so, mp); 2680 return (NULL); 2681 2682 case T_OPTMGMT_ACK: 2683 if (MBLKL(mp) < sizeof (struct T_optmgmt_ack)) { 2684 zcmn_err(getzoneid(), CE_WARN, 2685 "sockfs: Too short T_OPTMGMT_ACK. 
Len = %ld\n", 2686 (ptrdiff_t)(MBLKL(mp))); 2687 freemsg(mp); 2688 return (NULL); 2689 } 2690 soqueueack(so, mp); 2691 return (NULL); 2692 default: 2693 #ifdef DEBUG 2694 zcmn_err(getzoneid(), CE_WARN, 2695 "sockfs: unknown TPI primitive %d received\n", 2696 tpr->type); 2697 #endif /* DEBUG */ 2698 freemsg(mp); 2699 return (NULL); 2700 } 2701 } 2702 2703 /* 2704 * This routine is registered with the stream head to receive other 2705 * (non-data, and non-proto) messages. 2706 * 2707 * Returns NULL if the message was consumed. 2708 * Returns an mblk to make that mblk be processed by the stream head. 2709 * 2710 * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and 2711 * *pollwakeups) for the stream head to take action on. 2712 */ 2713 static mblk_t * 2714 strsock_misc(vnode_t *vp, mblk_t *mp, 2715 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 2716 strsigset_t *allmsgsigs, strpollset_t *pollwakeups) 2717 { 2718 struct sonode *so; 2719 sotpi_info_t *sti; 2720 2721 so = VTOSO(vp); 2722 sti = SOTOTPI(so); 2723 2724 dprintso(so, 1, ("strsock_misc(%p, %p, 0x%x)\n", 2725 (void *)vp, (void *)mp, DB_TYPE(mp))); 2726 2727 /* Set default return values */ 2728 *wakeups = *allmsgsigs = *firstmsgsigs = *pollwakeups = 0; 2729 2730 switch (DB_TYPE(mp)) { 2731 case M_PCSIG: 2732 /* 2733 * This assumes that an M_PCSIG for the urgent data arrives 2734 * before the corresponding T_EXDATA_IND. 2735 * 2736 * Note: Just like in SunOS 4.X and 4.4BSD a poll will be 2737 * awoken before the urgent data shows up. 2738 * For OOBINLINE this can result in select returning 2739 * only exceptions as opposed to except|read. 
2740 */ 2741 if (*mp->b_rptr == SIGURG) { 2742 mutex_enter(&so->so_lock); 2743 dprintso(so, 1, 2744 ("SIGURG(%p): counts %d/%d state %s\n", 2745 (void *)vp, sti->sti_oobsigcnt, sti->sti_oobcnt, 2746 pr_state(so->so_state, so->so_mode))); 2747 so_oob_sig(so, 1, allmsgsigs, pollwakeups); 2748 dprintso(so, 1, 2749 ("after SIGURG(%p): counts %d/%d " 2750 " poll 0x%x sig 0x%x state %s\n", 2751 (void *)vp, sti->sti_oobsigcnt, sti->sti_oobcnt, 2752 *pollwakeups, *allmsgsigs, 2753 pr_state(so->so_state, so->so_mode))); 2754 mutex_exit(&so->so_lock); 2755 } 2756 freemsg(mp); 2757 return (NULL); 2758 2759 case M_SIG: 2760 case M_HANGUP: 2761 case M_UNHANGUP: 2762 case M_ERROR: 2763 /* M_ERRORs etc are ignored */ 2764 freemsg(mp); 2765 return (NULL); 2766 2767 case M_FLUSH: 2768 /* 2769 * Do not flush read queue. If the M_FLUSH 2770 * arrives because of an impending T_discon_ind 2771 * we still have to keep any queued data - this is part of 2772 * socket semantics. 2773 */ 2774 if (*mp->b_rptr & FLUSHW) { 2775 *mp->b_rptr &= ~FLUSHR; 2776 return (mp); 2777 } 2778 freemsg(mp); 2779 return (NULL); 2780 2781 default: 2782 return (mp); 2783 } 2784 } 2785 2786 2787 /* Register to receive signals for certain events */ 2788 int 2789 so_set_asyncsigs(vnode_t *vp, pid_t pgrp, int events, int mode, cred_t *cr) 2790 { 2791 struct strsigset ss; 2792 int32_t rval; 2793 2794 /* 2795 * Note that SOLOCKED will be set except for the call from soaccept(). 2796 */ 2797 ASSERT(!mutex_owned(&VTOSO(vp)->so_lock)); 2798 ss.ss_pid = pgrp; 2799 ss.ss_events = events; 2800 return (strioctl(vp, I_ESETSIG, (intptr_t)&ss, mode, K_TO_K, cr, 2801 &rval)); 2802 } 2803 2804 2805 /* Register for events matching the SS_ASYNC flag */ 2806 int 2807 so_set_events(struct sonode *so, vnode_t *vp, cred_t *cr) 2808 { 2809 int events = so->so_state & SS_ASYNC ? 
2810 S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT : 2811 S_RDBAND | S_BANDURG; 2812 2813 return (so_set_asyncsigs(vp, so->so_pgrp, events, 0, cr)); 2814 } 2815 2816 2817 /* Change the SS_ASYNC flag, and update signal delivery if needed */ 2818 int 2819 so_flip_async(struct sonode *so, vnode_t *vp, int mode, cred_t *cr) 2820 { 2821 ASSERT(mutex_owned(&so->so_lock)); 2822 if (so->so_pgrp != 0) { 2823 int error; 2824 int events = so->so_state & SS_ASYNC ? /* Old flag */ 2825 S_RDBAND | S_BANDURG : /* New sigs */ 2826 S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT; 2827 2828 so_lock_single(so); 2829 mutex_exit(&so->so_lock); 2830 2831 error = so_set_asyncsigs(vp, so->so_pgrp, events, mode, cr); 2832 2833 mutex_enter(&so->so_lock); 2834 so_unlock_single(so, SOLOCKED); 2835 if (error) 2836 return (error); 2837 } 2838 so->so_state ^= SS_ASYNC; 2839 return (0); 2840 } 2841 2842 /* 2843 * Set new pid/pgrp for SIGPOLL (or SIGIO for FIOASYNC mode), replacing 2844 * any existing one. If passed zero, just clear the existing one. 2845 */ 2846 int 2847 so_set_siggrp(struct sonode *so, vnode_t *vp, pid_t pgrp, int mode, cred_t *cr) 2848 { 2849 int events = so->so_state & SS_ASYNC ? 2850 S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT : 2851 S_RDBAND | S_BANDURG; 2852 int error; 2853 2854 ASSERT(mutex_owned(&so->so_lock)); 2855 2856 /* 2857 * Change socket process (group). 2858 * 2859 * strioctl (via so_set_asyncsigs) will perform permission check and 2860 * also keep a PID_HOLD to prevent the pid from being reused. 
2861 */ 2862 so_lock_single(so); 2863 mutex_exit(&so->so_lock); 2864 2865 if (pgrp != 0) { 2866 dprintso(so, 1, ("setown: adding pgrp %d ev 0x%x\n", 2867 pgrp, events)); 2868 error = so_set_asyncsigs(vp, pgrp, events, mode, cr); 2869 if (error != 0) { 2870 eprintsoline(so, error); 2871 goto bad; 2872 } 2873 } 2874 /* Remove the previously registered process/group */ 2875 if (so->so_pgrp != 0) { 2876 dprintso(so, 1, ("setown: removing pgrp %d\n", so->so_pgrp)); 2877 error = so_set_asyncsigs(vp, so->so_pgrp, 0, mode, cr); 2878 if (error != 0) { 2879 eprintsoline(so, error); 2880 error = 0; 2881 } 2882 } 2883 mutex_enter(&so->so_lock); 2884 so_unlock_single(so, SOLOCKED); 2885 so->so_pgrp = pgrp; 2886 return (0); 2887 bad: 2888 mutex_enter(&so->so_lock); 2889 so_unlock_single(so, SOLOCKED); 2890 return (error); 2891 } 2892 2893 /* 2894 * Wrapper for getmsg. If the socket has been converted to a stream 2895 * pass the request to the stream head. 2896 */ 2897 int 2898 sock_getmsg( 2899 struct vnode *vp, 2900 struct strbuf *mctl, 2901 struct strbuf *mdata, 2902 uchar_t *prip, 2903 int *flagsp, 2904 int fmode, 2905 rval_t *rvp 2906 ) 2907 { 2908 struct sonode *so; 2909 2910 ASSERT(vp->v_type == VSOCK); 2911 /* 2912 * Use the stream head to find the real socket vnode. 2913 * This is needed when namefs sits above sockfs. Some 2914 * sockets (like SCTP) are not streams. 2915 */ 2916 if (!vp->v_stream) { 2917 return (ENOSTR); 2918 } 2919 ASSERT(vp->v_stream->sd_vnode); 2920 vp = vp->v_stream->sd_vnode; 2921 ASSERT(vn_matchops(vp, socket_vnodeops)); 2922 so = VTOSO(vp); 2923 2924 dprintso(so, 1, ("sock_getmsg(%p) %s\n", 2925 (void *)so, pr_state(so->so_state, so->so_mode))); 2926 2927 if (so->so_version == SOV_STREAM) { 2928 /* The imaginary "sockmod" has been popped - act as a stream */ 2929 return (strgetmsg(vp, mctl, mdata, prip, flagsp, fmode, rvp)); 2930 } 2931 eprintsoline(so, ENOSTR); 2932 return (ENOSTR); 2933 } 2934 2935 /* 2936 * Wrapper for putmsg. 
If the socket has been converted to a stream 2937 * pass the request to the stream head. 2938 * 2939 * Note that a while a regular socket (SOV_SOCKSTREAM) does support the 2940 * streams ioctl set it does not support putmsg and getmsg. 2941 * Allowing putmsg would prevent sockfs from tracking the state of 2942 * the socket/transport and would also invalidate the locking in sockfs. 2943 */ 2944 int 2945 sock_putmsg( 2946 struct vnode *vp, 2947 struct strbuf *mctl, 2948 struct strbuf *mdata, 2949 uchar_t pri, 2950 int flag, 2951 int fmode 2952 ) 2953 { 2954 struct sonode *so; 2955 2956 ASSERT(vp->v_type == VSOCK); 2957 /* 2958 * Use the stream head to find the real socket vnode. 2959 * This is needed when namefs sits above sockfs. 2960 */ 2961 if (!vp->v_stream) { 2962 return (ENOSTR); 2963 } 2964 ASSERT(vp->v_stream->sd_vnode); 2965 vp = vp->v_stream->sd_vnode; 2966 ASSERT(vn_matchops(vp, socket_vnodeops)); 2967 so = VTOSO(vp); 2968 2969 dprintso(so, 1, ("sock_putmsg(%p) %s\n", 2970 (void *)so, pr_state(so->so_state, so->so_mode))); 2971 2972 if (so->so_version == SOV_STREAM) { 2973 /* The imaginary "sockmod" has been popped - act as a stream */ 2974 return (strputmsg(vp, mctl, mdata, pri, flag, fmode)); 2975 } 2976 eprintsoline(so, ENOSTR); 2977 return (ENOSTR); 2978 } 2979 2980 /* 2981 * Special function called only from f_getfl(). 2982 * Returns FASYNC if the SS_ASYNC flag is set on a socket, else 0. 2983 * No locks are acquired here, so it is safe to use while uf_lock is held. 2984 * This exists solely for BSD fcntl() FASYNC compatibility. 
2985 */ 2986 int 2987 sock_getfasync(vnode_t *vp) 2988 { 2989 struct sonode *so; 2990 2991 ASSERT(vp->v_type == VSOCK); 2992 /* 2993 * For stream model, v_stream is used; For non-stream, v_stream always 2994 * equals NULL 2995 */ 2996 if (vp->v_stream != NULL) 2997 so = VTOSO(vp->v_stream->sd_vnode); 2998 else 2999 so = VTOSO(vp); 3000 3001 if (so->so_version == SOV_STREAM || !(so->so_state & SS_ASYNC)) 3002 return (0); 3003 3004 return (FASYNC); 3005 } 3006 3007 /* 3008 * Sockfs sodirect STREAMS read put procedure. Called from sodirect enable 3009 * transport driver/module with an mblk_t chain. 3010 * 3011 * Note, we in-line putq() for the fast-path cases of q is empty, q_last and 3012 * bp are of type M_DATA. All other cases we call putq(). 3013 * 3014 * On success a zero will be return, else an errno will be returned. 3015 */ 3016 int 3017 sodput(sodirect_t *sodp, mblk_t *bp) 3018 { 3019 queue_t *q = sodp->sod_q; 3020 struct stdata *stp = (struct stdata *)q->q_ptr; 3021 mblk_t *nbp; 3022 mblk_t *last = q->q_last; 3023 int bytecnt = 0; 3024 int mblkcnt = 0; 3025 3026 3027 ASSERT(MUTEX_HELD(sodp->sod_lockp)); 3028 3029 if (stp->sd_flag == STREOF) { 3030 do { 3031 if ((nbp = bp->b_next) != NULL) 3032 bp->b_next = NULL; 3033 freemsg(bp); 3034 } while ((bp = nbp) != NULL); 3035 3036 return (0); 3037 } 3038 3039 mutex_enter(QLOCK(q)); 3040 if (q->q_first == NULL) { 3041 /* Q empty, really fast fast-path */ 3042 bp->b_prev = NULL; 3043 bp->b_next = NULL; 3044 q->q_first = bp; 3045 q->q_last = bp; 3046 3047 } else if (last->b_datap->db_type == M_DATA && 3048 bp->b_datap->db_type == M_DATA) { 3049 /* 3050 * Last mblk_t chain and bp are both type M_DATA so 3051 * in-line putq() here, if the DBLK_UIOA state match 3052 * add bp to the end of the current last chain, else 3053 * start a new last chain with bp. 
3054 */ 3055 if ((last->b_datap->db_flags & DBLK_UIOA) == 3056 (bp->b_datap->db_flags & DBLK_UIOA)) { 3057 /* Added to end */ 3058 while ((nbp = last->b_cont) != NULL) 3059 last = nbp; 3060 last->b_cont = bp; 3061 } else { 3062 /* New last */ 3063 ASSERT((bp->b_datap->db_flags & DBLK_UIOA) == 0 || 3064 msgdsize(bp) == sodp->sod_uioa.uioa_mbytes); 3065 last->b_next = bp; 3066 bp->b_next = NULL; 3067 bp->b_prev = last; 3068 q->q_last = bp; 3069 } 3070 } else { 3071 /* 3072 * Can't use q_last so just call putq(). 3073 */ 3074 mutex_exit(QLOCK(q)); 3075 3076 ASSERT((bp->b_datap->db_flags & DBLK_UIOA) == 0 || 3077 msgdsize(bp) == sodp->sod_uioa.uioa_mbytes); 3078 (void) putq(q, bp); 3079 return (0); 3080 } 3081 3082 /* Count bytes and mblk_t's */ 3083 do { 3084 bytecnt += MBLKL(bp); 3085 mblkcnt++; 3086 } while ((bp = bp->b_cont) != NULL); 3087 q->q_count += bytecnt; 3088 q->q_mblkcnt += mblkcnt; 3089 3090 /* Check for QFULL */ 3091 if (q->q_count >= q->q_hiwat + sodp->sod_want || 3092 q->q_mblkcnt >= q->q_hiwat) { 3093 q->q_flag |= QFULL; 3094 } 3095 3096 mutex_exit(QLOCK(q)); 3097 return (0); 3098 } 3099 3100 /* 3101 * Sockfs sodirect read wakeup. Called from a sodirect enabled transport 3102 * driver/module to indicate that read-side data is available. 3103 * 3104 * On return the sodirect_t.lock mutex will be exited so this must be the 3105 * last sodirect_t call to guarantee atomic access of *sodp. 3106 */ 3107 void 3108 sodwakeup(sodirect_t *sodp) 3109 { 3110 queue_t *q = sodp->sod_q; 3111 struct stdata *stp = (struct stdata *)q->q_ptr; 3112 3113 ASSERT(MUTEX_HELD(sodp->sod_lockp)); 3114 3115 if (stp->sd_flag & RSLEEP) { 3116 stp->sd_flag &= ~RSLEEP; 3117 cv_broadcast(&q->q_wait); 3118 } 3119 3120 if (stp->sd_rput_opt & SR_POLLIN) { 3121 stp->sd_rput_opt &= ~SR_POLLIN; 3122 mutex_exit(sodp->sod_lockp); 3123 pollwakeup(&stp->sd_pollist, POLLIN | POLLRDNORM); 3124 } else 3125 mutex_exit(sodp->sod_lockp); 3126 } 3127