/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/sysmacros.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/file.h>
#include <sys/user.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/esunddi.h>
#include <sys/flock.h>
#include <sys/modctl.h>
#include <sys/vtrace.h>
#include <sys/strsun.h>
#include <sys/cmn_err.h>
#include <sys/proc.h>
#include <sys/ddi.h>

#include <sys/suntpi.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/socketvar.h>
#include <sys/sodirect.h>
#include <netinet/in.h>
#include <inet/common.h>
#include <inet/proto_set.h>

#include <sys/tiuser.h>
#define	_SUN_TPI_VERSION	2
#include <sys/tihdr.h>

#include <inet/kssl/ksslapi.h>

#include <c2/audit.h>

#include <fs/sockfs/socktpi.h>
#include <fs/sockfs/socktpi_impl.h>
#include <sys/dcopy.h>

int so_default_version = SOV_SOCKSTREAM;

#ifdef DEBUG
/* Set sockdebug to print debug messages when SO_DEBUG is set */
int sockdebug = 0;

/* Set sockprinterr to print error messages when SO_DEBUG is set */
int sockprinterr = 0;

/*
 * Set so_default_options to SO_DEBUG if all sockets should be created
 * with SO_DEBUG set. This is needed to get debug printouts from the
 * socket() call itself.
 */
int so_default_options = 0;
#endif /* DEBUG */

#ifdef SOCK_TEST
/*
 * Set to number of ticks to limit cv_waits for code coverage testing.
 * Set to 1000 when SO_DEBUG is set to 2.
 */
clock_t sock_test_timelimit = 0;
#endif /* SOCK_TEST */

/*
 * For concurrency testing of e.g. opening /dev/ip which does not
 * handle T_INFO_REQ messages.
 */
int so_no_tinfo = 0;

/*
 * Timeout for getting a T_CAPABILITY_ACK - it is possible for a provider
 * to simply ignore the T_CAPABILITY_REQ.
 */
clock_t sock_capability_timeout = 2;	/* seconds */

static int do_tcapability(struct sonode *so, t_uscalar_t cap_bits1);
static void so_removehooks(struct sonode *so);

static mblk_t *strsock_proto(vnode_t *vp, mblk_t *mp,
		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
		strsigset_t *allmsgsigs, strpollset_t *pollwakeups);
static mblk_t *strsock_misc(vnode_t *vp, mblk_t *mp,
		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
		strsigset_t *allmsgsigs, strpollset_t *pollwakeups);
/*
 * STREAMS based sodirect put/wakeup functions.
 */
static int sodput(sodirect_t *, mblk_t *);
static void sodwakeup(sodirect_t *);

/*
 * Called by sockinit() when sockfs is loaded.
 */
int
sostr_init()
{
	sod_init();
	return (0);
}

/*
 * Convert a socket to a stream. Invoked when the illusory sockmod
 * is popped from the stream.
 * Change the stream head back to default operation without losing
 * any messages (T_conn_ind's are moved to the stream head queue).
 */
int
so_sock2stream(struct sonode *so)
{
	struct vnode *vp = SOTOV(so);
	queue_t *rq;
	mblk_t *mp;
	int error = 0;
	sotpi_info_t *sti = SOTOTPI(so);

	ASSERT(MUTEX_HELD(&sti->sti_plumb_lock));

	mutex_enter(&so->so_lock);
	so_lock_single(so);

	ASSERT(so->so_version != SOV_STREAM);

	if (sti->sti_direct) {
		mblk_t **mpp;
		int rval;

		/*
		 * Tell the transport below that sockmod is being popped
		 */
		mutex_exit(&so->so_lock);
		error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K, CRED(),
		    &rval);
		mutex_enter(&so->so_lock);
		if (error != 0) {
			dprintso(so, 0, ("so_sock2stream(%p): "
			    "_SIOCSOCKFALLBACK failed\n", (void *)so));
			goto exit;
		}
		sti->sti_direct = 0;

		for (mpp = &sti->sti_conn_ind_head; (mp = *mpp) != NULL;
		    mpp = &mp->b_next) {
			struct T_conn_ind *conn_ind;

			/*
			 * strsock_proto() has already verified the length of
			 * this message block.
			 */
			ASSERT(MBLKL(mp) >= sizeof (struct T_conn_ind));

			conn_ind = (struct T_conn_ind *)mp->b_rptr;
			if (conn_ind->OPT_length == 0 &&
			    conn_ind->OPT_offset == 0)
				continue;

			if (DB_REF(mp) > 1) {
				mblk_t *newmp;
				size_t length;
				cred_t *cr;
				pid_t cpid;
				int error;	/* Dummy - error not returned */

				/*
				 * Copy the message block because it is used
				 * elsewhere, too.
				 * Can't use copyb since we want to wait
				 * yet allow for EINTR.
				 */
				/* Round up size for reuse */
				length = MAX(MBLKL(mp), 64);
				cr = msg_getcred(mp, &cpid);
				if (cr != NULL) {
					newmp = allocb_cred_wait(length, 0,
					    &error, cr, cpid);
				} else {
					newmp = allocb_wait(length, 0, 0,
					    &error);
				}
				if (newmp == NULL) {
					error = EINTR;
					goto exit;
				}
				bcopy(mp->b_rptr, newmp->b_wptr, length);
				newmp->b_wptr += length;
				newmp->b_next = mp->b_next;

				/*
				 * Link the new message block into the queue
				 * and free the old one.
				 */
				*mpp = newmp;
				mp->b_next = NULL;
				freemsg(mp);

				mp = newmp;
				conn_ind = (struct T_conn_ind *)mp->b_rptr;
			}

			/*
			 * Remove options added by TCP for accept fast-path.
			 */
			conn_ind->OPT_length = 0;
			conn_ind->OPT_offset = 0;
		}
	}

	so->so_version = SOV_STREAM;
	so->so_proto_handle = NULL;

	/*
	 * Remove the hooks in the stream head to avoid queuing more
	 * packets in sockfs.
	 */
	mutex_exit(&so->so_lock);
	so_removehooks(so);
	mutex_enter(&so->so_lock);

	/*
	 * Clear any state related to urgent data. Leave any T_EXDATA_IND
	 * on the queue - the behavior of urgent data after a switch is
	 * left undefined.
	 */
	so->so_error = sti->sti_delayed_error = 0;
	freemsg(so->so_oobmsg);
	so->so_oobmsg = NULL;
	sti->sti_oobsigcnt = sti->sti_oobcnt = 0;

	so->so_state &= ~(SS_RCVATMARK|SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA|
	    SS_SAVEDEOR);
	ASSERT(so_verify_oobstate(so));

	freemsg(sti->sti_ack_mp);
	sti->sti_ack_mp = NULL;

	/*
	 * Flush the T_DISCON_IND on sti_discon_ind_mp.
	 */
	so_flush_discon_ind(so);

	/*
	 * Move any queued T_CONN_IND messages to stream head queue.
	 */
	rq = RD(strvp2wq(vp));
	while ((mp = sti->sti_conn_ind_head) != NULL) {
		sti->sti_conn_ind_head = mp->b_next;
		mp->b_next = NULL;
		if (sti->sti_conn_ind_head == NULL) {
			ASSERT(sti->sti_conn_ind_tail == mp);
			sti->sti_conn_ind_tail = NULL;
		}
		dprintso(so, 0,
		    ("so_sock2stream(%p): moving T_CONN_IND\n", (void *)so));

		/* Drop lock across put() */
		mutex_exit(&so->so_lock);
		put(rq, mp);
		mutex_enter(&so->so_lock);
	}

exit:
	ASSERT(MUTEX_HELD(&so->so_lock));
	so_unlock_single(so, SOLOCKED);
	mutex_exit(&so->so_lock);
	return (error);
}

/*
 * Convert a stream back to a socket. This is invoked when the illusory
 * sockmod is pushed on a stream (where the stream was "created" by
 * popping the illusory sockmod).
 * This routine can not recreate the socket state (certain aspects of
 * it like urgent data state and the bound/connected addresses for AF_UNIX
 * sockets can not be recreated by asking the transport for information).
 * Thus this routine implicitly assumes that the socket is in an initial
 * state (as if it was just created). It flushes any messages queued on the
 * read queue to avoid dealing with e.g. TPI acks or T_exdata_ind messages.
 */
void
so_stream2sock(struct sonode *so)
{
	struct vnode *vp = SOTOV(so);
	sotpi_info_t *sti = SOTOTPI(so);

	ASSERT(MUTEX_HELD(&sti->sti_plumb_lock));

	mutex_enter(&so->so_lock);
	so_lock_single(so);
	ASSERT(so->so_version == SOV_STREAM);
	so->so_version = SOV_SOCKSTREAM;
	sti->sti_pushcnt = 0;
	mutex_exit(&so->so_lock);

	/*
	 * Set a permanent error to force any thread in sorecvmsg to
	 * return (and drop SOREADLOCKED). Clear the error once
	 * we have SOREADLOCKED.
	 * This makes a read sleeping during the I_PUSH of sockmod return
	 * EIO.
	 */
	strsetrerror(SOTOV(so), EIO, 1, NULL);

	/*
	 * Get the read lock before flushing data to avoid
	 * problems with the T_EXDATA_IND MSG_PEEK code in sorecvmsg.
	 */
	mutex_enter(&so->so_lock);
	(void) so_lock_read(so, 0);	/* Set SOREADLOCKED */
	mutex_exit(&so->so_lock);

	strsetrerror(SOTOV(so), 0, 0, NULL);
	so_installhooks(so);

	/*
	 * Flush everything on the read queue.
	 * This ensures that no T_CONN_IND remain and that no T_EXDATA_IND
	 * remain; those types of messages would confuse sockfs.
	 */
	strflushrq(vp, FLUSHALL);
	mutex_enter(&so->so_lock);

	/*
	 * Flush the T_DISCON_IND on sti_discon_ind_mp.
	 */
	so_flush_discon_ind(so);
	so_unlock_read(so);	/* Clear SOREADLOCKED */

	so_unlock_single(so, SOLOCKED);
	mutex_exit(&so->so_lock);
}

/*
 * Install the hooks in the stream head.
 */
void
so_installhooks(struct sonode *so)
{
	struct vnode *vp = SOTOV(so);

	strsetrputhooks(vp, SH_SIGALLDATA | SH_IGN_ZEROLEN | SH_CONSOL_DATA,
	    strsock_proto, strsock_misc);
	strsetwputhooks(vp, SH_SIGPIPE | SH_RECHECK_ERR, 0);
}

/*
 * Remove the hooks in the stream head.
 */
static void
so_removehooks(struct sonode *so)
{
	struct vnode *vp = SOTOV(so);

	strsetrputhooks(vp, 0, NULL, NULL);
	strsetwputhooks(vp, 0, STRTIMOUT);
	/*
	 * Leave read behavior as it would have been for a normal
	 * stream i.e. a read of an M_PROTO will fail.
	 */
}

void
so_basic_strinit(struct sonode *so)
{
	struct vnode *vp = SOTOV(so);
	struct stdata *stp;
	mblk_t *mp;
	sotpi_info_t *sti = SOTOTPI(so);

	/* Preallocate an unbind_req message */
	mp = soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP, CRED());
	mutex_enter(&so->so_lock);
	sti->sti_unbind_mp = mp;
#ifdef DEBUG
	so->so_options = so_default_options;
#endif /* DEBUG */
	mutex_exit(&so->so_lock);

	so_installhooks(so);

	stp = vp->v_stream;
	/*
	 * Have to keep minpsz at zero in order to allow write/send of zero
	 * bytes.
	 */
	mutex_enter(&stp->sd_lock);
	if (stp->sd_qn_minpsz == 1)
		stp->sd_qn_minpsz = 0;
	mutex_exit(&stp->sd_lock);

	/*
	 * If sodirect capable allocate and initialize sodirect_t.
	 * Note, SS_SODIRECT is set in socktpi_open().
	 */
	if ((so->so_state & SS_SODIRECT) &&
	    !(so->so_state & SS_FALLBACK_PENDING)) {
		sod_sock_init(so, stp, sodput, sodwakeup, &stp->sd_lock);
	}
}

/*
 * Initialize the streams side of a socket including
 * T_info_req/ack processing. If tso is not NULL its values are used thereby
 * avoiding the T_INFO_REQ.
 */
int
so_strinit(struct sonode *so, struct sonode *tso)
{
	sotpi_info_t *sti = SOTOTPI(so);
	sotpi_info_t *tsti;
	int error;

	so_basic_strinit(so);

	/*
	 * The T_CAPABILITY_REQ should be the first message sent down because
	 * at least TCP has a fast-path for this which avoids timeouts while
	 * waiting for the T_CAPABILITY_ACK under high system load.
	 */
	if (tso == NULL) {
		error = do_tcapability(so, TC1_ACCEPTOR_ID | TC1_INFO);
		if (error)
			return (error);
	} else {
		tsti = SOTOTPI(tso);

		mutex_enter(&so->so_lock);
		sti->sti_tsdu_size = tsti->sti_tsdu_size;
		sti->sti_etsdu_size = tsti->sti_etsdu_size;
		sti->sti_addr_size = tsti->sti_addr_size;
		sti->sti_opt_size = tsti->sti_opt_size;
		sti->sti_tidu_size = tsti->sti_tidu_size;
		sti->sti_serv_type = tsti->sti_serv_type;
		so->so_mode = tso->so_mode & ~SM_ACCEPTOR_ID;
		mutex_exit(&so->so_lock);

		/* the following do_tcapability may update so->so_mode */
		if ((tsti->sti_serv_type != T_CLTS) &&
		    (sti->sti_direct == 0)) {
			error = do_tcapability(so, TC1_ACCEPTOR_ID);
			if (error)
				return (error);
		}
	}
	/*
	 * If the addr_size is 0 we treat it as already bound
	 * and connected. This is used by the routing socket.
	 * We set the addr_size to something to allocate the address
	 * structures.
	 */
	if (sti->sti_addr_size == 0) {
		so->so_state |= SS_ISBOUND | SS_ISCONNECTED;
		/* Address size can vary with address families. */
		if (so->so_family == AF_INET6)
			sti->sti_addr_size =
			    (t_scalar_t)sizeof (struct sockaddr_in6);
		else
			sti->sti_addr_size =
			    (t_scalar_t)sizeof (struct sockaddr_in);
		ASSERT(sti->sti_unbind_mp);
	}

	so_alloc_addr(so, sti->sti_addr_size);

	return (0);
}

static void
copy_tinfo(struct sonode *so, struct T_info_ack *tia)
{
	sotpi_info_t *sti = SOTOTPI(so);

	sti->sti_tsdu_size = tia->TSDU_size;
	sti->sti_etsdu_size = tia->ETSDU_size;
	sti->sti_addr_size = tia->ADDR_size;
	sti->sti_opt_size = tia->OPT_size;
	sti->sti_tidu_size = tia->TIDU_size;
	sti->sti_serv_type = tia->SERV_type;
	switch (tia->CURRENT_state) {
	case TS_UNBND:
		break;
	case TS_IDLE:
		so->so_state |= SS_ISBOUND;
		sti->sti_laddr_len = 0;
		sti->sti_laddr_valid = 0;
		break;
	case TS_DATA_XFER:
		so->so_state |= SS_ISBOUND|SS_ISCONNECTED;
		sti->sti_laddr_len = 0;
		sti->sti_faddr_len = 0;
		sti->sti_laddr_valid = 0;
		sti->sti_faddr_valid = 0;
		break;
	}

	/*
	 * Heuristics for determining the socket mode flags
	 * (SM_ATOMIC, SM_CONNREQUIRED, SM_ADDR, SM_FDPASSING,
	 * and SM_EXDATA, SM_OPTDATA, and SM_BYTESTREAM)
	 * from the info ack.
	 */
	if (sti->sti_serv_type == T_CLTS) {
		so->so_mode |= SM_ATOMIC | SM_ADDR;
	} else {
		so->so_mode |= SM_CONNREQUIRED;
		if (sti->sti_etsdu_size != 0 && sti->sti_etsdu_size != -2)
			so->so_mode |= SM_EXDATA;
	}
	if (so->so_type == SOCK_SEQPACKET || so->so_type == SOCK_RAW) {
		/* Semantics are to discard tail end of messages */
		so->so_mode |= SM_ATOMIC;
	}
	if (so->so_family == AF_UNIX) {
		so->so_mode |= SM_FDPASSING | SM_OPTDATA;
		if (sti->sti_addr_size == -1) {
			/* MAXPATHLEN + soun_family + nul termination */
			sti->sti_addr_size = (t_scalar_t)(MAXPATHLEN +
			    sizeof (short) + 1);
		}
		if (so->so_type == SOCK_STREAM) {
			/*
			 * Make it into a byte-stream transport.
			 * SOCK_SEQPACKET sockets are unchanged.
			 */
			sti->sti_tsdu_size = 0;
		}
	} else if (sti->sti_addr_size == -1) {
		/*
		 * Logic extracted from sockmod - have to pick some max address
		 * length in order to preallocate the addresses.
		 */
		sti->sti_addr_size = SOA_DEFSIZE;
	}
	if (sti->sti_tsdu_size == 0)
		so->so_mode |= SM_BYTESTREAM;
}

static int
check_tinfo(struct sonode *so)
{
	sotpi_info_t *sti = SOTOTPI(so);

	/* Consistency checks */
	if (so->so_type == SOCK_DGRAM && sti->sti_serv_type != T_CLTS) {
		eprintso(so, ("service type and socket type mismatch\n"));
		eprintsoline(so, EPROTO);
		return (EPROTO);
	}
	if (so->so_type == SOCK_STREAM && sti->sti_serv_type == T_CLTS) {
		eprintso(so, ("service type and socket type mismatch\n"));
		eprintsoline(so, EPROTO);
		return (EPROTO);
	}
	if (so->so_type == SOCK_SEQPACKET && sti->sti_serv_type == T_CLTS) {
		eprintso(so, ("service type and socket type mismatch\n"));
		eprintsoline(so, EPROTO);
		return (EPROTO);
	}
	if (so->so_family == AF_INET &&
	    sti->sti_addr_size != (t_scalar_t)sizeof (struct sockaddr_in)) {
		eprintso(so,
		    ("AF_INET must have sockaddr_in address length. Got %d\n",
		    sti->sti_addr_size));
		eprintsoline(so, EMSGSIZE);
		return (EMSGSIZE);
	}
	if (so->so_family == AF_INET6 &&
	    sti->sti_addr_size != (t_scalar_t)sizeof (struct sockaddr_in6)) {
		eprintso(so,
		    ("AF_INET6 must have sockaddr_in6 address length. Got %d\n",
		    sti->sti_addr_size));
		eprintsoline(so, EMSGSIZE);
		return (EMSGSIZE);
	}

	dprintso(so, 1, (
	    "tinfo: serv %d tsdu %d, etsdu %d, addr %d, opt %d, tidu %d\n",
	    sti->sti_serv_type, sti->sti_tsdu_size, sti->sti_etsdu_size,
	    sti->sti_addr_size, sti->sti_opt_size,
	    sti->sti_tidu_size));
	dprintso(so, 1, ("tinfo: so_state %s\n",
	    pr_state(so->so_state, so->so_mode)));
	return (0);
}

/*
 * Send down T_info_req and wait for the ack.
 * Record interesting T_info_ack values in the sonode.
 */
static int
do_tinfo(struct sonode *so)
{
	struct T_info_req tir;
	mblk_t *mp;
	int error;

	ASSERT(MUTEX_NOT_HELD(&so->so_lock));

	if (so_no_tinfo) {
		SOTOTPI(so)->sti_addr_size = 0;
		return (0);
	}

	dprintso(so, 1, ("do_tinfo(%p)\n", (void *)so));

	/* Send T_INFO_REQ */
	tir.PRIM_type = T_INFO_REQ;
	mp = soallocproto1(&tir, sizeof (tir),
	    sizeof (struct T_info_req) + sizeof (struct T_info_ack),
	    _ALLOC_INTR, CRED());
	if (mp == NULL) {
		eprintsoline(so, ENOBUFS);
		return (ENOBUFS);
	}
	/* T_INFO_REQ has to be M_PCPROTO */
	DB_TYPE(mp) = M_PCPROTO;

	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
	if (error) {
		eprintsoline(so, error);
		return (error);
	}
	mutex_enter(&so->so_lock);
	/* Wait for T_INFO_ACK */
	if ((error = sowaitprim(so, T_INFO_REQ, T_INFO_ACK,
	    (t_uscalar_t)sizeof (struct T_info_ack), &mp, 0))) {
		mutex_exit(&so->so_lock);
		eprintsoline(so, error);
		return (error);
	}

	ASSERT(mp);
	copy_tinfo(so, (struct T_info_ack *)mp->b_rptr);
	mutex_exit(&so->so_lock);
	freemsg(mp);
	return (check_tinfo(so));
}

/*
 * Send down T_capability_req and wait for the ack.
 * Record interesting T_capability_ack values in the sonode.
 */
static int
do_tcapability(struct sonode *so, t_uscalar_t cap_bits1)
{
	struct T_capability_req tcr;
	struct T_capability_ack *tca;
	mblk_t *mp;
	int error;
	sotpi_info_t *sti = SOTOTPI(so);

	ASSERT(cap_bits1 != 0);
	ASSERT((cap_bits1 & ~(TC1_ACCEPTOR_ID | TC1_INFO)) == 0);
	ASSERT(MUTEX_NOT_HELD(&so->so_lock));

	if (sti->sti_provinfo->tpi_capability == PI_NO)
		return (do_tinfo(so));

	if (so_no_tinfo) {
		sti->sti_addr_size = 0;
		if ((cap_bits1 &= ~TC1_INFO) == 0)
			return (0);
	}

	dprintso(so, 1, ("do_tcapability(%p)\n", (void *)so));

	/* Send T_CAPABILITY_REQ */
	tcr.PRIM_type = T_CAPABILITY_REQ;
	tcr.CAP_bits1 = cap_bits1;
	mp = soallocproto1(&tcr, sizeof (tcr),
	    sizeof (struct T_capability_req) + sizeof (struct T_capability_ack),
	    _ALLOC_INTR, CRED());
	if (mp == NULL) {
		eprintsoline(so, ENOBUFS);
		return (ENOBUFS);
	}
	/* T_CAPABILITY_REQ should be M_PCPROTO here */
	DB_TYPE(mp) = M_PCPROTO;

	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
	if (error) {
		eprintsoline(so, error);
		return (error);
	}
	mutex_enter(&so->so_lock);
	/* Wait for T_CAPABILITY_ACK */
	if ((error = sowaitprim(so, T_CAPABILITY_REQ, T_CAPABILITY_ACK,
	    (t_uscalar_t)sizeof (*tca), &mp, sock_capability_timeout * hz))) {
		mutex_exit(&so->so_lock);
		PI_PROVLOCK(sti->sti_provinfo);
		if (sti->sti_provinfo->tpi_capability == PI_DONTKNOW)
			sti->sti_provinfo->tpi_capability = PI_NO;
		PI_PROVUNLOCK(sti->sti_provinfo);
		ASSERT((so->so_mode & SM_ACCEPTOR_ID) == 0);
		if (cap_bits1 & TC1_INFO) {
			/*
			 * If the T_CAPABILITY_REQ timed out and then a
			 * T_INFO_REQ gets a protocol error, most likely
			 * the capability was slow (vs. unsupported). Return
			 * ENOSR for this case as a best guess.
			 */
			if (error == ETIME) {
				return ((error = do_tinfo(so)) == EPROTO ?
				    ENOSR : error);
			}
			return (do_tinfo(so));
		}
		return (0);
	}

	ASSERT(mp);
	tca = (struct T_capability_ack *)mp->b_rptr;

	ASSERT((cap_bits1 & TC1_INFO) == (tca->CAP_bits1 & TC1_INFO));
	so_proc_tcapability_ack(so, tca);

	cap_bits1 = tca->CAP_bits1;

	mutex_exit(&so->so_lock);
	freemsg(mp);

	if (cap_bits1 & TC1_INFO)
		return (check_tinfo(so));

	return (0);
}

/*
 * Process a T_CAPABILITY_ACK
 */
void
so_proc_tcapability_ack(struct sonode *so, struct T_capability_ack *tca)
{
	sotpi_info_t *sti = SOTOTPI(so);

	if (sti->sti_provinfo->tpi_capability == PI_DONTKNOW) {
		PI_PROVLOCK(sti->sti_provinfo);
		sti->sti_provinfo->tpi_capability = PI_YES;
		PI_PROVUNLOCK(sti->sti_provinfo);
	}

	if (tca->CAP_bits1 & TC1_ACCEPTOR_ID) {
		sti->sti_acceptor_id = tca->ACCEPTOR_id;
		so->so_mode |= SM_ACCEPTOR_ID;
	}

	if (tca->CAP_bits1 & TC1_INFO)
		copy_tinfo(so, &tca->INFO_ack);
}

/*
 * Retrieve socket error, clear error if not peek.
 */
int
sogeterr(struct sonode *so, boolean_t clear_err)
{
	int error;

	ASSERT(MUTEX_HELD(&so->so_lock));

	error = so->so_error;
	if (clear_err)
		so->so_error = 0;

	return (error);
}

/*
 * This routine is registered with the stream head to retrieve read
 * side errors.
 * It does not clear the socket error for a peeking read side operation.
 * If the error is to be cleared it sets *clearerr.
 */
int
sogetrderr(vnode_t *vp, int ispeek, int *clearerr)
{
	struct sonode *so = VTOSO(vp);
	int error;

	mutex_enter(&so->so_lock);
	if (ispeek) {
		error = so->so_error;
		*clearerr = 0;
	} else {
		error = so->so_error;
		so->so_error = 0;
		*clearerr = 1;
	}
	mutex_exit(&so->so_lock);
	return (error);
}

/*
 * This routine is registered with the stream head to retrieve write
 * side errors.
 * It does not clear the socket error for a peeking read side operation.
 * If the error is to be cleared it sets *clearerr.
 */
int
sogetwrerr(vnode_t *vp, int ispeek, int *clearerr)
{
	struct sonode *so = VTOSO(vp);
	int error;

	mutex_enter(&so->so_lock);
	if (so->so_state & SS_CANTSENDMORE) {
		error = EPIPE;
		*clearerr = 0;
	} else {
		error = so->so_error;
		if (ispeek) {
			*clearerr = 0;
		} else {
			so->so_error = 0;
			*clearerr = 1;
		}
	}
	mutex_exit(&so->so_lock);
	return (error);
}

/*
 * Set a nonpersistent read and write error on the socket.
 * Used when there is a T_uderror_ind for a connected socket.
 * The caller also needs to call strsetrerror and strsetwerror
 * after dropping the lock.
 */
void
soseterror(struct sonode *so, int error)
{
	ASSERT(error != 0);

	ASSERT(MUTEX_HELD(&so->so_lock));
	so->so_error = (ushort_t)error;
}

void
soisconnecting(struct sonode *so)
{
	ASSERT(MUTEX_HELD(&so->so_lock));
	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;
	cv_broadcast(&so->so_state_cv);
}

void
soisconnected(struct sonode *so)
{
	ASSERT(MUTEX_HELD(&so->so_lock));
	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTED;
	cv_broadcast(&so->so_state_cv);
}

/*
 * The caller also needs to call strsetrerror, strsetwerror and strseteof.
 */
void
soisdisconnected(struct sonode *so, int error)
{
	ASSERT(MUTEX_HELD(&so->so_lock));
	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE);
	so->so_error = (ushort_t)error;
	if (so->so_peercred != NULL) {
		crfree(so->so_peercred);
		so->so_peercred = NULL;
	}
	cv_broadcast(&so->so_state_cv);
}

/*
 * For connected AF_UNIX SOCK_DGRAM sockets when the peer closes.
 * Does not affect write side.
 * The caller also has to call strsetrerror.
 */
static void
sobreakconn(struct sonode *so, int error)
{
	ASSERT(MUTEX_HELD(&so->so_lock));
	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_error = (ushort_t)error;
	cv_broadcast(&so->so_state_cv);
}

/*
 * Can no longer send.
 * Caller must also call strsetwerror.
 *
 * We mark the peer address as no longer valid for getpeername, but
 * leave it around for so_unix_close to notify the peer (that
 * transport has no addressing held at that layer).
 */
void
socantsendmore(struct sonode *so)
{
	ASSERT(MUTEX_HELD(&so->so_lock));
	so->so_state |= SS_CANTSENDMORE;
	cv_broadcast(&so->so_state_cv);
}

/*
 * The caller must call strseteof(,1) as well as this routine
 * to change the socket state.
 */
void
socantrcvmore(struct sonode *so)
{
	ASSERT(MUTEX_HELD(&so->so_lock));
	so->so_state |= SS_CANTRCVMORE;
	cv_broadcast(&so->so_state_cv);
}

/*
 * The caller has sent down a "request_prim" primitive and wants to wait for
 * an ack ("ack_prim") or a T_ERROR_ACK for it.
 * The specified "ack_prim" can be a T_OK_ACK.
 *
 * Assumes that all the TPI acks are M_PCPROTO messages.
 *
 * Note that the socket is single-threaded (using so_lock_single)
 * for all operations that generate TPI ack messages. Since
 * only TPI ack messages are M_PCPROTO we should never receive
 * anything except either the ack we are expecting or a T_ERROR_ACK
 * for the same primitive.
 */
int
sowaitprim(struct sonode *so, t_scalar_t request_prim, t_scalar_t ack_prim,
    t_uscalar_t min_size, mblk_t **mpp, clock_t wait)
{
	mblk_t *mp;
	union T_primitives *tpr;
	int error;

	dprintso(so, 1, ("sowaitprim(%p, %d, %d, %d, %p, %lu)\n",
	    (void *)so, request_prim, ack_prim, min_size, (void *)mpp, wait));

	ASSERT(MUTEX_HELD(&so->so_lock));

	error = sowaitack(so, &mp, wait);
	if (error)
		return (error);

	dprintso(so, 1, ("got msg %p\n", (void *)mp));
	if (DB_TYPE(mp) != M_PCPROTO ||
	    MBLKL(mp) < sizeof (tpr->type)) {
		freemsg(mp);
		eprintsoline(so, EPROTO);
		return (EPROTO);
	}
	tpr = (union T_primitives *)mp->b_rptr;
	/*
	 * Did we get the primitive that we were asking for?
	 * For T_OK_ACK we also check that it matches the request primitive.
	 */
	if (tpr->type == ack_prim &&
	    (ack_prim != T_OK_ACK ||
	    tpr->ok_ack.CORRECT_prim == request_prim)) {
		if (MBLKL(mp) >= (ssize_t)min_size) {
			/* Found what we are looking for */
			*mpp = mp;
			return (0);
		}
		/* Too short */
		freemsg(mp);
		eprintsoline(so, EPROTO);
		return (EPROTO);
	}

	if (tpr->type == T_ERROR_ACK &&
	    tpr->error_ack.ERROR_prim == request_prim) {
		/* Error to the primitive we were looking for */
		if (tpr->error_ack.TLI_error == TSYSERR) {
			error = tpr->error_ack.UNIX_error;
		} else {
			error = proto_tlitosyserr(tpr->error_ack.TLI_error);
		}
		dprintso(so, 0, ("error_ack for %d: %d/%d ->%d\n",
		    tpr->error_ack.ERROR_prim, tpr->error_ack.TLI_error,
		    tpr->error_ack.UNIX_error, error));
		freemsg(mp);
		return (error);
	}
	/*
	 * Wrong primitive or T_ERROR_ACK for the wrong primitive
	 */
#ifdef DEBUG
	if (tpr->type == T_ERROR_ACK) {
		dprintso(so, 0, ("error_ack for %d: %d/%d\n",
		    tpr->error_ack.ERROR_prim, tpr->error_ack.TLI_error,
		    tpr->error_ack.UNIX_error));
	} else if (tpr->type == T_OK_ACK) {
		dprintso(so, 0, ("ok_ack for %d, expected %d for %d\n",
		    tpr->ok_ack.CORRECT_prim, ack_prim, request_prim));
	} else {
		dprintso(so, 0,
		    ("unexpected primitive %d, expected %d for %d\n",
		    tpr->type, ack_prim, request_prim));
	}
#endif /* DEBUG */

	freemsg(mp);
	eprintsoline(so, EPROTO);
	return (EPROTO);
}

/*
 * Wait for a T_OK_ACK for the specified primitive.
 */
int
sowaitokack(struct sonode *so, t_scalar_t request_prim)
{
	mblk_t *mp;
	int error;

	error = sowaitprim(so, request_prim, T_OK_ACK,
	    (t_uscalar_t)sizeof (struct T_ok_ack), &mp, 0);
	if (error)
		return (error);
	freemsg(mp);
	return (0);
}

/*
 * Queue a received TPI ack message on sti_ack_mp.
 */
void
soqueueack(struct sonode *so, mblk_t *mp)
{
	sotpi_info_t *sti = SOTOTPI(so);

	if (DB_TYPE(mp) != M_PCPROTO) {
		zcmn_err(getzoneid(), CE_WARN,
		    "sockfs: received unexpected M_PROTO TPI ack. Prim %d\n",
		    *(t_scalar_t *)mp->b_rptr);
		freemsg(mp);
		return;
	}

	mutex_enter(&so->so_lock);
	if (sti->sti_ack_mp != NULL) {
		dprintso(so, 1, ("sti_ack_mp already set\n"));
		freemsg(sti->sti_ack_mp);
		sti->sti_ack_mp = NULL;
	}
	sti->sti_ack_mp = mp;
	cv_broadcast(&sti->sti_ack_cv);
	mutex_exit(&so->so_lock);
}

/*
 * Wait for a TPI ack ignoring signals and errors.
 */
int
sowaitack(struct sonode *so, mblk_t **mpp, clock_t wait)
{
	sotpi_info_t *sti = SOTOTPI(so);

	ASSERT(MUTEX_HELD(&so->so_lock));

	while (sti->sti_ack_mp == NULL) {
#ifdef SOCK_TEST
		if (wait == 0 && sock_test_timelimit != 0)
			wait = sock_test_timelimit;
#endif
		if (wait != 0) {
			/*
			 * Only wait for the time limit.
			 */
			clock_t now;

			time_to_wait(&now, wait);
			if (cv_timedwait(&sti->sti_ack_cv, &so->so_lock,
			    now) == -1) {
				eprintsoline(so, ETIME);
				return (ETIME);
			}
		} else
			cv_wait(&sti->sti_ack_cv, &so->so_lock);
	}
	*mpp = sti->sti_ack_mp;
#ifdef DEBUG
	{
		union T_primitives *tpr;
		mblk_t *mp = *mpp;

		tpr = (union T_primitives *)mp->b_rptr;
		ASSERT(DB_TYPE(mp) == M_PCPROTO);
		ASSERT(tpr->type == T_OK_ACK ||
		    tpr->type == T_ERROR_ACK ||
		    tpr->type == T_BIND_ACK ||
		    tpr->type == T_CAPABILITY_ACK ||
		    tpr->type == T_INFO_ACK ||
		    tpr->type == T_OPTMGMT_ACK);
	}
#endif /* DEBUG */
	sti->sti_ack_mp = NULL;
	return (0);
}

/*
 * Queue a received T_CONN_IND message on sti_conn_ind_head/tail.
 */
void
soqueueconnind(struct sonode *so, mblk_t *mp)
{
	sotpi_info_t *sti = SOTOTPI(so);

	if (DB_TYPE(mp) != M_PROTO) {
		zcmn_err(getzoneid(), CE_WARN,
		    "sockfs: received unexpected M_PCPROTO T_CONN_IND\n");
		freemsg(mp);
		return;
	}

	mutex_enter(&so->so_lock);
	ASSERT(mp->b_next == NULL);
	if (sti->sti_conn_ind_head == NULL) {
		sti->sti_conn_ind_head = mp;
	} else {
		ASSERT(sti->sti_conn_ind_tail->b_next == NULL);
		sti->sti_conn_ind_tail->b_next = mp;
	}
	sti->sti_conn_ind_tail = mp;
	/* Wakeup a single consumer of the T_CONN_IND */
	cv_signal(&so->so_acceptq_cv);
	mutex_exit(&so->so_lock);
}

/*
 * Wait for a T_CONN_IND.
 * Don't wait if nonblocking.
 * Accept signals and socket errors.
 */
int
sowaitconnind(struct sonode *so, int fmode, mblk_t **mpp)
{
	mblk_t *mp;
	sotpi_info_t *sti = SOTOTPI(so);
	int error = 0;

	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
	mutex_enter(&so->so_lock);
check_error:
	if (so->so_error) {
		error = sogeterr(so, B_TRUE);
		if (error) {
			mutex_exit(&so->so_lock);
			return (error);
		}
	}

	if (sti->sti_conn_ind_head == NULL) {
		if (fmode & (FNDELAY|FNONBLOCK)) {
			error = EWOULDBLOCK;
			goto done;
		}

		if (so->so_state & SS_CLOSING) {
			error = EINTR;
			goto done;
		}

		if (!cv_wait_sig_swap(&so->so_acceptq_cv, &so->so_lock)) {
			error = EINTR;
			goto done;
		}
		goto check_error;
	}
	mp = sti->sti_conn_ind_head;
	sti->sti_conn_ind_head = mp->b_next;
	mp->b_next = NULL;
	if (sti->sti_conn_ind_head == NULL) {
		ASSERT(sti->sti_conn_ind_tail == mp);
		sti->sti_conn_ind_tail = NULL;
	}
	*mpp = mp;
done:
	mutex_exit(&so->so_lock);
	return (error);
}

/*
 * Flush a T_CONN_IND matching the sequence number from the list.
 * Return zero if found; non-zero otherwise.
 * This is called very infrequently thus it is ok to do a linear search.
 */
int
soflushconnind(struct sonode *so, t_scalar_t seqno)
{
	mblk_t *prevmp, *mp;
	struct T_conn_ind *tci;
	sotpi_info_t *sti = SOTOTPI(so);

	mutex_enter(&so->so_lock);
	for (prevmp = NULL, mp = sti->sti_conn_ind_head; mp != NULL;
	    prevmp = mp, mp = mp->b_next) {
		tci = (struct T_conn_ind *)mp->b_rptr;
		if (tci->SEQ_number == seqno) {
			dprintso(so, 1,
			    ("t_discon_ind: found T_CONN_IND %d\n", seqno));
			/* Deleting last? */
			if (sti->sti_conn_ind_tail == mp) {
				sti->sti_conn_ind_tail = prevmp;
			}
			if (prevmp == NULL) {
				/* Deleting first */
				sti->sti_conn_ind_head = mp->b_next;
			} else {
				prevmp->b_next = mp->b_next;
			}
			mp->b_next = NULL;

			ASSERT((sti->sti_conn_ind_head == NULL &&
			    sti->sti_conn_ind_tail == NULL) ||
			    (sti->sti_conn_ind_head != NULL &&
			    sti->sti_conn_ind_tail != NULL));

			so->so_error = ECONNABORTED;
			mutex_exit(&so->so_lock);

			/*
			 * T_KSSL_PROXY_CONN_IND may carry a handle for
			 * an SSL context, and needs to be released.
			 */
			if ((tci->PRIM_type == T_SSL_PROXY_CONN_IND) &&
			    (mp->b_cont != NULL)) {
				kssl_ctx_t kssl_ctx;

				ASSERT(MBLKL(mp->b_cont) ==
				    sizeof (kssl_ctx_t));
				kssl_ctx = *((kssl_ctx_t *)mp->b_cont->b_rptr);
				kssl_release_ctx(kssl_ctx);
			}
			freemsg(mp);
			return (0);
		}
	}
	mutex_exit(&so->so_lock);
	dprintso(so, 1, ("t_discon_ind: NOT found T_CONN_IND %d\n", seqno));
	return (-1);
}

/*
 * Wait until the socket is connected or there is an error.
 * fmode should contain any nonblocking flags. nosig should be
 * set if the caller does not want the wait to be interrupted by a signal.
 */
int
sowaitconnected(struct sonode *so, int fmode, int nosig)
{
	int error;

	ASSERT(MUTEX_HELD(&so->so_lock));

	while ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) ==
	    SS_ISCONNECTING && so->so_error == 0) {

		dprintso(so, 1, ("waiting for SS_ISCONNECTED on %p\n",
		    (void *)so));
		if (fmode & (FNDELAY|FNONBLOCK))
			return (EINPROGRESS);

		if (so->so_state & SS_CLOSING)
			return (EINTR);

		if (nosig)
			cv_wait(&so->so_state_cv, &so->so_lock);
		else if (!cv_wait_sig_swap(&so->so_state_cv, &so->so_lock)) {
			/*
			 * Return EINTR and let the application use
			 * nonblocking techniques for detecting when
			 * the connection has been established.
			 */
			return (EINTR);
		}
		dprintso(so, 1, ("awoken on %p\n", (void *)so));
	}

	if (so->so_error != 0) {
		error = sogeterr(so, B_TRUE);
		ASSERT(error != 0);
		dprintso(so, 1, ("sowaitconnected: error %d\n", error));
		return (error);
	}
	if (!(so->so_state & SS_ISCONNECTED)) {
		/*
		 * Could have received a T_ORDREL_IND or a T_DISCON_IND with
		 * zero errno. Or another thread could have consumed so_error
		 * e.g. by calling read.
		 */
		error = ECONNREFUSED;
		dprintso(so, 1, ("sowaitconnected: error %d\n", error));
		return (error);
	}
	return (0);
}


/*
 * Handle the signal generation aspect of urgent data.
 */
static void
so_oob_sig(struct sonode *so, int extrasig,
    strsigset_t *signals, strpollset_t *pollwakeups)
{
	sotpi_info_t *sti = SOTOTPI(so);

	ASSERT(MUTEX_HELD(&so->so_lock));

	ASSERT(so_verify_oobstate(so));
	ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt);
	if (sti->sti_oobsigcnt > sti->sti_oobcnt) {
		/*
		 * Signal has already been generated once for this
		 * urgent "event". However, since TCP can receive updated
		 * urgent pointers we still generate a signal.
		 */
		ASSERT(so->so_state & SS_OOBPEND);
		if (extrasig) {
			*signals |= S_RDBAND;
			*pollwakeups |= POLLRDBAND;
		}
		return;
	}

	sti->sti_oobsigcnt++;
	ASSERT(sti->sti_oobsigcnt > 0);	/* Wraparound */
	ASSERT(sti->sti_oobsigcnt > sti->sti_oobcnt);

	/*
	 * Record (for select/poll) that urgent data is pending.
	 */
	so->so_state |= SS_OOBPEND;
	/*
	 * New urgent data on the way so forget about any old
	 * urgent data.
	 */
	so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);
	if (so->so_oobmsg != NULL) {
		dprintso(so, 1, ("sock: discarding old oob\n"));
		freemsg(so->so_oobmsg);
		so->so_oobmsg = NULL;
	}
	*signals |= S_RDBAND;
	*pollwakeups |= POLLRDBAND;
	ASSERT(so_verify_oobstate(so));
}

/*
 * Handle the processing of the T_EXDATA_IND with urgent data.
 * Returns the T_EXDATA_IND if it should be queued on the read queue.
 */
/* ARGSUSED2 */
static mblk_t *
so_oob_exdata(struct sonode *so, mblk_t *mp,
    strsigset_t *signals, strpollset_t *pollwakeups)
{
	sotpi_info_t *sti = SOTOTPI(so);

	ASSERT(MUTEX_HELD(&so->so_lock));

	ASSERT(so_verify_oobstate(so));

	ASSERT(sti->sti_oobsigcnt > sti->sti_oobcnt);

	sti->sti_oobcnt++;
	ASSERT(sti->sti_oobcnt > 0);	/* wraparound? */
	ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt);

	/*
	 * Set MSGMARK for SIOCATMARK.
	 */
	mp->b_flag |= MSGMARK;

	ASSERT(so_verify_oobstate(so));
	return (mp);
}

/*
 * Handle the processing of the actual urgent data.
 * Returns the data mblk if it should be queued on the read queue.
 */
static mblk_t *
so_oob_data(struct sonode *so, mblk_t *mp,
    strsigset_t *signals, strpollset_t *pollwakeups)
{
	sotpi_info_t *sti = SOTOTPI(so);

	ASSERT(MUTEX_HELD(&so->so_lock));

	ASSERT(so_verify_oobstate(so));

	ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt);
	ASSERT(mp != NULL);
	/*
	 * For OOBINLINE we keep the data in the T_EXDATA_IND.
	 * Otherwise we store it in so_oobmsg.
	 */
	ASSERT(so->so_oobmsg == NULL);
	if (so->so_options & SO_OOBINLINE) {
		*pollwakeups |= POLLIN | POLLRDNORM | POLLRDBAND;
		*signals |= S_INPUT | S_RDNORM;
	} else {
		*pollwakeups |= POLLRDBAND;
		so->so_state |= SS_HAVEOOBDATA;
		so->so_oobmsg = mp;
		mp = NULL;
	}
	ASSERT(so_verify_oobstate(so));
	return (mp);
}

/*
 * Caller must hold the mutex.
 * For delayed processing, save the T_DISCON_IND received
 * from below on sti_discon_ind_mp.
 * When the message is processed the framework will call:
 * (*func)(so, mp);
 */
static void
so_save_discon_ind(struct sonode *so,
    mblk_t *mp,
    void (*func)(struct sonode *so, mblk_t *))
{
	sotpi_info_t *sti = SOTOTPI(so);

	ASSERT(MUTEX_HELD(&so->so_lock));

	/*
	 * Discard new T_DISCON_IND if we have already received another.
	 * Currently the earlier message can either be on sti_discon_ind_mp
	 * or being processed.
	 */
	if (sti->sti_discon_ind_mp != NULL || (so->so_flag & SOASYNC_UNBIND)) {
		zcmn_err(getzoneid(), CE_WARN,
		    "sockfs: received unexpected additional T_DISCON_IND\n");
		freemsg(mp);
		return;
	}
	mp->b_prev = (mblk_t *)func;
	mp->b_next = NULL;
	sti->sti_discon_ind_mp = mp;
}

/*
 * Caller must hold the mutex and make sure that either SOLOCKED
 * or SOASYNC_UNBIND is set. Called from so_unlock_single().
 * Perform delayed processing of T_DISCON_IND message on sti_discon_ind_mp.
 * Need to ensure that strsock_proto() will not end up sleeping for
 * SOASYNC_UNBIND, while executing this function.
 */
void
so_drain_discon_ind(struct sonode *so)
{
	mblk_t *bp;
	void (*func)(struct sonode *so, mblk_t *);
	sotpi_info_t *sti = SOTOTPI(so);

	ASSERT(MUTEX_HELD(&so->so_lock));
	ASSERT(so->so_flag & (SOLOCKED|SOASYNC_UNBIND));

	/* Process T_DISCON_IND on sti_discon_ind_mp */
	if ((bp = sti->sti_discon_ind_mp) != NULL) {
		sti->sti_discon_ind_mp = NULL;
		func = (void (*)())bp->b_prev;
		bp->b_prev = NULL;

		/*
		 * This (*func) is supposed to generate a message downstream
		 * and we need to have a flag set until the corresponding
		 * upstream message reaches stream head.
		 * When processing T_DISCON_IND in strsock_discon_ind
		 * we hold SOASYNC_UNBIND when sending T_UNBIND_REQ down and
		 * drop the flag after we get the ACK in strsock_proto.
		 */
		(void) (*func)(so, bp);
	}
}

/*
 * Caller must hold the mutex.
 * Remove the T_DISCON_IND on sti_discon_ind_mp.
 */
void
so_flush_discon_ind(struct sonode *so)
{
	mblk_t *bp;
	sotpi_info_t *sti = SOTOTPI(so);

	ASSERT(MUTEX_HELD(&so->so_lock));

	/*
	 * Remove T_DISCON_IND mblk at sti_discon_ind_mp.
	 */
	if ((bp = sti->sti_discon_ind_mp) != NULL) {
		sti->sti_discon_ind_mp = NULL;
		bp->b_prev = NULL;
		freemsg(bp);
	}
}

/*
 * Caller must hold the mutex.
 *
 * This function is used to process the T_DISCON_IND message. It does
 * immediate processing when called from strsock_proto and delayed
 * processing of discon_ind saved on sti_discon_ind_mp when called from
 * so_drain_discon_ind. When a T_DISCON_IND message is saved in
 * sti_discon_ind_mp for delayed processing, this function is registered
 * as the callback function to process the message.
 *
 * SOASYNC_UNBIND should be held in this function, during the non-blocking
 * unbind operation, and should be released only after we receive the ACK
 * in strsock_proto, for the T_UNBIND_REQ sent here. Since SOLOCKED is not set,
 * no TPI messages would be sent down at this time. This is to prevent M_FLUSH
 * sent from either this function or tcp_unbind(), flushing away any TPI
 * message that is being sent down and stays in a lower module's queue.
 *
 * This function drops so_lock and grabs it again.
 */
static void
strsock_discon_ind(struct sonode *so, mblk_t *discon_mp)
{
	struct vnode *vp;
	struct stdata *stp;
	union T_primitives *tpr;
	struct T_unbind_req *ubr;
	mblk_t *mp;
	int error;
	sotpi_info_t *sti = SOTOTPI(so);

	ASSERT(MUTEX_HELD(&so->so_lock));
	ASSERT(discon_mp);
	ASSERT(discon_mp->b_rptr);

	tpr = (union T_primitives *)discon_mp->b_rptr;
	ASSERT(tpr->type == T_DISCON_IND);

	vp = SOTOV(so);
	stp = vp->v_stream;
	ASSERT(stp);

	/*
	 * Not a listener
	 */
	ASSERT((so->so_state & SS_ACCEPTCONN) == 0);

	/*
	 * This assumes that the name space for DISCON_reason
	 * is the errno name space.
	 */
	soisdisconnected(so, tpr->discon_ind.DISCON_reason);
	sti->sti_laddr_valid = 0;
	sti->sti_faddr_valid = 0;

	/*
	 * Unbind with the transport without blocking.
	 * If we've already received a T_DISCON_IND do not unbind.
	 *
	 * If there is no preallocated unbind message, we have already
	 * unbound with the transport.
	 *
	 * If the socket is not bound, no need to unbind.
	 */
	mp = sti->sti_unbind_mp;
	if (mp == NULL) {
		ASSERT(!(so->so_state & SS_ISBOUND));
		mutex_exit(&so->so_lock);
	} else if (!(so->so_state & SS_ISBOUND)) {
		mutex_exit(&so->so_lock);
	} else {
		sti->sti_unbind_mp = NULL;

		/*
		 * Is another T_DISCON_IND being processed?
		 */
		ASSERT((so->so_flag & SOASYNC_UNBIND) == 0);

		/*
		 * Make strsock_proto ignore T_OK_ACK and T_ERROR_ACK for
		 * this unbind. Set SOASYNC_UNBIND. This should be cleared
		 * only after we receive the ACK in strsock_proto.
		 */
		so->so_flag |= SOASYNC_UNBIND;
		ASSERT(!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)));
		so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN);
		sti->sti_laddr_valid = 0;
		mutex_exit(&so->so_lock);

		/*
		 * Send down T_UNBIND_REQ ignoring flow control.
		 * XXX Assumes that MSG_IGNFLOW implies that this thread
		 * does not run service procedures.
		 */
		ASSERT(DB_TYPE(mp) == M_PROTO);
		ubr = (struct T_unbind_req *)mp->b_rptr;
		mp->b_wptr += sizeof (*ubr);
		ubr->PRIM_type = T_UNBIND_REQ;

		/*
		 * Flush the read and write side (except stream head read
		 * queue) and send down T_UNBIND_REQ.
		 */
		(void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW);
		error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
		    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
		/* LINTED - warning: statement has no consequent: if */
		if (error) {
			eprintsoline(so, error);
		}
	}

	if (tpr->discon_ind.DISCON_reason != 0)
		strsetrerror(SOTOV(so), 0, 0, sogetrderr);
	strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
	strseteof(SOTOV(so), 1);
	/*
	 * strseteof takes care of read side wakeups,
	 * pollwakeups, and signals.
	 */
	dprintso(so, 1, ("T_DISCON_IND: error %d\n", so->so_error));
	freemsg(discon_mp);


	pollwakeup(&stp->sd_pollist, POLLOUT);
	mutex_enter(&stp->sd_lock);

	/*
	 * Wake sleeping write
	 */
	if (stp->sd_flag & WSLEEP) {
		stp->sd_flag &= ~WSLEEP;
		cv_broadcast(&stp->sd_wrq->q_wait);
	}

	/*
	 * strsendsig can handle multiple signals with a
	 * single call. Send SIGPOLL for S_OUTPUT event.
	 */
	if (stp->sd_sigflags & S_OUTPUT)
		strsendsig(stp->sd_siglist, S_OUTPUT, 0, 0);

	mutex_exit(&stp->sd_lock);
	mutex_enter(&so->so_lock);
}

/*
 * This routine is registered with the stream head to receive M_PROTO
 * and M_PCPROTO messages.
 *
 * Returns NULL if the message was consumed.
 * Returns an mblk to make that mblk be processed (and queued) by the stream
 * head.
 *
 * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and
 * *pollwakeups) for the stream head to take action on. Note that since
 * sockets always deliver SIGIO for every new piece of data this routine
 * never sets *firstmsgsigs; any signals are returned in *allmsgsigs.
 *
 * This routine handles all data related TPI messages independent of
 * the type of the socket i.e. it doesn't care if T_UNITDATA_IND messages
 * arrive on a SOCK_STREAM.
 */
static mblk_t *
strsock_proto(vnode_t *vp, mblk_t *mp,
    strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
    strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
{
	union T_primitives *tpr;
	struct sonode *so;
	sotpi_info_t *sti;

	so = VTOSO(vp);
	sti = SOTOTPI(so);

	dprintso(so, 1, ("strsock_proto(%p, %p)\n", (void *)vp, (void *)mp));

	/* Set default return values */
	*firstmsgsigs = *wakeups = *allmsgsigs = *pollwakeups = 0;

	ASSERT(DB_TYPE(mp) == M_PROTO ||
	    DB_TYPE(mp) == M_PCPROTO);

	if (MBLKL(mp) < sizeof (tpr->type)) {
		/* The message is too short to even contain the primitive */
		zcmn_err(getzoneid(), CE_WARN,
		    "sockfs: Too short TPI message received. Len = %ld\n",
		    (ptrdiff_t)(MBLKL(mp)));
		freemsg(mp);
		return (NULL);
	}
	if (!__TPI_PRIM_ISALIGNED(mp->b_rptr)) {
		/* The read pointer is not aligned correctly for TPI */
		zcmn_err(getzoneid(), CE_WARN,
		    "sockfs: Unaligned TPI message received. rptr = %p\n",
		    (void *)mp->b_rptr);
		freemsg(mp);
		return (NULL);
	}
	tpr = (union T_primitives *)mp->b_rptr;
	dprintso(so, 1, ("strsock_proto: primitive %d\n", tpr->type));

	switch (tpr->type) {

	case T_DATA_IND:
		if (MBLKL(mp) < sizeof (struct T_data_ind)) {
			zcmn_err(getzoneid(), CE_WARN,
			    "sockfs: Too short T_DATA_IND. Len = %ld\n",
			    (ptrdiff_t)(MBLKL(mp)));
			freemsg(mp);
			return (NULL);
		}
		/*
		 * Ignore zero-length T_DATA_IND messages. These might be
		 * generated by some transports.
		 * This is needed to prevent read (which skips the M_PROTO
		 * part) from unexpectedly returning 0 (or returning
		 * EWOULDBLOCK on a non-blocking socket after select/poll
		 * has indicated that data is available).
		 */
		if (msgdsize(mp->b_cont) == 0) {
			dprintso(so, 0,
			    ("strsock_proto: zero length T_DATA_IND\n"));
			freemsg(mp);
			return (NULL);
		}
		*allmsgsigs = S_INPUT | S_RDNORM;
		*pollwakeups = POLLIN | POLLRDNORM;
		*wakeups = RSLEEP;
		return (mp);

	case T_UNITDATA_IND: {
		struct T_unitdata_ind *tudi = &tpr->unitdata_ind;
		void *addr;
		t_uscalar_t addrlen;

		if (MBLKL(mp) < sizeof (struct T_unitdata_ind)) {
			zcmn_err(getzoneid(), CE_WARN,
			    "sockfs: Too short T_UNITDATA_IND. Len = %ld\n",
			    (ptrdiff_t)(MBLKL(mp)));
			freemsg(mp);
			return (NULL);
		}

		/* Is this not a connected datagram socket? */
		if ((so->so_mode & SM_CONNREQUIRED) ||
		    !(so->so_state & SS_ISCONNECTED)) {
			/*
			 * Not a connected datagram socket. Look for
			 * the SO_UNIX_CLOSE option. If such an option is found
			 * discard the message (since it has no meaning
			 * unless connected).
			 */
			if (so->so_family == AF_UNIX && msgdsize(mp) == 0 &&
			    tudi->OPT_length != 0) {
				void *opt;
				t_uscalar_t optlen = tudi->OPT_length;

				opt = sogetoff(mp, tudi->OPT_offset,
				    optlen, __TPI_ALIGN_SIZE);
				if (opt == NULL) {
					/* The len/off falls outside mp */
					freemsg(mp);
					mutex_enter(&so->so_lock);
					soseterror(so, EPROTO);
					mutex_exit(&so->so_lock);
					zcmn_err(getzoneid(), CE_WARN,
					    "sockfs: T_unidata_ind with "
					    "invalid optlen/offset %u/%d\n",
					    optlen, tudi->OPT_offset);
					return (NULL);
				}
				if (so_getopt_unix_close(opt, optlen)) {
					freemsg(mp);
					return (NULL);
				}
			}
			*allmsgsigs = S_INPUT | S_RDNORM;
			*pollwakeups = POLLIN | POLLRDNORM;
			*wakeups = RSLEEP;
			if (audit_active)
				audit_sock(T_UNITDATA_IND, strvp2wq(vp),
				    mp, 0);
			return (mp);
		}

		/*
		 * A connected datagram socket. For AF_INET{,6} we verify that
		 * the source address matches the "connected to" address.
		 * The semantics of AF_UNIX sockets is to not verify
		 * the source address.
		 * Note that this source address verification is transport
		 * specific. Thus the real fix would be to extend TPI
		 * to allow T_CONN_REQ messages to be sent to connectionless
		 * transport providers and always let the transport provider
		 * do whatever filtering is needed.
		 *
		 * The verification/filtering semantics for transports
		 * other than AF_INET and AF_UNIX are unknown. The choice
		 * would be to either filter using bcmp or let all messages
		 * get through. This code does not filter other address
		 * families since this at least allows the application to
		 * work around any missing filtering.
		 *
		 * XXX Should we move filtering to UDP/ICMP???
		 * That would require passing e.g. a T_DISCON_REQ to UDP
		 * when the socket becomes unconnected.
		 */
		addrlen = tudi->SRC_length;
		/*
		 * The alignment restriction is really too strict but
		 * we want enough alignment to inspect the fields of
		 * a sockaddr_in.
		 */
		addr = sogetoff(mp, tudi->SRC_offset, addrlen,
		    __TPI_ALIGN_SIZE);
		if (addr == NULL) {
			freemsg(mp);
			mutex_enter(&so->so_lock);
			soseterror(so, EPROTO);
			mutex_exit(&so->so_lock);
			zcmn_err(getzoneid(), CE_WARN,
			    "sockfs: T_unidata_ind with invalid "
			    "addrlen/offset %u/%d\n",
			    addrlen, tudi->SRC_offset);
			return (NULL);
		}

		if (so->so_family == AF_INET) {
			/*
			 * For AF_INET we allow wildcarding both sin_addr
			 * and sin_port.
			 */
			struct sockaddr_in *faddr, *sin;

			/* Prevent sti_faddr_sa from changing while accessed */
			mutex_enter(&so->so_lock);
			ASSERT(sti->sti_faddr_len ==
			    (socklen_t)sizeof (struct sockaddr_in));
			faddr = (struct sockaddr_in *)sti->sti_faddr_sa;
			sin = (struct sockaddr_in *)addr;
			if (addrlen !=
			    (t_uscalar_t)sizeof (struct sockaddr_in) ||
			    (sin->sin_addr.s_addr != faddr->sin_addr.s_addr &&
			    faddr->sin_addr.s_addr != INADDR_ANY) ||
			    (so->so_type != SOCK_RAW &&
			    sin->sin_port != faddr->sin_port &&
			    faddr->sin_port != 0)) {
#ifdef DEBUG
				dprintso(so, 0,
				    ("sockfs: T_UNITDATA_IND mismatch: %s",
				    pr_addr(so->so_family,
				    (struct sockaddr *)addr, addrlen)));
				dprintso(so, 0, (" - %s\n",
				    pr_addr(so->so_family, sti->sti_faddr_sa,
				    (t_uscalar_t)sti->sti_faddr_len)));
#endif /* DEBUG */
				mutex_exit(&so->so_lock);
				freemsg(mp);
				return (NULL);
			}
			mutex_exit(&so->so_lock);
		} else if (so->so_family == AF_INET6) {
			/*
			 * For AF_INET6 we allow wildcarding both sin6_addr
			 * and sin6_port.
			 */
			struct sockaddr_in6 *faddr6, *sin6;
			static struct in6_addr zeroes; /* inits to all zeros */

			/* Prevent sti_faddr_sa from changing while accessed */
			mutex_enter(&so->so_lock);
			ASSERT(sti->sti_faddr_len ==
			    (socklen_t)sizeof (struct sockaddr_in6));
			faddr6 = (struct sockaddr_in6 *)sti->sti_faddr_sa;
			sin6 = (struct sockaddr_in6 *)addr;
			/* XXX could we get a mapped address ::ffff:0.0.0.0 ? */
			if (addrlen !=
			    (t_uscalar_t)sizeof (struct sockaddr_in6) ||
			    (!IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
			    &faddr6->sin6_addr) &&
			    !IN6_ARE_ADDR_EQUAL(&faddr6->sin6_addr, &zeroes)) ||
			    (so->so_type != SOCK_RAW &&
			    sin6->sin6_port != faddr6->sin6_port &&
			    faddr6->sin6_port != 0)) {
#ifdef DEBUG
				dprintso(so, 0,
				    ("sockfs: T_UNITDATA_IND mismatch: %s",
				    pr_addr(so->so_family,
				    (struct sockaddr *)addr, addrlen)));
				dprintso(so, 0, (" - %s\n",
				    pr_addr(so->so_family, sti->sti_faddr_sa,
				    (t_uscalar_t)sti->sti_faddr_len)));
#endif /* DEBUG */
				mutex_exit(&so->so_lock);
				freemsg(mp);
				return (NULL);
			}
			mutex_exit(&so->so_lock);
		} else if (so->so_family == AF_UNIX &&
		    msgdsize(mp->b_cont) == 0 &&
		    tudi->OPT_length != 0) {
			/*
			 * Attempt to extract AF_UNIX
			 * SO_UNIX_CLOSE indication from options.
			 */
			void *opt;
			t_uscalar_t optlen = tudi->OPT_length;

			opt = sogetoff(mp, tudi->OPT_offset,
			    optlen, __TPI_ALIGN_SIZE);
			if (opt == NULL) {
				/* The len/off falls outside mp */
				freemsg(mp);
				mutex_enter(&so->so_lock);
				soseterror(so, EPROTO);
				mutex_exit(&so->so_lock);
				zcmn_err(getzoneid(), CE_WARN,
				    "sockfs: T_unidata_ind with invalid "
				    "optlen/offset %u/%d\n",
				    optlen, tudi->OPT_offset);
				return (NULL);
			}
			/*
			 * If we received a unix close indication mark the
			 * socket and discard this message.
			 */
			if (so_getopt_unix_close(opt, optlen)) {
				mutex_enter(&so->so_lock);
				sobreakconn(so, ECONNRESET);
				mutex_exit(&so->so_lock);
				strsetrerror(SOTOV(so), 0, 0, sogetrderr);
				freemsg(mp);
				*pollwakeups = POLLIN | POLLRDNORM;
				*allmsgsigs = S_INPUT | S_RDNORM;
				*wakeups = RSLEEP;
				return (NULL);
			}
		}
		*allmsgsigs = S_INPUT | S_RDNORM;
		*pollwakeups = POLLIN | POLLRDNORM;
		*wakeups = RSLEEP;
		return (mp);
	}

	case T_OPTDATA_IND: {
		struct T_optdata_ind *tdi = &tpr->optdata_ind;

		if (MBLKL(mp) < sizeof (struct T_optdata_ind)) {
			zcmn_err(getzoneid(), CE_WARN,
			    "sockfs: Too short T_OPTDATA_IND. Len = %ld\n",
			    (ptrdiff_t)(MBLKL(mp)));
			freemsg(mp);
			return (NULL);
		}
		/*
		 * Allow zero-length messages carrying options.
		 * This is used when carrying the SO_UNIX_CLOSE option.
		 */
		if (so->so_family == AF_UNIX && msgdsize(mp->b_cont) == 0 &&
		    tdi->OPT_length != 0) {
			/*
			 * Attempt to extract AF_UNIX close indication
			 * from the options. Ignore any other options -
			 * those are handled once the message is removed
			 * from the queue.
			 * The close indication message should not carry data.
			 */
			void *opt;
			t_uscalar_t optlen = tdi->OPT_length;

			opt = sogetoff(mp, tdi->OPT_offset,
			    optlen, __TPI_ALIGN_SIZE);
			if (opt == NULL) {
				/* The len/off falls outside mp */
				freemsg(mp);
				mutex_enter(&so->so_lock);
				soseterror(so, EPROTO);
				mutex_exit(&so->so_lock);
				zcmn_err(getzoneid(), CE_WARN,
				    "sockfs: T_optdata_ind with invalid "
				    "optlen/offset %u/%d\n",
				    optlen, tdi->OPT_offset);
				return (NULL);
			}
			/*
			 * If we received a close indication mark the
			 * socket and discard this message.
			 */
			if (so_getopt_unix_close(opt, optlen)) {
				mutex_enter(&so->so_lock);
				socantsendmore(so);
				sti->sti_faddr_valid = 0;
				mutex_exit(&so->so_lock);
				strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
				freemsg(mp);
				return (NULL);
			}
		}
		*allmsgsigs = S_INPUT | S_RDNORM;
		*pollwakeups = POLLIN | POLLRDNORM;
		*wakeups = RSLEEP;
		return (mp);
	}

	case T_EXDATA_IND: {
		mblk_t *mctl, *mdata;
		mblk_t *lbp;
		union T_primitives *tprp;
		struct stdata *stp;
		queue_t *qp;

		if (MBLKL(mp) < sizeof (struct T_exdata_ind)) {
			zcmn_err(getzoneid(), CE_WARN,
			    "sockfs: Too short T_EXDATA_IND. Len = %ld\n",
			    (ptrdiff_t)(MBLKL(mp)));
			freemsg(mp);
			return (NULL);
		}
		/*
		 * Ignore zero-length T_EXDATA_IND messages. These might be
		 * generated by some transports.
		 *
		 * This is needed to prevent read (which skips the M_PROTO
		 * part) from unexpectedly returning 0 (or returning
		 * EWOULDBLOCK on a non-blocking socket after select/poll
		 * has indicated that data is available).
         */
        dprintso(so, 1,
            ("T_EXDATA_IND(%p): counts %d/%d state %s\n",
            (void *)vp, sti->sti_oobsigcnt, sti->sti_oobcnt,
            pr_state(so->so_state, so->so_mode)));

        if (msgdsize(mp->b_cont) == 0) {
            dprintso(so, 0,
                ("strsock_proto: zero length T_EXDATA_IND\n"));
            freemsg(mp);
            return (NULL);
        }

        /*
         * Split into the T_EXDATA_IND and the M_DATA part.
         * We process these three pieces separately:
         *    signal generation
         *    handling T_EXDATA_IND
         *    handling M_DATA component
         */
        mctl = mp;
        mdata = mctl->b_cont;
        mctl->b_cont = NULL;
        mutex_enter(&so->so_lock);
        so_oob_sig(so, 0, allmsgsigs, pollwakeups);
        mctl = so_oob_exdata(so, mctl, allmsgsigs, pollwakeups);
        mdata = so_oob_data(so, mdata, allmsgsigs, pollwakeups);

        stp = vp->v_stream;
        ASSERT(stp != NULL);
        qp = _RD(stp->sd_wrq);

        mutex_enter(QLOCK(qp));
        lbp = qp->q_last;

        /*
         * We want to avoid queueing up a string of T_EXDATA_IND
         * messages with no intervening data messages at the stream
         * head. These messages contribute to the total message
         * count. Eventually this can lead to STREAMS flow control
         * and also cause TCP to advertise a zero window condition
         * to the peer. This can happen in the degenerate case where
         * the sender and receiver exchange only OOB data. The sender
         * only sends messages with the MSG_OOB flag and the receiver
         * receives only MSG_OOB messages and does not use SO_OOBINLINE.
         * An example of this scenario has been reported in applications
         * that use OOB data to exchange heartbeats. Flow control
         * relief will never happen if the application only reads OOB
         * data, which is done directly by sorecvoob(), and the
         * T_EXDATA_IND messages at the stream head won't be consumed.
         * Note that there is no correctness issue in compressing the
         * string of T_EXDATA_IND messages into a single T_EXDATA_IND
         * message. A single read that does not specify MSG_OOB will
         * read across all the marks in a loop in sotpi_recvmsg().
         * Each mark is individually distinguishable only if the
         * T_EXDATA_IND messages are separated by data messages.
         */
        if ((qp->q_first != NULL) && (DB_TYPE(lbp) == M_PROTO)) {
            tprp = (union T_primitives *)lbp->b_rptr;
            if ((tprp->type == T_EXDATA_IND) &&
                !(so->so_options & SO_OOBINLINE)) {

                /*
                 * free the new M_PROTO message
                 */
                freemsg(mctl);

                /*
                 * adjust the OOB count and OOB signal count
                 * just incremented for the new OOB data.
                 */
                sti->sti_oobcnt--;
                sti->sti_oobsigcnt--;
                mutex_exit(QLOCK(qp));
                mutex_exit(&so->so_lock);
                return (NULL);
            }
        }
        mutex_exit(QLOCK(qp));

        /*
         * Pass the T_EXDATA_IND and the M_DATA back separately
         * by using b_next linkage. (The stream head will queue any
         * b_next linked messages separately.) This is needed
         * since MSGMARK applies to the last byte of the message,
         * hence we cannot have any M_DATA component attached
         * to the marked T_EXDATA_IND. Note that the stream head
         * will not consolidate M_DATA messages onto an MSGMARK'ed
         * message in order to preserve the constraint that
         * the T_EXDATA_IND always is a separate message.
         */
        ASSERT(mctl != NULL);
        mctl->b_next = mdata;
        mp = mctl;
#ifdef DEBUG
        if (mdata == NULL) {
            dprintso(so, 1,
                ("after outofline T_EXDATA_IND(%p): "
                "counts %d/%d poll 0x%x sig 0x%x state %s\n",
                (void *)vp, sti->sti_oobsigcnt,
                sti->sti_oobcnt, *pollwakeups, *allmsgsigs,
                pr_state(so->so_state, so->so_mode)));
        } else {
            dprintso(so, 1,
                ("after inline T_EXDATA_IND(%p): "
                "counts %d/%d poll 0x%x sig 0x%x state %s\n",
                (void *)vp, sti->sti_oobsigcnt,
                sti->sti_oobcnt, *pollwakeups, *allmsgsigs,
                pr_state(so->so_state, so->so_mode)));
        }
#endif /* DEBUG */
        mutex_exit(&so->so_lock);
        *wakeups = RSLEEP;
        return (mp);
    }

    case T_CONN_CON: {
        struct T_conn_con *conn_con;
        void *addr;
        t_uscalar_t addrlen;

        /*
         * Verify the state, update the state to ISCONNECTED,
         * record the potentially new address in the message,
         * and drop the message.
         */
        if (MBLKL(mp) < sizeof (struct T_conn_con)) {
            zcmn_err(getzoneid(), CE_WARN,
                "sockfs: Too short T_CONN_CON. Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }

        mutex_enter(&so->so_lock);
        if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) !=
            SS_ISCONNECTING) {
            mutex_exit(&so->so_lock);
            dprintso(so, 1,
                ("T_CONN_CON: state %x\n", so->so_state));
            freemsg(mp);
            return (NULL);
        }

        conn_con = &tpr->conn_con;
        addrlen = conn_con->RES_length;
        /*
         * Allow the address to be of different size than sent down
         * in the T_CONN_REQ as long as it doesn't exceed the maxlen.
         * For AF_UNIX require the identical length.
         */
        if (so->so_family == AF_UNIX ?
            addrlen != (t_uscalar_t)sizeof (sti->sti_ux_laddr) :
            addrlen > (t_uscalar_t)sti->sti_faddr_maxlen) {
            zcmn_err(getzoneid(), CE_WARN,
                "sockfs: T_conn_con with different "
                "length %u/%d\n",
                addrlen, conn_con->RES_length);
            soisdisconnected(so, EPROTO);
            sti->sti_laddr_valid = 0;
            sti->sti_faddr_valid = 0;
            mutex_exit(&so->so_lock);
            strsetrerror(SOTOV(so), 0, 0, sogetrderr);
            strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
            strseteof(SOTOV(so), 1);
            freemsg(mp);
            /*
             * strseteof takes care of read side wakeups,
             * pollwakeups, and signals.
             */
            *wakeups = WSLEEP;
            *allmsgsigs = S_OUTPUT;
            *pollwakeups = POLLOUT;
            return (NULL);
        }
        addr = sogetoff(mp, conn_con->RES_offset, addrlen, 1);
        if (addr == NULL) {
            zcmn_err(getzoneid(), CE_WARN,
                "sockfs: T_conn_con with invalid "
                "addrlen/offset %u/%d\n",
                addrlen, conn_con->RES_offset);
            mutex_exit(&so->so_lock);
            strsetrerror(SOTOV(so), 0, 0, sogetrderr);
            strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
            strseteof(SOTOV(so), 1);
            freemsg(mp);
            /*
             * strseteof takes care of read side wakeups,
             * pollwakeups, and signals.
             */
            *wakeups = WSLEEP;
            *allmsgsigs = S_OUTPUT;
            *pollwakeups = POLLOUT;
            return (NULL);
        }

        /*
         * Save for getpeername.
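         * (sti_faddr_sa/sti_faddr_valid are what a later getpeername()
         * can normally be answered from, so the transport does not
         * have to be asked again.)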
         */
        if (so->so_family != AF_UNIX) {
            sti->sti_faddr_len = (socklen_t)addrlen;
            ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen);
            bcopy(addr, sti->sti_faddr_sa, addrlen);
            sti->sti_faddr_valid = 1;
        }

        if (so->so_peercred != NULL)
            crfree(so->so_peercred);
        so->so_peercred = msg_getcred(mp, &so->so_cpid);
        if (so->so_peercred != NULL)
            crhold(so->so_peercred);

        /* Wakeup anybody sleeping in sowaitconnected */
        soisconnected(so);
        mutex_exit(&so->so_lock);

        /*
         * The socket is now available for sending data.
         */
        *wakeups = WSLEEP;
        *allmsgsigs = S_OUTPUT;
        *pollwakeups = POLLOUT;
        freemsg(mp);
        return (NULL);
    }

    /*
     * Extra processing in case of an SSL proxy, before queuing or
     * forwarding to the fallback endpoint.
     */
    case T_SSL_PROXY_CONN_IND:
    case T_CONN_IND:
        /*
         * Verify the min size and queue the message on
         * the sti_conn_ind_head/tail list.
         */
        if (MBLKL(mp) < sizeof (struct T_conn_ind)) {
            zcmn_err(getzoneid(), CE_WARN,
                "sockfs: Too short T_CONN_IND. Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }

        if (audit_active)
            audit_sock(T_CONN_IND, strvp2wq(vp), mp, 0);
        if (!(so->so_state & SS_ACCEPTCONN)) {
            zcmn_err(getzoneid(), CE_WARN,
                "sockfs: T_conn_ind on non-listening socket\n");
            freemsg(mp);
            return (NULL);
        }

        if (tpr->type == T_SSL_PROXY_CONN_IND && mp->b_cont == NULL) {
            /* No context: need to fall back */
            struct sonode *fbso;
            stdata_t *fbstp;

            tpr->type = T_CONN_IND;

            fbso = kssl_find_fallback(sti->sti_kssl_ent);

            /*
             * No fallback: the remote will time out and
             * disconnect.
             */
            if (fbso == NULL) {
                freemsg(mp);
                return (NULL);
            }
            fbstp = SOTOV(fbso)->v_stream;
            qreply(fbstp->sd_wrq->q_next, mp);
            return (NULL);
        }
        soqueueconnind(so, mp);
        *allmsgsigs = S_INPUT | S_RDNORM;
        *pollwakeups = POLLIN | POLLRDNORM;
        *wakeups = RSLEEP;
        return (NULL);

    case T_ORDREL_IND:
        if (MBLKL(mp) < sizeof (struct T_ordrel_ind)) {
            zcmn_err(getzoneid(), CE_WARN,
                "sockfs: Too short T_ORDREL_IND. Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }

        /*
         * Some providers send this when not fully connected.
         * SunLink X.25 needs to retrieve disconnect reason after
         * disconnect for compatibility. It uses T_ORDREL_IND
         * instead of T_DISCON_IND so that it may use the
         * endpoint after a connect failure to retrieve the
         * reason using an ioctl. Thus we explicitly clear
         * SS_ISCONNECTING here for SunLink X.25.
         * This is a needed TPI violation.
         */
        mutex_enter(&so->so_lock);
        so->so_state &= ~SS_ISCONNECTING;
        socantrcvmore(so);
        mutex_exit(&so->so_lock);
        strseteof(SOTOV(so), 1);
        /*
         * strseteof takes care of read side wakeups,
         * pollwakeups, and signals.
         */
        freemsg(mp);
        return (NULL);

    case T_DISCON_IND:
        if (MBLKL(mp) < sizeof (struct T_discon_ind)) {
            zcmn_err(getzoneid(), CE_WARN,
                "sockfs: Too short T_DISCON_IND. Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }
        if (so->so_state & SS_ACCEPTCONN) {
            /*
             * This is a listener.
             * Look for a queued T_CONN_IND
             * with a matching sequence number and remove it
             * from the list.
             * It is normal to not find the sequence number since
             * the soaccept might have already dequeued it
             * (in which case the T_CONN_RES will fail with
             * TBADSEQ).
             */
            (void) soflushconnind(so, tpr->discon_ind.SEQ_number);
            freemsg(mp);
            return (0);
        }

        /*
         * Not a listener.
         *
         * If SS_CANTRCVMORE for AF_UNIX ignore the discon_reason.
         * Such a discon_ind appears when the peer has first done
         * a shutdown() followed by a close() in which case we just
         * want to record socantsendmore.
         * In this case sockfs first receives a T_ORDREL_IND followed
         * by a T_DISCON_IND.
         * Note that for other transports (e.g. TCP) we need to handle
         * the discon_ind in this case since it signals an error.
         */
        mutex_enter(&so->so_lock);
        if ((so->so_state & SS_CANTRCVMORE) &&
            (so->so_family == AF_UNIX)) {
            socantsendmore(so);
            sti->sti_faddr_valid = 0;
            mutex_exit(&so->so_lock);
            strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
            dprintso(so, 1,
                ("T_DISCON_IND: error %d\n", so->so_error));
            freemsg(mp);
            /*
             * Set these variables for the caller to process them.
             * For the else part where T_DISCON_IND is processed,
             * this will be done in the function being called
             * (strsock_discon_ind()).
             */
            *wakeups = WSLEEP;
            *allmsgsigs = S_OUTPUT;
            *pollwakeups = POLLOUT;
        } else if (so->so_flag & (SOASYNC_UNBIND | SOLOCKED)) {
            /*
             * Deferred processing of T_DISCON_IND
             */
            so_save_discon_ind(so, mp, strsock_discon_ind);
            mutex_exit(&so->so_lock);
        } else {
            /*
             * Process T_DISCON_IND now
             */
            (void) strsock_discon_ind(so, mp);
            mutex_exit(&so->so_lock);
        }
        return (NULL);

    case T_UDERROR_IND: {
        struct T_uderror_ind *tudi = &tpr->uderror_ind;
        void *addr;
        t_uscalar_t addrlen;
        int error;

        dprintso(so, 0,
            ("T_UDERROR_IND: error %d\n", tudi->ERROR_type));

        if (MBLKL(mp) < sizeof (struct T_uderror_ind)) {
            zcmn_err(getzoneid(), CE_WARN,
                "sockfs: Too short T_UDERROR_IND. Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }
        /* Ignore on connection-oriented transports */
        if (so->so_mode & SM_CONNREQUIRED) {
            freemsg(mp);
            eprintsoline(so, 0);
            zcmn_err(getzoneid(), CE_WARN,
                "sockfs: T_uderror_ind on connection-oriented "
                "transport\n");
            return (NULL);
        }
        addrlen = tudi->DEST_length;
        addr = sogetoff(mp, tudi->DEST_offset, addrlen, 1);
        if (addr == NULL) {
            zcmn_err(getzoneid(), CE_WARN,
                "sockfs: T_uderror_ind with invalid "
                "addrlen/offset %u/%d\n",
                addrlen, tudi->DEST_offset);
            freemsg(mp);
            return (NULL);
        }

        /*
         * Verify source address for connected socket.
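         * (For a connected datagram socket an indication from some
         * other peer must not set an error on, or tear down, this
         * socket, hence the address comparison below.)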
         */
        mutex_enter(&so->so_lock);
        if (so->so_state & SS_ISCONNECTED) {
            void *faddr;
            t_uscalar_t faddr_len;
            boolean_t match = B_FALSE;

            switch (so->so_family) {
            case AF_INET: {
                /* Compare just IP address and port */
                struct sockaddr_in *sin1, *sin2;

                sin1 = (struct sockaddr_in *)sti->sti_faddr_sa;
                sin2 = (struct sockaddr_in *)addr;
                if (addrlen == sizeof (struct sockaddr_in) &&
                    sin1->sin_port == sin2->sin_port &&
                    sin1->sin_addr.s_addr ==
                    sin2->sin_addr.s_addr)
                    match = B_TRUE;
                break;
            }
            case AF_INET6: {
                /* Compare just IP address and port. Not flow */
                struct sockaddr_in6 *sin1, *sin2;

                sin1 = (struct sockaddr_in6 *)sti->sti_faddr_sa;
                sin2 = (struct sockaddr_in6 *)addr;
                if (addrlen == sizeof (struct sockaddr_in6) &&
                    sin1->sin6_port == sin2->sin6_port &&
                    IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
                    &sin2->sin6_addr))
                    match = B_TRUE;
                break;
            }
            case AF_UNIX:
                faddr = &sti->sti_ux_faddr;
                faddr_len =
                    (t_uscalar_t)sizeof (sti->sti_ux_faddr);
                if (faddr_len == addrlen &&
                    bcmp(addr, faddr, addrlen) == 0)
                    match = B_TRUE;
                break;
            default:
                faddr = sti->sti_faddr_sa;
                faddr_len = (t_uscalar_t)sti->sti_faddr_len;
                if (faddr_len == addrlen &&
                    bcmp(addr, faddr, addrlen) == 0)
                    match = B_TRUE;
                break;
            }

            if (!match) {
#ifdef DEBUG
                dprintso(so, 0,
                    ("sockfs: T_UDERR_IND mismatch: %s - ",
                    pr_addr(so->so_family,
                    (struct sockaddr *)addr, addrlen)));
                dprintso(so, 0, ("%s\n",
                    pr_addr(so->so_family, sti->sti_faddr_sa,
                    sti->sti_faddr_len)));
#endif /* DEBUG */
                mutex_exit(&so->so_lock);
                freemsg(mp);
                return (NULL);
            }
            /*
             * Make the write error nonpersistent. If the error
             * is zero we use ECONNRESET.
             * This assumes that the name space for ERROR_type
             * is the errno name space.
             */
            if (tudi->ERROR_type != 0)
                error = tudi->ERROR_type;
            else
                error = ECONNRESET;

            soseterror(so, error);
            mutex_exit(&so->so_lock);
            strsetrerror(SOTOV(so), 0, 0, sogetrderr);
            strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
            *wakeups = RSLEEP | WSLEEP;
            *allmsgsigs = S_INPUT | S_RDNORM | S_OUTPUT;
            *pollwakeups = POLLIN | POLLRDNORM | POLLOUT;
            freemsg(mp);
            return (NULL);
        }
        /*
         * If the application asked for delayed errors
         * record the T_UDERROR_IND in sti_eaddr_mp and the reason in
         * sti_delayed_error for delayed error posting. If the reason
         * is zero use ECONNRESET.
         * Note that delayed error indications do not make sense for
         * AF_UNIX sockets since sendto checks that the destination
         * address is valid at the time of the sendto.
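         * (With SO_DGRAM_ERRIND set the indication saved below is
         * meant to be reported against a later use of the matching
         * destination address by the send path, rather than being
         * dropped here.)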
         */
        if (!(so->so_options & SO_DGRAM_ERRIND)) {
            mutex_exit(&so->so_lock);
            freemsg(mp);
            return (NULL);
        }
        if (sti->sti_eaddr_mp != NULL)
            freemsg(sti->sti_eaddr_mp);

        sti->sti_eaddr_mp = mp;
        if (tudi->ERROR_type != 0)
            error = tudi->ERROR_type;
        else
            error = ECONNRESET;
        sti->sti_delayed_error = (ushort_t)error;
        mutex_exit(&so->so_lock);
        return (NULL);
    }

    case T_ERROR_ACK:
        dprintso(so, 0,
            ("strsock_proto: T_ERROR_ACK for %d, error %d/%d\n",
            tpr->error_ack.ERROR_prim,
            tpr->error_ack.TLI_error,
            tpr->error_ack.UNIX_error));

        if (MBLKL(mp) < sizeof (struct T_error_ack)) {
            zcmn_err(getzoneid(), CE_WARN,
                "sockfs: Too short T_ERROR_ACK. Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }
        /*
         * Check if we were waiting for the async message
         */
        mutex_enter(&so->so_lock);
        if ((so->so_flag & SOASYNC_UNBIND) &&
            tpr->error_ack.ERROR_prim == T_UNBIND_REQ) {
            so_unlock_single(so, SOASYNC_UNBIND);
            mutex_exit(&so->so_lock);
            freemsg(mp);
            return (NULL);
        }
        mutex_exit(&so->so_lock);
        soqueueack(so, mp);
        return (NULL);

    case T_OK_ACK:
        if (MBLKL(mp) < sizeof (struct T_ok_ack)) {
            zcmn_err(getzoneid(), CE_WARN,
                "sockfs: Too short T_OK_ACK. Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }
        /*
         * Check if we were waiting for the async message
         */
        mutex_enter(&so->so_lock);
        if ((so->so_flag & SOASYNC_UNBIND) &&
            tpr->ok_ack.CORRECT_prim == T_UNBIND_REQ) {
            dprintso(so, 1,
                ("strsock_proto: T_OK_ACK async unbind\n"));
            so_unlock_single(so, SOASYNC_UNBIND);
            mutex_exit(&so->so_lock);
            freemsg(mp);
            return (NULL);
        }
        mutex_exit(&so->so_lock);
        soqueueack(so, mp);
        return (NULL);

    case T_INFO_ACK:
        if (MBLKL(mp) < sizeof (struct T_info_ack)) {
            zcmn_err(getzoneid(), CE_WARN,
                "sockfs: Too short T_INFO_ACK. Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }
        soqueueack(so, mp);
        return (NULL);

    case T_CAPABILITY_ACK:
        /*
         * A T_capability_ack need only be large enough to hold
         * the PRIM_type and CAP_bits1 fields; checking for anything
         * larger might reject a correct response from an older
         * provider.
         */
        if (MBLKL(mp) < 2 * sizeof (t_uscalar_t)) {
            zcmn_err(getzoneid(), CE_WARN,
                "sockfs: Too short T_CAPABILITY_ACK. Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }
        soqueueack(so, mp);
        return (NULL);

    case T_BIND_ACK:
        if (MBLKL(mp) < sizeof (struct T_bind_ack)) {
            zcmn_err(getzoneid(), CE_WARN,
                "sockfs: Too short T_BIND_ACK. Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }
        soqueueack(so, mp);
        return (NULL);

    case T_OPTMGMT_ACK:
        if (MBLKL(mp) < sizeof (struct T_optmgmt_ack)) {
            zcmn_err(getzoneid(), CE_WARN,
                "sockfs: Too short T_OPTMGMT_ACK. "
                "Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }
        soqueueack(so, mp);
        return (NULL);
    default:
#ifdef DEBUG
        zcmn_err(getzoneid(), CE_WARN,
            "sockfs: unknown TPI primitive %d received\n",
            tpr->type);
#endif /* DEBUG */
        freemsg(mp);
        return (NULL);
    }
}

/*
 * This routine is registered with the stream head to receive other
 * (non-data, and non-proto) messages.
 *
 * Returns NULL if the message was consumed.
 * Returns an mblk to make that mblk be processed by the stream head.
 *
 * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and
 * *pollwakeups) for the stream head to take action on.
 */
static mblk_t *
strsock_misc(vnode_t *vp, mblk_t *mp,
    strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
    strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
{
    struct sonode *so;
    sotpi_info_t *sti;

    so = VTOSO(vp);
    sti = SOTOTPI(so);

    dprintso(so, 1, ("strsock_misc(%p, %p, 0x%x)\n",
        (void *)vp, (void *)mp, DB_TYPE(mp)));

    /* Set default return values */
    *wakeups = *allmsgsigs = *firstmsgsigs = *pollwakeups = 0;

    switch (DB_TYPE(mp)) {
    case M_PCSIG:
        /*
         * This assumes that an M_PCSIG for the urgent data arrives
         * before the corresponding T_EXDATA_IND.
         *
         * Note: Just like in SunOS 4.X and 4.4BSD a poll will be
         * awoken before the urgent data shows up.
         * For OOBINLINE this can result in select returning
         * only exceptions as opposed to except|read.
         */
        if (*mp->b_rptr == SIGURG) {
            mutex_enter(&so->so_lock);
            dprintso(so, 1,
                ("SIGURG(%p): counts %d/%d state %s\n",
                (void *)vp, sti->sti_oobsigcnt, sti->sti_oobcnt,
                pr_state(so->so_state, so->so_mode)));
            so_oob_sig(so, 1, allmsgsigs, pollwakeups);
            dprintso(so, 1,
                ("after SIGURG(%p): counts %d/%d "
                " poll 0x%x sig 0x%x state %s\n",
                (void *)vp, sti->sti_oobsigcnt, sti->sti_oobcnt,
                *pollwakeups, *allmsgsigs,
                pr_state(so->so_state, so->so_mode)));
            mutex_exit(&so->so_lock);
        }
        freemsg(mp);
        return (NULL);

    case M_SIG:
    case M_HANGUP:
    case M_UNHANGUP:
    case M_ERROR:
        /* M_ERRORs etc are ignored */
        freemsg(mp);
        return (NULL);

    case M_FLUSH:
        /*
         * Do not flush read queue. If the M_FLUSH
         * arrives because of an impending T_discon_ind
         * we still have to keep any queued data - this is part of
         * socket semantics.
         */
        if (*mp->b_rptr & FLUSHW) {
            *mp->b_rptr &= ~FLUSHR;
            return (mp);
        }
        freemsg(mp);
        return (NULL);

    default:
        return (mp);
    }
}


/* Register to receive signals for certain events */
int
so_set_asyncsigs(vnode_t *vp, pid_t pgrp, int events, int mode, cred_t *cr)
{
    struct strsigset ss;
    int32_t rval;

    /*
     * Note that SOLOCKED will be set except for the call from soaccept().
     */
    ASSERT(!mutex_owned(&VTOSO(vp)->so_lock));
    ss.ss_pid = pgrp;
    ss.ss_events = events;
    return (strioctl(vp, I_ESETSIG, (intptr_t)&ss, mode, K_TO_K, cr,
        &rval));
}


/* Register for events matching the SS_ASYNC flag */
int
so_set_events(struct sonode *so, vnode_t *vp, cred_t *cr)
{
    int events = so->so_state & SS_ASYNC ?
        S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT :
        S_RDBAND | S_BANDURG;

    return (so_set_asyncsigs(vp, so->so_pgrp, events, 0, cr));
}


/* Change the SS_ASYNC flag, and update signal delivery if needed */
int
so_flip_async(struct sonode *so, vnode_t *vp, int mode, cred_t *cr)
{
    ASSERT(mutex_owned(&so->so_lock));
    if (so->so_pgrp != 0) {
        int error;
        int events = so->so_state & SS_ASYNC ?    /* Old flag */
            S_RDBAND | S_BANDURG :                /* New sigs */
            S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT;

        so_lock_single(so);
        mutex_exit(&so->so_lock);

        error = so_set_asyncsigs(vp, so->so_pgrp, events, mode, cr);

        mutex_enter(&so->so_lock);
        so_unlock_single(so, SOLOCKED);
        if (error)
            return (error);
    }
    so->so_state ^= SS_ASYNC;
    return (0);
}

/*
 * Set new pid/pgrp for SIGPOLL (or SIGIO for FIOASYNC mode), replacing
 * any existing one. If passed zero, just clear the existing one.
 */
int
so_set_siggrp(struct sonode *so, vnode_t *vp, pid_t pgrp, int mode, cred_t *cr)
{
    int events = so->so_state & SS_ASYNC ?
        S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT :
        S_RDBAND | S_BANDURG;
    int error;

    ASSERT(mutex_owned(&so->so_lock));

    /*
     * Change socket process (group).
     *
     * strioctl (via so_set_asyncsigs) will perform permission check and
     * also keep a PID_HOLD to prevent the pid from being reused.
     */
    so_lock_single(so);
    mutex_exit(&so->so_lock);

    if (pgrp != 0) {
        dprintso(so, 1, ("setown: adding pgrp %d ev 0x%x\n",
            pgrp, events));
        error = so_set_asyncsigs(vp, pgrp, events, mode, cr);
        if (error != 0) {
            eprintsoline(so, error);
            goto bad;
        }
    }
    /* Remove the previously registered process/group */
    if (so->so_pgrp != 0) {
        dprintso(so, 1, ("setown: removing pgrp %d\n", so->so_pgrp));
        error = so_set_asyncsigs(vp, so->so_pgrp, 0, mode, cr);
        if (error != 0) {
            eprintsoline(so, error);
            error = 0;
        }
    }
    mutex_enter(&so->so_lock);
    so_unlock_single(so, SOLOCKED);
    so->so_pgrp = pgrp;
    return (0);
bad:
    mutex_enter(&so->so_lock);
    so_unlock_single(so, SOLOCKED);
    return (error);
}

/*
 * Wrapper for getmsg. If the socket has been converted to a stream
 * pass the request to the stream head.
 */
int
sock_getmsg(
    struct vnode *vp,
    struct strbuf *mctl,
    struct strbuf *mdata,
    uchar_t *prip,
    int *flagsp,
    int fmode,
    rval_t *rvp
)
{
    struct sonode *so;

    ASSERT(vp->v_type == VSOCK);
    /*
     * Use the stream head to find the real socket vnode.
     * This is needed when namefs sits above sockfs. Some
     * sockets (like SCTP) are not streams.
     */
    if (!vp->v_stream) {
        return (ENOSTR);
    }
    ASSERT(vp->v_stream->sd_vnode);
    vp = vp->v_stream->sd_vnode;
    ASSERT(vn_matchops(vp, socket_vnodeops));
    so = VTOSO(vp);

    dprintso(so, 1, ("sock_getmsg(%p) %s\n",
        (void *)so, pr_state(so->so_state, so->so_mode)));

    if (so->so_version == SOV_STREAM) {
        /* The imaginary "sockmod" has been popped - act as a stream */
        return (strgetmsg(vp, mctl, mdata, prip, flagsp, fmode, rvp));
    }
    eprintsoline(so, ENOSTR);
    return (ENOSTR);
}

/*
 * Wrapper for putmsg.
 * If the socket has been converted to a stream
 * pass the request to the stream head.
 *
 * Note that while a regular socket (SOV_SOCKSTREAM) does support the
 * STREAMS ioctl set, it does not support putmsg and getmsg.
 * Allowing putmsg would prevent sockfs from tracking the state of
 * the socket/transport and would also invalidate the locking in sockfs.
 */
int
sock_putmsg(
    struct vnode *vp,
    struct strbuf *mctl,
    struct strbuf *mdata,
    uchar_t pri,
    int flag,
    int fmode
)
{
    struct sonode *so;

    ASSERT(vp->v_type == VSOCK);
    /*
     * Use the stream head to find the real socket vnode.
     * This is needed when namefs sits above sockfs.
     */
    if (!vp->v_stream) {
        return (ENOSTR);
    }
    ASSERT(vp->v_stream->sd_vnode);
    vp = vp->v_stream->sd_vnode;
    ASSERT(vn_matchops(vp, socket_vnodeops));
    so = VTOSO(vp);

    dprintso(so, 1, ("sock_putmsg(%p) %s\n",
        (void *)so, pr_state(so->so_state, so->so_mode)));

    if (so->so_version == SOV_STREAM) {
        /* The imaginary "sockmod" has been popped - act as a stream */
        return (strputmsg(vp, mctl, mdata, pri, flag, fmode));
    }
    eprintsoline(so, ENOSTR);
    return (ENOSTR);
}

/*
 * Special function called only from f_getfl().
 * Returns FASYNC if the SS_ASYNC flag is set on a socket, else 0.
 * No locks are acquired here, so it is safe to use while uf_lock is held.
 * This exists solely for BSD fcntl() FASYNC compatibility.
 */
int
sock_getfasync(vnode_t *vp)
{
    struct sonode *so;

    ASSERT(vp->v_type == VSOCK);
    /*
     * For the stream model v_stream is used; for non-stream sockets
     * v_stream is always NULL.
     */
    if (vp->v_stream != NULL)
        so = VTOSO(vp->v_stream->sd_vnode);
    else
        so = VTOSO(vp);

    if (so->so_version == SOV_STREAM || !(so->so_state & SS_ASYNC))
        return (0);

    return (FASYNC);
}

/*
 * Sockfs sodirect STREAMS read put procedure. Called from a sodirect
 * enabled transport driver/module with an mblk_t chain.
 *
 * Note, we in-line putq() for the fast-path cases where q is empty, or
 * q_last and bp are both of type M_DATA. In all other cases we call putq().
 *
 * On success zero is returned, else an errno is returned.
 */
int
sodput(sodirect_t *sodp, mblk_t *bp)
{
    queue_t *q = sodp->sod_q;
    struct stdata *stp = (struct stdata *)q->q_ptr;
    mblk_t *nbp;
    mblk_t *last = q->q_last;
    int bytecnt = 0;
    int mblkcnt = 0;

    ASSERT(MUTEX_HELD(sodp->sod_lockp));

    if (stp->sd_flag == STREOF) {
        do {
            if ((nbp = bp->b_next) != NULL)
                bp->b_next = NULL;
            freemsg(bp);
        } while ((bp = nbp) != NULL);

        return (0);
    }

    mutex_enter(QLOCK(q));
    if (q->q_first == NULL) {
        /* Q empty, really fast fast-path */
        bp->b_prev = NULL;
        bp->b_next = NULL;
        q->q_first = bp;
        q->q_last = bp;

    } else if (last->b_datap->db_type == M_DATA &&
        bp->b_datap->db_type == M_DATA) {
        /*
         * Last mblk_t chain and bp are both of type M_DATA so
         * in-line putq() here; if the DBLK_UIOA states match,
         * add bp to the end of the current last chain, else
         * start a new last chain with bp.
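         * (Only agreement of the two DBLK_UIOA flags is checked here;
         * the sod_uioa.uioa_mbytes ASSERTs below apply to the
         * new-chain and putq() paths.)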
         */
        if ((last->b_datap->db_flags & DBLK_UIOA) ==
            (bp->b_datap->db_flags & DBLK_UIOA)) {
            /* Added to end */
            while ((nbp = last->b_cont) != NULL)
                last = nbp;
            last->b_cont = bp;
        } else {
            /* New last */
            ASSERT((bp->b_datap->db_flags & DBLK_UIOA) == 0 ||
                msgdsize(bp) == sodp->sod_uioa.uioa_mbytes);
            last->b_next = bp;
            bp->b_next = NULL;
            bp->b_prev = last;
            q->q_last = bp;
        }
    } else {
        /*
         * Can't use q_last so just call putq().
         */
        mutex_exit(QLOCK(q));

        ASSERT((bp->b_datap->db_flags & DBLK_UIOA) == 0 ||
            msgdsize(bp) == sodp->sod_uioa.uioa_mbytes);
        (void) putq(q, bp);
        return (0);
    }

    /* Count bytes and mblk_t's */
    do {
        bytecnt += MBLKL(bp);
        mblkcnt++;
    } while ((bp = bp->b_cont) != NULL);
    q->q_count += bytecnt;
    q->q_mblkcnt += mblkcnt;

    /* Check for QFULL */
    if (q->q_count >= q->q_hiwat + sodp->sod_want ||
        q->q_mblkcnt >= q->q_hiwat) {
        q->q_flag |= QFULL;
    }

    mutex_exit(QLOCK(q));
    return (0);
}

/*
 * Sockfs sodirect read wakeup. Called from a sodirect enabled transport
 * driver/module to indicate that read-side data is available.
 *
 * On return the sodirect_t.lock mutex will be exited so this must be the
 * last sodirect_t call to guarantee atomic access of *sodp.
 */
void
sodwakeup(sodirect_t *sodp)
{
    queue_t *q = sodp->sod_q;
    struct stdata *stp = (struct stdata *)q->q_ptr;

    ASSERT(MUTEX_HELD(sodp->sod_lockp));

    if (stp->sd_flag & RSLEEP) {
        stp->sd_flag &= ~RSLEEP;
        cv_broadcast(&q->q_wait);
    }

    if (stp->sd_rput_opt & SR_POLLIN) {
        stp->sd_rput_opt &= ~SR_POLLIN;
        mutex_exit(sodp->sod_lockp);
        pollwakeup(&stp->sd_pollist, POLLIN | POLLRDNORM);
    } else
        mutex_exit(sodp->sod_lockp);
}
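/*
 * Illustrative caller pattern (a sketch, not lifted from any particular
 * transport): a sodirect enabled provider delivering read-side data is
 * expected to hold sod_lockp across the pair of calls, roughly
 *
 *    mutex_enter(sodp->sod_lockp);
 *    (void) sodput(sodp, mp);
 *    sodwakeup(sodp);        (note: exits sod_lockp)
 *
 * sodwakeup() must be the last sodirect_t operation since it drops
 * sod_lockp before returning.
 */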