/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/sysmacros.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/file.h>
#include <sys/user.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/esunddi.h>
#include <sys/flock.h>
#include <sys/modctl.h>
#include <sys/vtrace.h>
#include <sys/strsun.h>
#include <sys/cmn_err.h>
#include <sys/proc.h>
#include <sys/ddi.h>

#include <sys/suntpi.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/socketvar.h>
#include <netinet/in.h>
#include <inet/common.h>
#include <inet/proto_set.h>

#include <sys/tiuser.h>
#define	_SUN_TPI_VERSION	2
#include <sys/tihdr.h>

#include <inet/kssl/ksslapi.h>

#include <c2/audit.h>

#include <fs/sockfs/socktpi.h>
#include <fs/sockfs/socktpi_impl.h>

int so_default_version = SOV_SOCKSTREAM;

#ifdef DEBUG
/* Set sockdebug to print debug messages when SO_DEBUG is set */
int sockdebug = 0;

/* Set sockprinterr to print error messages when SO_DEBUG is set */
int sockprinterr = 0;

/*
 * Set so_default_options to SO_DEBUG if all sockets should be created
 * with SO_DEBUG set. This is needed to get debug printouts from the
 * socket() call itself.
 */
int so_default_options = 0;
#endif /* DEBUG */

#ifdef SOCK_TEST
/*
 * Set to number of ticks to limit cv_waits for code coverage testing.
 * Set to 1000 when SO_DEBUG is set to 2.
 */
clock_t sock_test_timelimit = 0;
#endif /* SOCK_TEST */

/*
 * For concurrency testing of e.g. opening /dev/ip which does not
 * handle T_INFO_REQ messages.
 */
int so_no_tinfo = 0;

/*
 * Timeout for getting a T_CAPABILITY_ACK - it is possible for a provider
 * to simply ignore the T_CAPABILITY_REQ.
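 * The value below is expressed in seconds; do_tcapability() converts it
 * to clock ticks (multiplying by hz) when waiting for the ack.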
108 */ 109 clock_t sock_capability_timeout = 2; /* seconds */ 110 111 static int do_tcapability(struct sonode *so, t_uscalar_t cap_bits1); 112 static void so_removehooks(struct sonode *so); 113 114 static mblk_t *strsock_proto(vnode_t *vp, mblk_t *mp, 115 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 116 strsigset_t *allmsgsigs, strpollset_t *pollwakeups); 117 static mblk_t *strsock_misc(vnode_t *vp, mblk_t *mp, 118 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 119 strsigset_t *allmsgsigs, strpollset_t *pollwakeups); 120 121 /* 122 * Convert a socket to a stream. Invoked when the illusory sockmod 123 * is popped from the stream. 124 * Change the stream head back to default operation without losing 125 * any messages (T_conn_ind's are moved to the stream head queue). 126 */ 127 int 128 so_sock2stream(struct sonode *so) 129 { 130 struct vnode *vp = SOTOV(so); 131 queue_t *rq; 132 mblk_t *mp; 133 int error = 0; 134 sotpi_info_t *sti = SOTOTPI(so); 135 136 ASSERT(MUTEX_HELD(&sti->sti_plumb_lock)); 137 138 mutex_enter(&so->so_lock); 139 so_lock_single(so); 140 141 ASSERT(so->so_version != SOV_STREAM); 142 143 if (sti->sti_direct) { 144 mblk_t **mpp; 145 int rval; 146 147 /* 148 * Tell the transport below that sockmod is being popped 149 */ 150 mutex_exit(&so->so_lock); 151 error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K, CRED(), 152 &rval); 153 mutex_enter(&so->so_lock); 154 if (error != 0) { 155 dprintso(so, 0, ("so_sock2stream(%p): " 156 "_SIOCSOCKFALLBACK failed\n", (void *)so)); 157 goto exit; 158 } 159 sti->sti_direct = 0; 160 161 for (mpp = &sti->sti_conn_ind_head; (mp = *mpp) != NULL; 162 mpp = &mp->b_next) { 163 struct T_conn_ind *conn_ind; 164 165 /* 166 * strsock_proto() has already verified the length of 167 * this message block. 168 */ 169 ASSERT(MBLKL(mp) >= sizeof (struct T_conn_ind)); 170 171 conn_ind = (struct T_conn_ind *)mp->b_rptr; 172 if (conn_ind->OPT_length == 0 && 173 conn_ind->OPT_offset == 0) 174 continue; 175 176 if (DB_REF(mp) > 1) { 177 mblk_t *newmp; 178 size_t length; 179 cred_t *cr; 180 pid_t cpid; 181 int error; /* Dummy - error not returned */ 182 183 /* 184 * Copy the message block because it is used 185 * elsewhere, too. 186 * Can't use copyb since we want to wait 187 * yet allow for EINTR. 188 */ 189 /* Round up size for reuse */ 190 length = MAX(MBLKL(mp), 64); 191 cr = msg_getcred(mp, &cpid); 192 if (cr != NULL) { 193 newmp = allocb_cred_wait(length, 0, 194 &error, cr, cpid); 195 } else { 196 newmp = allocb_wait(length, 0, 0, 197 &error); 198 } 199 if (newmp == NULL) { 200 error = EINTR; 201 goto exit; 202 } 203 bcopy(mp->b_rptr, newmp->b_wptr, length); 204 newmp->b_wptr += length; 205 newmp->b_next = mp->b_next; 206 207 /* 208 * Link the new message block into the queue 209 * and free the old one. 210 */ 211 *mpp = newmp; 212 mp->b_next = NULL; 213 freemsg(mp); 214 215 mp = newmp; 216 conn_ind = (struct T_conn_ind *)mp->b_rptr; 217 } 218 219 /* 220 * Remove options added by TCP for accept fast-path. 221 */ 222 conn_ind->OPT_length = 0; 223 conn_ind->OPT_offset = 0; 224 } 225 } 226 227 so->so_version = SOV_STREAM; 228 so->so_proto_handle = NULL; 229 230 /* 231 * Remove the hooks in the stream head to avoid queuing more 232 * packets in sockfs. 233 */ 234 mutex_exit(&so->so_lock); 235 so_removehooks(so); 236 mutex_enter(&so->so_lock); 237 238 /* 239 * Clear any state related to urgent data. Leave any T_EXDATA_IND 240 * on the queue - the behavior of urgent data after a switch is 241 * left undefined. 
 */
	so->so_error = sti->sti_delayed_error = 0;
	freemsg(so->so_oobmsg);
	so->so_oobmsg = NULL;
	sti->sti_oobsigcnt = sti->sti_oobcnt = 0;

	so->so_state &= ~(SS_RCVATMARK|SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA|
	    SS_SAVEDEOR);
	ASSERT(so_verify_oobstate(so));

	freemsg(sti->sti_ack_mp);
	sti->sti_ack_mp = NULL;

	/*
	 * Flush the T_DISCON_IND on sti_discon_ind_mp.
	 */
	so_flush_discon_ind(so);

	/*
	 * Move any queued T_CONN_IND messages to stream head queue.
	 */
	rq = RD(strvp2wq(vp));
	while ((mp = sti->sti_conn_ind_head) != NULL) {
		sti->sti_conn_ind_head = mp->b_next;
		mp->b_next = NULL;
		if (sti->sti_conn_ind_head == NULL) {
			ASSERT(sti->sti_conn_ind_tail == mp);
			sti->sti_conn_ind_tail = NULL;
		}
		dprintso(so, 0,
		    ("so_sock2stream(%p): moving T_CONN_IND\n", (void *)so));

		/* Drop lock across put() */
		mutex_exit(&so->so_lock);
		put(rq, mp);
		mutex_enter(&so->so_lock);
	}

exit:
	ASSERT(MUTEX_HELD(&so->so_lock));
	so_unlock_single(so, SOLOCKED);
	mutex_exit(&so->so_lock);
	return (error);
}

/*
 * Convert a stream back to a socket. This is invoked when the illusory
 * sockmod is pushed on a stream (where the stream was "created" by
 * popping the illusory sockmod).
 * This routine can not recreate the socket state (certain aspects of
 * it like urgent data state and the bound/connected addresses for AF_UNIX
 * sockets can not be recreated by asking the transport for information).
 * Thus this routine implicitly assumes that the socket is in an initial
 * state (as if it was just created). It flushes any messages queued on the
 * read queue to avoid dealing with e.g. TPI acks or T_exdata_ind messages.
 */
void
so_stream2sock(struct sonode *so)
{
	struct vnode *vp = SOTOV(so);
	sotpi_info_t *sti = SOTOTPI(so);

	ASSERT(MUTEX_HELD(&sti->sti_plumb_lock));

	mutex_enter(&so->so_lock);
	so_lock_single(so);
	ASSERT(so->so_version == SOV_STREAM);
	so->so_version = SOV_SOCKSTREAM;
	sti->sti_pushcnt = 0;
	mutex_exit(&so->so_lock);

	/*
	 * Set a permanent error to force any thread in sorecvmsg to
	 * return (and drop SOREADLOCKED). Clear the error once
	 * we have SOREADLOCKED.
	 * This makes a read sleeping during the I_PUSH of sockmod return
	 * EIO.
	 */
	strsetrerror(SOTOV(so), EIO, 1, NULL);

	/*
	 * Get the read lock before flushing data to avoid
	 * problems with the T_EXDATA_IND MSG_PEEK code in sorecvmsg.
	 */
	mutex_enter(&so->so_lock);
	(void) so_lock_read(so, 0);	/* Set SOREADLOCKED */
	mutex_exit(&so->so_lock);

	strsetrerror(SOTOV(so), 0, 0, NULL);
	so_installhooks(so);

	/*
	 * Flush everything on the read queue.
	 * This ensures that no T_CONN_IND remain and that no T_EXDATA_IND
	 * remain; those types of messages would confuse sockfs.
	 */
	strflushrq(vp, FLUSHALL);
	mutex_enter(&so->so_lock);

	/*
	 * Flush the T_DISCON_IND on sti_discon_ind_mp.
	 */
	so_flush_discon_ind(so);
	so_unlock_read(so);	/* Clear SOREADLOCKED */

	so_unlock_single(so, SOLOCKED);
	mutex_exit(&so->so_lock);
}

/*
 * Install the hooks in the stream head.
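 * strsock_proto() and strsock_misc() become the read-side put hooks and
 * SH_SIGPIPE semantics are enabled on the write side.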
 */
void
so_installhooks(struct sonode *so)
{
	struct vnode *vp = SOTOV(so);

	strsetrputhooks(vp, SH_SIGALLDATA | SH_IGN_ZEROLEN | SH_CONSOL_DATA,
	    strsock_proto, strsock_misc);
	strsetwputhooks(vp, SH_SIGPIPE | SH_RECHECK_ERR, 0);
}

/*
 * Remove the hooks in the stream head.
 */
static void
so_removehooks(struct sonode *so)
{
	struct vnode *vp = SOTOV(so);

	strsetrputhooks(vp, 0, NULL, NULL);
	strsetwputhooks(vp, 0, STRTIMOUT);
	/*
	 * Leave read behavior as it would have been for a normal
	 * stream i.e. a read of an M_PROTO will fail.
	 */
}

void
so_basic_strinit(struct sonode *so)
{
	struct vnode *vp = SOTOV(so);
	struct stdata *stp;
	mblk_t *mp;
	sotpi_info_t *sti = SOTOTPI(so);

	/* Preallocate an unbind_req message */
	mp = soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP, CRED());
	mutex_enter(&so->so_lock);
	sti->sti_unbind_mp = mp;
#ifdef DEBUG
	so->so_options = so_default_options;
#endif /* DEBUG */
	mutex_exit(&so->so_lock);

	so_installhooks(so);

	stp = vp->v_stream;
	/*
	 * Have to keep minpsz at zero in order to allow write/send of zero
	 * bytes.
	 */
	mutex_enter(&stp->sd_lock);
	if (stp->sd_qn_minpsz == 1)
		stp->sd_qn_minpsz = 0;
	mutex_exit(&stp->sd_lock);
}

/*
 * Initialize the streams side of a socket including
 * T_info_req/ack processing. If tso is not NULL its values are used thereby
 * avoiding the T_INFO_REQ.
 */
int
so_strinit(struct sonode *so, struct sonode *tso)
{
	sotpi_info_t *sti = SOTOTPI(so);
	sotpi_info_t *tsti;
	int error;

	so_basic_strinit(so);

	/*
	 * The T_CAPABILITY_REQ should be the first message sent down because
	 * at least TCP has a fast-path for this which avoids timeouts while
	 * waiting for the T_CAPABILITY_ACK under high system load.
	 */
	if (tso == NULL) {
		error = do_tcapability(so, TC1_ACCEPTOR_ID | TC1_INFO);
		if (error)
			return (error);
	} else {
		tsti = SOTOTPI(tso);

		mutex_enter(&so->so_lock);
		sti->sti_tsdu_size = tsti->sti_tsdu_size;
		sti->sti_etsdu_size = tsti->sti_etsdu_size;
		sti->sti_addr_size = tsti->sti_addr_size;
		sti->sti_opt_size = tsti->sti_opt_size;
		sti->sti_tidu_size = tsti->sti_tidu_size;
		sti->sti_serv_type = tsti->sti_serv_type;
		so->so_mode = tso->so_mode & ~SM_ACCEPTOR_ID;
		mutex_exit(&so->so_lock);

		/* the following do_tcapability may update so->so_mode */
		if ((tsti->sti_serv_type != T_CLTS) &&
		    (sti->sti_direct == 0)) {
			error = do_tcapability(so, TC1_ACCEPTOR_ID);
			if (error)
				return (error);
		}
	}
	/*
	 * If the addr_size is 0 we treat it as already bound
	 * and connected. This is used by the routing socket.
	 * We set the addr_size to something to allocate the address
	 * structures.
	 */
	if (sti->sti_addr_size == 0) {
		so->so_state |= SS_ISBOUND | SS_ISCONNECTED;
		/* Address size can vary with address families.
*/ 463 if (so->so_family == AF_INET6) 464 sti->sti_addr_size = 465 (t_scalar_t)sizeof (struct sockaddr_in6); 466 else 467 sti->sti_addr_size = 468 (t_scalar_t)sizeof (struct sockaddr_in); 469 ASSERT(sti->sti_unbind_mp); 470 } 471 472 so_alloc_addr(so, sti->sti_addr_size); 473 474 return (0); 475 } 476 477 static void 478 copy_tinfo(struct sonode *so, struct T_info_ack *tia) 479 { 480 sotpi_info_t *sti = SOTOTPI(so); 481 482 sti->sti_tsdu_size = tia->TSDU_size; 483 sti->sti_etsdu_size = tia->ETSDU_size; 484 sti->sti_addr_size = tia->ADDR_size; 485 sti->sti_opt_size = tia->OPT_size; 486 sti->sti_tidu_size = tia->TIDU_size; 487 sti->sti_serv_type = tia->SERV_type; 488 switch (tia->CURRENT_state) { 489 case TS_UNBND: 490 break; 491 case TS_IDLE: 492 so->so_state |= SS_ISBOUND; 493 sti->sti_laddr_len = 0; 494 sti->sti_laddr_valid = 0; 495 break; 496 case TS_DATA_XFER: 497 so->so_state |= SS_ISBOUND|SS_ISCONNECTED; 498 sti->sti_laddr_len = 0; 499 sti->sti_faddr_len = 0; 500 sti->sti_laddr_valid = 0; 501 sti->sti_faddr_valid = 0; 502 break; 503 } 504 505 /* 506 * Heuristics for determining the socket mode flags 507 * (SM_ATOMIC, SM_CONNREQUIRED, SM_ADDR, SM_FDPASSING, 508 * and SM_EXDATA, SM_OPTDATA, and SM_BYTESTREAM) 509 * from the info ack. 510 */ 511 if (sti->sti_serv_type == T_CLTS) { 512 so->so_mode |= SM_ATOMIC | SM_ADDR; 513 } else { 514 so->so_mode |= SM_CONNREQUIRED; 515 if (sti->sti_etsdu_size != 0 && sti->sti_etsdu_size != -2) 516 so->so_mode |= SM_EXDATA; 517 } 518 if (so->so_type == SOCK_SEQPACKET || so->so_type == SOCK_RAW) { 519 /* Semantics are to discard tail end of messages */ 520 so->so_mode |= SM_ATOMIC; 521 } 522 if (so->so_family == AF_UNIX) { 523 so->so_mode |= SM_FDPASSING | SM_OPTDATA; 524 if (sti->sti_addr_size == -1) { 525 /* MAXPATHLEN + soun_family + nul termination */ 526 sti->sti_addr_size = (t_scalar_t)(MAXPATHLEN + 527 sizeof (short) + 1); 528 } 529 if (so->so_type == SOCK_STREAM) { 530 /* 531 * Make it into a byte-stream transport. 532 * SOCK_SEQPACKET sockets are unchanged. 533 */ 534 sti->sti_tsdu_size = 0; 535 } 536 } else if (sti->sti_addr_size == -1) { 537 /* 538 * Logic extracted from sockmod - have to pick some max address 539 * length in order to preallocate the addresses. 540 */ 541 sti->sti_addr_size = SOA_DEFSIZE; 542 } 543 if (sti->sti_tsdu_size == 0) 544 so->so_mode |= SM_BYTESTREAM; 545 } 546 547 static int 548 check_tinfo(struct sonode *so) 549 { 550 sotpi_info_t *sti = SOTOTPI(so); 551 552 /* Consistency checks */ 553 if (so->so_type == SOCK_DGRAM && sti->sti_serv_type != T_CLTS) { 554 eprintso(so, ("service type and socket type mismatch\n")); 555 eprintsoline(so, EPROTO); 556 return (EPROTO); 557 } 558 if (so->so_type == SOCK_STREAM && sti->sti_serv_type == T_CLTS) { 559 eprintso(so, ("service type and socket type mismatch\n")); 560 eprintsoline(so, EPROTO); 561 return (EPROTO); 562 } 563 if (so->so_type == SOCK_SEQPACKET && sti->sti_serv_type == T_CLTS) { 564 eprintso(so, ("service type and socket type mismatch\n")); 565 eprintsoline(so, EPROTO); 566 return (EPROTO); 567 } 568 if (so->so_family == AF_INET && 569 sti->sti_addr_size != (t_scalar_t)sizeof (struct sockaddr_in)) { 570 eprintso(so, 571 ("AF_INET must have sockaddr_in address length. Got %d\n", 572 sti->sti_addr_size)); 573 eprintsoline(so, EMSGSIZE); 574 return (EMSGSIZE); 575 } 576 if (so->so_family == AF_INET6 && 577 sti->sti_addr_size != (t_scalar_t)sizeof (struct sockaddr_in6)) { 578 eprintso(so, 579 ("AF_INET6 must have sockaddr_in6 address length. 
Got %d\n", 580 sti->sti_addr_size)); 581 eprintsoline(so, EMSGSIZE); 582 return (EMSGSIZE); 583 } 584 585 dprintso(so, 1, ( 586 "tinfo: serv %d tsdu %d, etsdu %d, addr %d, opt %d, tidu %d\n", 587 sti->sti_serv_type, sti->sti_tsdu_size, sti->sti_etsdu_size, 588 sti->sti_addr_size, sti->sti_opt_size, 589 sti->sti_tidu_size)); 590 dprintso(so, 1, ("tinfo: so_state %s\n", 591 pr_state(so->so_state, so->so_mode))); 592 return (0); 593 } 594 595 /* 596 * Send down T_info_req and wait for the ack. 597 * Record interesting T_info_ack values in the sonode. 598 */ 599 static int 600 do_tinfo(struct sonode *so) 601 { 602 struct T_info_req tir; 603 mblk_t *mp; 604 int error; 605 606 ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 607 608 if (so_no_tinfo) { 609 SOTOTPI(so)->sti_addr_size = 0; 610 return (0); 611 } 612 613 dprintso(so, 1, ("do_tinfo(%p)\n", (void *)so)); 614 615 /* Send T_INFO_REQ */ 616 tir.PRIM_type = T_INFO_REQ; 617 mp = soallocproto1(&tir, sizeof (tir), 618 sizeof (struct T_info_req) + sizeof (struct T_info_ack), 619 _ALLOC_INTR, CRED()); 620 if (mp == NULL) { 621 eprintsoline(so, ENOBUFS); 622 return (ENOBUFS); 623 } 624 /* T_INFO_REQ has to be M_PCPROTO */ 625 DB_TYPE(mp) = M_PCPROTO; 626 627 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 628 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 629 if (error) { 630 eprintsoline(so, error); 631 return (error); 632 } 633 mutex_enter(&so->so_lock); 634 /* Wait for T_INFO_ACK */ 635 if ((error = sowaitprim(so, T_INFO_REQ, T_INFO_ACK, 636 (t_uscalar_t)sizeof (struct T_info_ack), &mp, 0))) { 637 mutex_exit(&so->so_lock); 638 eprintsoline(so, error); 639 return (error); 640 } 641 642 ASSERT(mp); 643 copy_tinfo(so, (struct T_info_ack *)mp->b_rptr); 644 mutex_exit(&so->so_lock); 645 freemsg(mp); 646 return (check_tinfo(so)); 647 } 648 649 /* 650 * Send down T_capability_req and wait for the ack. 651 * Record interesting T_capability_ack values in the sonode. 
 */
static int
do_tcapability(struct sonode *so, t_uscalar_t cap_bits1)
{
	struct T_capability_req tcr;
	struct T_capability_ack *tca;
	mblk_t *mp;
	int error;
	sotpi_info_t *sti = SOTOTPI(so);

	ASSERT(cap_bits1 != 0);
	ASSERT((cap_bits1 & ~(TC1_ACCEPTOR_ID | TC1_INFO)) == 0);
	ASSERT(MUTEX_NOT_HELD(&so->so_lock));

	if (sti->sti_provinfo->tpi_capability == PI_NO)
		return (do_tinfo(so));

	if (so_no_tinfo) {
		sti->sti_addr_size = 0;
		if ((cap_bits1 &= ~TC1_INFO) == 0)
			return (0);
	}

	dprintso(so, 1, ("do_tcapability(%p)\n", (void *)so));

	/* Send T_CAPABILITY_REQ */
	tcr.PRIM_type = T_CAPABILITY_REQ;
	tcr.CAP_bits1 = cap_bits1;
	mp = soallocproto1(&tcr, sizeof (tcr),
	    sizeof (struct T_capability_req) + sizeof (struct T_capability_ack),
	    _ALLOC_INTR, CRED());
	if (mp == NULL) {
		eprintsoline(so, ENOBUFS);
		return (ENOBUFS);
	}
	/* T_CAPABILITY_REQ should be M_PCPROTO here */
	DB_TYPE(mp) = M_PCPROTO;

	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
	if (error) {
		eprintsoline(so, error);
		return (error);
	}
	mutex_enter(&so->so_lock);
	/* Wait for T_CAPABILITY_ACK */
	if ((error = sowaitprim(so, T_CAPABILITY_REQ, T_CAPABILITY_ACK,
	    (t_uscalar_t)sizeof (*tca), &mp, sock_capability_timeout * hz))) {
		mutex_exit(&so->so_lock);
		PI_PROVLOCK(sti->sti_provinfo);
		if (sti->sti_provinfo->tpi_capability == PI_DONTKNOW)
			sti->sti_provinfo->tpi_capability = PI_NO;
		PI_PROVUNLOCK(sti->sti_provinfo);
		ASSERT((so->so_mode & SM_ACCEPTOR_ID) == 0);
		if (cap_bits1 & TC1_INFO) {
			/*
			 * If the T_CAPABILITY_REQ timed out and then a
			 * T_INFO_REQ gets a protocol error, most likely
			 * the capability was slow (vs. unsupported). Return
			 * ENOSR for this case as a best guess.
			 */
			if (error == ETIME) {
				return ((error = do_tinfo(so)) == EPROTO ?
				    ENOSR : error);
			}
			return (do_tinfo(so));
		}
		return (0);
	}

	ASSERT(mp);
	tca = (struct T_capability_ack *)mp->b_rptr;

	ASSERT((cap_bits1 & TC1_INFO) == (tca->CAP_bits1 & TC1_INFO));
	so_proc_tcapability_ack(so, tca);

	cap_bits1 = tca->CAP_bits1;

	mutex_exit(&so->so_lock);
	freemsg(mp);

	if (cap_bits1 & TC1_INFO)
		return (check_tinfo(so));

	return (0);
}

/*
 * Process a T_CAPABILITY_ACK
 */
void
so_proc_tcapability_ack(struct sonode *so, struct T_capability_ack *tca)
{
	sotpi_info_t *sti = SOTOTPI(so);

	if (sti->sti_provinfo->tpi_capability == PI_DONTKNOW) {
		PI_PROVLOCK(sti->sti_provinfo);
		sti->sti_provinfo->tpi_capability = PI_YES;
		PI_PROVUNLOCK(sti->sti_provinfo);
	}

	if (tca->CAP_bits1 & TC1_ACCEPTOR_ID) {
		sti->sti_acceptor_id = tca->ACCEPTOR_id;
		so->so_mode |= SM_ACCEPTOR_ID;
	}

	if (tca->CAP_bits1 & TC1_INFO)
		copy_tinfo(so, &tca->INFO_ack);
}

/*
 * Retrieve socket error, clear error if not peek.
 */
int
sogeterr(struct sonode *so, boolean_t clear_err)
{
	int error;

	ASSERT(MUTEX_HELD(&so->so_lock));

	error = so->so_error;
	if (clear_err)
		so->so_error = 0;

	return (error);
}

/*
 * This routine is registered with the stream head to retrieve read
 * side errors.
 * It does not clear the socket error for a peeking read side operation.
 * If the error is to be cleared it sets *clearerr.
784 */ 785 int 786 sogetrderr(vnode_t *vp, int ispeek, int *clearerr) 787 { 788 struct sonode *so = VTOSO(vp); 789 int error; 790 791 mutex_enter(&so->so_lock); 792 if (ispeek) { 793 error = so->so_error; 794 *clearerr = 0; 795 } else { 796 error = so->so_error; 797 so->so_error = 0; 798 *clearerr = 1; 799 } 800 mutex_exit(&so->so_lock); 801 return (error); 802 } 803 804 /* 805 * This routine is registered with the stream head to retrieve write 806 * side errors. 807 * It does not clear the socket error for a peeking read side operation. 808 * It the error is to be cleared it sets *clearerr. 809 */ 810 int 811 sogetwrerr(vnode_t *vp, int ispeek, int *clearerr) 812 { 813 struct sonode *so = VTOSO(vp); 814 int error; 815 816 mutex_enter(&so->so_lock); 817 if (so->so_state & SS_CANTSENDMORE) { 818 error = EPIPE; 819 *clearerr = 0; 820 } else { 821 error = so->so_error; 822 if (ispeek) { 823 *clearerr = 0; 824 } else { 825 so->so_error = 0; 826 *clearerr = 1; 827 } 828 } 829 mutex_exit(&so->so_lock); 830 return (error); 831 } 832 833 /* 834 * Set a nonpersistent read and write error on the socket. 835 * Used when there is a T_uderror_ind for a connected socket. 836 * The caller also needs to call strsetrerror and strsetwerror 837 * after dropping the lock. 838 */ 839 void 840 soseterror(struct sonode *so, int error) 841 { 842 ASSERT(error != 0); 843 844 ASSERT(MUTEX_HELD(&so->so_lock)); 845 so->so_error = (ushort_t)error; 846 } 847 848 void 849 soisconnecting(struct sonode *so) 850 { 851 ASSERT(MUTEX_HELD(&so->so_lock)); 852 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); 853 so->so_state |= SS_ISCONNECTING; 854 cv_broadcast(&so->so_state_cv); 855 } 856 857 void 858 soisconnected(struct sonode *so) 859 { 860 ASSERT(MUTEX_HELD(&so->so_lock)); 861 so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING); 862 so->so_state |= SS_ISCONNECTED; 863 cv_broadcast(&so->so_state_cv); 864 } 865 866 /* 867 * The caller also needs to call strsetrerror, strsetwerror and strseteof. 868 */ 869 void 870 soisdisconnected(struct sonode *so, int error) 871 { 872 ASSERT(MUTEX_HELD(&so->so_lock)); 873 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); 874 so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE); 875 so->so_error = (ushort_t)error; 876 if (so->so_peercred != NULL) { 877 crfree(so->so_peercred); 878 so->so_peercred = NULL; 879 } 880 cv_broadcast(&so->so_state_cv); 881 } 882 883 /* 884 * For connected AF_UNIX SOCK_DGRAM sockets when the peer closes. 885 * Does not affect write side. 886 * The caller also has to call strsetrerror. 887 */ 888 static void 889 sobreakconn(struct sonode *so, int error) 890 { 891 ASSERT(MUTEX_HELD(&so->so_lock)); 892 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); 893 so->so_error = (ushort_t)error; 894 cv_broadcast(&so->so_state_cv); 895 } 896 897 /* 898 * Can no longer send. 899 * Caller must also call strsetwerror. 900 * 901 * We mark the peer address as no longer valid for getpeername, but 902 * leave it around for so_unix_close to notify the peer (that 903 * transport has no addressing held at that layer). 904 */ 905 void 906 socantsendmore(struct sonode *so) 907 { 908 ASSERT(MUTEX_HELD(&so->so_lock)); 909 so->so_state |= SS_CANTSENDMORE; 910 cv_broadcast(&so->so_state_cv); 911 } 912 913 /* 914 * The caller must call strseteof(,1) as well as this routine 915 * to change the socket state. 
916 */ 917 void 918 socantrcvmore(struct sonode *so) 919 { 920 ASSERT(MUTEX_HELD(&so->so_lock)); 921 so->so_state |= SS_CANTRCVMORE; 922 cv_broadcast(&so->so_state_cv); 923 } 924 925 /* 926 * The caller has sent down a "request_prim" primitive and wants to wait for 927 * an ack ("ack_prim") or an T_ERROR_ACK for it. 928 * The specified "ack_prim" can be a T_OK_ACK. 929 * 930 * Assumes that all the TPI acks are M_PCPROTO messages. 931 * 932 * Note that the socket is single-threaded (using so_lock_single) 933 * for all operations that generate TPI ack messages. Since 934 * only TPI ack messages are M_PCPROTO we should never receive 935 * anything except either the ack we are expecting or a T_ERROR_ACK 936 * for the same primitive. 937 */ 938 int 939 sowaitprim(struct sonode *so, t_scalar_t request_prim, t_scalar_t ack_prim, 940 t_uscalar_t min_size, mblk_t **mpp, clock_t wait) 941 { 942 mblk_t *mp; 943 union T_primitives *tpr; 944 int error; 945 946 dprintso(so, 1, ("sowaitprim(%p, %d, %d, %d, %p, %lu)\n", 947 (void *)so, request_prim, ack_prim, min_size, (void *)mpp, wait)); 948 949 ASSERT(MUTEX_HELD(&so->so_lock)); 950 951 error = sowaitack(so, &mp, wait); 952 if (error) 953 return (error); 954 955 dprintso(so, 1, ("got msg %p\n", (void *)mp)); 956 if (DB_TYPE(mp) != M_PCPROTO || 957 MBLKL(mp) < sizeof (tpr->type)) { 958 freemsg(mp); 959 eprintsoline(so, EPROTO); 960 return (EPROTO); 961 } 962 tpr = (union T_primitives *)mp->b_rptr; 963 /* 964 * Did we get the primitive that we were asking for? 965 * For T_OK_ACK we also check that it matches the request primitive. 966 */ 967 if (tpr->type == ack_prim && 968 (ack_prim != T_OK_ACK || 969 tpr->ok_ack.CORRECT_prim == request_prim)) { 970 if (MBLKL(mp) >= (ssize_t)min_size) { 971 /* Found what we are looking for */ 972 *mpp = mp; 973 return (0); 974 } 975 /* Too short */ 976 freemsg(mp); 977 eprintsoline(so, EPROTO); 978 return (EPROTO); 979 } 980 981 if (tpr->type == T_ERROR_ACK && 982 tpr->error_ack.ERROR_prim == request_prim) { 983 /* Error to the primitive we were looking for */ 984 if (tpr->error_ack.TLI_error == TSYSERR) { 985 error = tpr->error_ack.UNIX_error; 986 } else { 987 error = proto_tlitosyserr(tpr->error_ack.TLI_error); 988 } 989 dprintso(so, 0, ("error_ack for %d: %d/%d ->%d\n", 990 tpr->error_ack.ERROR_prim, tpr->error_ack.TLI_error, 991 tpr->error_ack.UNIX_error, error)); 992 freemsg(mp); 993 return (error); 994 } 995 /* 996 * Wrong primitive or T_ERROR_ACK for the wrong primitive 997 */ 998 #ifdef DEBUG 999 if (tpr->type == T_ERROR_ACK) { 1000 dprintso(so, 0, ("error_ack for %d: %d/%d\n", 1001 tpr->error_ack.ERROR_prim, tpr->error_ack.TLI_error, 1002 tpr->error_ack.UNIX_error)); 1003 } else if (tpr->type == T_OK_ACK) { 1004 dprintso(so, 0, ("ok_ack for %d, expected %d for %d\n", 1005 tpr->ok_ack.CORRECT_prim, ack_prim, request_prim)); 1006 } else { 1007 dprintso(so, 0, 1008 ("unexpected primitive %d, expected %d for %d\n", 1009 tpr->type, ack_prim, request_prim)); 1010 } 1011 #endif /* DEBUG */ 1012 1013 freemsg(mp); 1014 eprintsoline(so, EPROTO); 1015 return (EPROTO); 1016 } 1017 1018 /* 1019 * Wait for a T_OK_ACK for the specified primitive. 1020 */ 1021 int 1022 sowaitokack(struct sonode *so, t_scalar_t request_prim) 1023 { 1024 mblk_t *mp; 1025 int error; 1026 1027 error = sowaitprim(so, request_prim, T_OK_ACK, 1028 (t_uscalar_t)sizeof (struct T_ok_ack), &mp, 0); 1029 if (error) 1030 return (error); 1031 freemsg(mp); 1032 return (0); 1033 } 1034 1035 /* 1036 * Queue a received TPI ack message on sti_ack_mp. 
1037 */ 1038 void 1039 soqueueack(struct sonode *so, mblk_t *mp) 1040 { 1041 sotpi_info_t *sti = SOTOTPI(so); 1042 1043 if (DB_TYPE(mp) != M_PCPROTO) { 1044 zcmn_err(getzoneid(), CE_WARN, 1045 "sockfs: received unexpected M_PROTO TPI ack. Prim %d\n", 1046 *(t_scalar_t *)mp->b_rptr); 1047 freemsg(mp); 1048 return; 1049 } 1050 1051 mutex_enter(&so->so_lock); 1052 if (sti->sti_ack_mp != NULL) { 1053 dprintso(so, 1, ("sti_ack_mp already set\n")); 1054 freemsg(sti->sti_ack_mp); 1055 sti->sti_ack_mp = NULL; 1056 } 1057 sti->sti_ack_mp = mp; 1058 cv_broadcast(&sti->sti_ack_cv); 1059 mutex_exit(&so->so_lock); 1060 } 1061 1062 /* 1063 * Wait for a TPI ack ignoring signals and errors. 1064 */ 1065 int 1066 sowaitack(struct sonode *so, mblk_t **mpp, clock_t wait) 1067 { 1068 sotpi_info_t *sti = SOTOTPI(so); 1069 1070 ASSERT(MUTEX_HELD(&so->so_lock)); 1071 1072 while (sti->sti_ack_mp == NULL) { 1073 #ifdef SOCK_TEST 1074 if (wait == 0 && sock_test_timelimit != 0) 1075 wait = sock_test_timelimit; 1076 #endif 1077 if (wait != 0) { 1078 /* 1079 * Only wait for the time limit. 1080 */ 1081 clock_t now; 1082 1083 time_to_wait(&now, wait); 1084 if (cv_timedwait(&sti->sti_ack_cv, &so->so_lock, 1085 now) == -1) { 1086 eprintsoline(so, ETIME); 1087 return (ETIME); 1088 } 1089 } 1090 else 1091 cv_wait(&sti->sti_ack_cv, &so->so_lock); 1092 } 1093 *mpp = sti->sti_ack_mp; 1094 #ifdef DEBUG 1095 { 1096 union T_primitives *tpr; 1097 mblk_t *mp = *mpp; 1098 1099 tpr = (union T_primitives *)mp->b_rptr; 1100 ASSERT(DB_TYPE(mp) == M_PCPROTO); 1101 ASSERT(tpr->type == T_OK_ACK || 1102 tpr->type == T_ERROR_ACK || 1103 tpr->type == T_BIND_ACK || 1104 tpr->type == T_CAPABILITY_ACK || 1105 tpr->type == T_INFO_ACK || 1106 tpr->type == T_OPTMGMT_ACK); 1107 } 1108 #endif /* DEBUG */ 1109 sti->sti_ack_mp = NULL; 1110 return (0); 1111 } 1112 1113 /* 1114 * Queue a received T_CONN_IND message on sti_conn_ind_head/tail. 1115 */ 1116 void 1117 soqueueconnind(struct sonode *so, mblk_t *mp) 1118 { 1119 sotpi_info_t *sti = SOTOTPI(so); 1120 1121 if (DB_TYPE(mp) != M_PROTO) { 1122 zcmn_err(getzoneid(), CE_WARN, 1123 "sockfs: received unexpected M_PCPROTO T_CONN_IND\n"); 1124 freemsg(mp); 1125 return; 1126 } 1127 1128 mutex_enter(&so->so_lock); 1129 ASSERT(mp->b_next == NULL); 1130 if (sti->sti_conn_ind_head == NULL) { 1131 sti->sti_conn_ind_head = mp; 1132 } else { 1133 ASSERT(sti->sti_conn_ind_tail->b_next == NULL); 1134 sti->sti_conn_ind_tail->b_next = mp; 1135 } 1136 sti->sti_conn_ind_tail = mp; 1137 /* Wakeup a single consumer of the T_CONN_IND */ 1138 cv_signal(&so->so_acceptq_cv); 1139 mutex_exit(&so->so_lock); 1140 } 1141 1142 /* 1143 * Wait for a T_CONN_IND. 1144 * Don't wait if nonblocking. 1145 * Accept signals and socket errors. 
1146 */ 1147 int 1148 sowaitconnind(struct sonode *so, int fmode, mblk_t **mpp) 1149 { 1150 mblk_t *mp; 1151 sotpi_info_t *sti = SOTOTPI(so); 1152 int error = 0; 1153 1154 ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 1155 mutex_enter(&so->so_lock); 1156 check_error: 1157 if (so->so_error) { 1158 error = sogeterr(so, B_TRUE); 1159 if (error) { 1160 mutex_exit(&so->so_lock); 1161 return (error); 1162 } 1163 } 1164 1165 if (sti->sti_conn_ind_head == NULL) { 1166 if (fmode & (FNDELAY|FNONBLOCK)) { 1167 error = EWOULDBLOCK; 1168 goto done; 1169 } 1170 1171 if (so->so_state & SS_CLOSING) { 1172 error = EINTR; 1173 goto done; 1174 } 1175 1176 if (!cv_wait_sig_swap(&so->so_acceptq_cv, &so->so_lock)) { 1177 error = EINTR; 1178 goto done; 1179 } 1180 goto check_error; 1181 } 1182 mp = sti->sti_conn_ind_head; 1183 sti->sti_conn_ind_head = mp->b_next; 1184 mp->b_next = NULL; 1185 if (sti->sti_conn_ind_head == NULL) { 1186 ASSERT(sti->sti_conn_ind_tail == mp); 1187 sti->sti_conn_ind_tail = NULL; 1188 } 1189 *mpp = mp; 1190 done: 1191 mutex_exit(&so->so_lock); 1192 return (error); 1193 } 1194 1195 /* 1196 * Flush a T_CONN_IND matching the sequence number from the list. 1197 * Return zero if found; non-zero otherwise. 1198 * This is called very infrequently thus it is ok to do a linear search. 1199 */ 1200 int 1201 soflushconnind(struct sonode *so, t_scalar_t seqno) 1202 { 1203 mblk_t *prevmp, *mp; 1204 struct T_conn_ind *tci; 1205 sotpi_info_t *sti = SOTOTPI(so); 1206 1207 mutex_enter(&so->so_lock); 1208 for (prevmp = NULL, mp = sti->sti_conn_ind_head; mp != NULL; 1209 prevmp = mp, mp = mp->b_next) { 1210 tci = (struct T_conn_ind *)mp->b_rptr; 1211 if (tci->SEQ_number == seqno) { 1212 dprintso(so, 1, 1213 ("t_discon_ind: found T_CONN_IND %d\n", seqno)); 1214 /* Deleting last? */ 1215 if (sti->sti_conn_ind_tail == mp) { 1216 sti->sti_conn_ind_tail = prevmp; 1217 } 1218 if (prevmp == NULL) { 1219 /* Deleting first */ 1220 sti->sti_conn_ind_head = mp->b_next; 1221 } else { 1222 prevmp->b_next = mp->b_next; 1223 } 1224 mp->b_next = NULL; 1225 1226 ASSERT((sti->sti_conn_ind_head == NULL && 1227 sti->sti_conn_ind_tail == NULL) || 1228 (sti->sti_conn_ind_head != NULL && 1229 sti->sti_conn_ind_tail != NULL)); 1230 1231 so->so_error = ECONNABORTED; 1232 mutex_exit(&so->so_lock); 1233 1234 /* 1235 * T_KSSL_PROXY_CONN_IND may carry a handle for 1236 * an SSL context, and needs to be released. 1237 */ 1238 if ((tci->PRIM_type == T_SSL_PROXY_CONN_IND) && 1239 (mp->b_cont != NULL)) { 1240 kssl_ctx_t kssl_ctx; 1241 1242 ASSERT(MBLKL(mp->b_cont) == 1243 sizeof (kssl_ctx_t)); 1244 kssl_ctx = *((kssl_ctx_t *)mp->b_cont->b_rptr); 1245 kssl_release_ctx(kssl_ctx); 1246 } 1247 freemsg(mp); 1248 return (0); 1249 } 1250 } 1251 mutex_exit(&so->so_lock); 1252 dprintso(so, 1, ("t_discon_ind: NOT found T_CONN_IND %d\n", seqno)); 1253 return (-1); 1254 } 1255 1256 /* 1257 * Wait until the socket is connected or there is an error. 1258 * fmode should contain any nonblocking flags. nosig should be 1259 * set if the caller does not want the wait to be interrupted by a signal. 
1260 */ 1261 int 1262 sowaitconnected(struct sonode *so, int fmode, int nosig) 1263 { 1264 int error; 1265 1266 ASSERT(MUTEX_HELD(&so->so_lock)); 1267 1268 while ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 1269 SS_ISCONNECTING && so->so_error == 0) { 1270 1271 dprintso(so, 1, ("waiting for SS_ISCONNECTED on %p\n", 1272 (void *)so)); 1273 if (fmode & (FNDELAY|FNONBLOCK)) 1274 return (EINPROGRESS); 1275 1276 if (so->so_state & SS_CLOSING) 1277 return (EINTR); 1278 1279 if (nosig) 1280 cv_wait(&so->so_state_cv, &so->so_lock); 1281 else if (!cv_wait_sig_swap(&so->so_state_cv, &so->so_lock)) { 1282 /* 1283 * Return EINTR and let the application use 1284 * nonblocking techniques for detecting when 1285 * the connection has been established. 1286 */ 1287 return (EINTR); 1288 } 1289 dprintso(so, 1, ("awoken on %p\n", (void *)so)); 1290 } 1291 1292 if (so->so_error != 0) { 1293 error = sogeterr(so, B_TRUE); 1294 ASSERT(error != 0); 1295 dprintso(so, 1, ("sowaitconnected: error %d\n", error)); 1296 return (error); 1297 } 1298 if (!(so->so_state & SS_ISCONNECTED)) { 1299 /* 1300 * Could have received a T_ORDREL_IND or a T_DISCON_IND with 1301 * zero errno. Or another thread could have consumed so_error 1302 * e.g. by calling read. 1303 */ 1304 error = ECONNREFUSED; 1305 dprintso(so, 1, ("sowaitconnected: error %d\n", error)); 1306 return (error); 1307 } 1308 return (0); 1309 } 1310 1311 1312 /* 1313 * Handle the signal generation aspect of urgent data. 1314 */ 1315 static void 1316 so_oob_sig(struct sonode *so, int extrasig, 1317 strsigset_t *signals, strpollset_t *pollwakeups) 1318 { 1319 sotpi_info_t *sti = SOTOTPI(so); 1320 1321 ASSERT(MUTEX_HELD(&so->so_lock)); 1322 1323 ASSERT(so_verify_oobstate(so)); 1324 ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt); 1325 if (sti->sti_oobsigcnt > sti->sti_oobcnt) { 1326 /* 1327 * Signal has already been generated once for this 1328 * urgent "event". However, since TCP can receive updated 1329 * urgent pointers we still generate a signal. 1330 */ 1331 ASSERT(so->so_state & SS_OOBPEND); 1332 if (extrasig) { 1333 *signals |= S_RDBAND; 1334 *pollwakeups |= POLLRDBAND; 1335 } 1336 return; 1337 } 1338 1339 sti->sti_oobsigcnt++; 1340 ASSERT(sti->sti_oobsigcnt > 0); /* Wraparound */ 1341 ASSERT(sti->sti_oobsigcnt > sti->sti_oobcnt); 1342 1343 /* 1344 * Record (for select/poll) that urgent data is pending. 1345 */ 1346 so->so_state |= SS_OOBPEND; 1347 /* 1348 * New urgent data on the way so forget about any old 1349 * urgent data. 1350 */ 1351 so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA); 1352 if (so->so_oobmsg != NULL) { 1353 dprintso(so, 1, ("sock: discarding old oob\n")); 1354 freemsg(so->so_oobmsg); 1355 so->so_oobmsg = NULL; 1356 } 1357 *signals |= S_RDBAND; 1358 *pollwakeups |= POLLRDBAND; 1359 ASSERT(so_verify_oobstate(so)); 1360 } 1361 1362 /* 1363 * Handle the processing of the T_EXDATA_IND with urgent data. 1364 * Returns the T_EXDATA_IND if it should be queued on the read queue. 1365 */ 1366 /* ARGSUSED2 */ 1367 static mblk_t * 1368 so_oob_exdata(struct sonode *so, mblk_t *mp, 1369 strsigset_t *signals, strpollset_t *pollwakeups) 1370 { 1371 sotpi_info_t *sti = SOTOTPI(so); 1372 1373 ASSERT(MUTEX_HELD(&so->so_lock)); 1374 1375 ASSERT(so_verify_oobstate(so)); 1376 1377 ASSERT(sti->sti_oobsigcnt > sti->sti_oobcnt); 1378 1379 sti->sti_oobcnt++; 1380 ASSERT(sti->sti_oobcnt > 0); /* wraparound? */ 1381 ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt); 1382 1383 /* 1384 * Set MSGMARK for SIOCATMARK. 
1385 */ 1386 mp->b_flag |= MSGMARK; 1387 1388 ASSERT(so_verify_oobstate(so)); 1389 return (mp); 1390 } 1391 1392 /* 1393 * Handle the processing of the actual urgent data. 1394 * Returns the data mblk if it should be queued on the read queue. 1395 */ 1396 static mblk_t * 1397 so_oob_data(struct sonode *so, mblk_t *mp, 1398 strsigset_t *signals, strpollset_t *pollwakeups) 1399 { 1400 sotpi_info_t *sti = SOTOTPI(so); 1401 1402 ASSERT(MUTEX_HELD(&so->so_lock)); 1403 1404 ASSERT(so_verify_oobstate(so)); 1405 1406 ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt); 1407 ASSERT(mp != NULL); 1408 /* 1409 * For OOBINLINE we keep the data in the T_EXDATA_IND. 1410 * Otherwise we store it in so_oobmsg. 1411 */ 1412 ASSERT(so->so_oobmsg == NULL); 1413 if (so->so_options & SO_OOBINLINE) { 1414 *pollwakeups |= POLLIN | POLLRDNORM | POLLRDBAND; 1415 *signals |= S_INPUT | S_RDNORM; 1416 } else { 1417 *pollwakeups |= POLLRDBAND; 1418 so->so_state |= SS_HAVEOOBDATA; 1419 so->so_oobmsg = mp; 1420 mp = NULL; 1421 } 1422 ASSERT(so_verify_oobstate(so)); 1423 return (mp); 1424 } 1425 1426 /* 1427 * Caller must hold the mutex. 1428 * For delayed processing, save the T_DISCON_IND received 1429 * from below on sti_discon_ind_mp. 1430 * When the message is processed the framework will call: 1431 * (*func)(so, mp); 1432 */ 1433 static void 1434 so_save_discon_ind(struct sonode *so, 1435 mblk_t *mp, 1436 void (*func)(struct sonode *so, mblk_t *)) 1437 { 1438 sotpi_info_t *sti = SOTOTPI(so); 1439 1440 ASSERT(MUTEX_HELD(&so->so_lock)); 1441 1442 /* 1443 * Discard new T_DISCON_IND if we have already received another. 1444 * Currently the earlier message can either be on sti_discon_ind_mp 1445 * or being processed. 1446 */ 1447 if (sti->sti_discon_ind_mp != NULL || (so->so_flag & SOASYNC_UNBIND)) { 1448 zcmn_err(getzoneid(), CE_WARN, 1449 "sockfs: received unexpected additional T_DISCON_IND\n"); 1450 freemsg(mp); 1451 return; 1452 } 1453 mp->b_prev = (mblk_t *)func; 1454 mp->b_next = NULL; 1455 sti->sti_discon_ind_mp = mp; 1456 } 1457 1458 /* 1459 * Caller must hold the mutex and make sure that either SOLOCKED 1460 * or SOASYNC_UNBIND is set. Called from so_unlock_single(). 1461 * Perform delayed processing of T_DISCON_IND message on sti_discon_ind_mp. 1462 * Need to ensure that strsock_proto() will not end up sleeping for 1463 * SOASYNC_UNBIND, while executing this function. 1464 */ 1465 void 1466 so_drain_discon_ind(struct sonode *so) 1467 { 1468 mblk_t *bp; 1469 void (*func)(struct sonode *so, mblk_t *); 1470 sotpi_info_t *sti = SOTOTPI(so); 1471 1472 ASSERT(MUTEX_HELD(&so->so_lock)); 1473 ASSERT(so->so_flag & (SOLOCKED|SOASYNC_UNBIND)); 1474 1475 /* Process T_DISCON_IND on sti_discon_ind_mp */ 1476 if ((bp = sti->sti_discon_ind_mp) != NULL) { 1477 sti->sti_discon_ind_mp = NULL; 1478 func = (void (*)())bp->b_prev; 1479 bp->b_prev = NULL; 1480 1481 /* 1482 * This (*func) is supposed to generate a message downstream 1483 * and we need to have a flag set until the corresponding 1484 * upstream message reaches stream head. 1485 * When processing T_DISCON_IND in strsock_discon_ind 1486 * we hold SOASYN_UNBIND when sending T_UNBIND_REQ down and 1487 * drop the flag after we get the ACK in strsock_proto. 1488 */ 1489 (void) (*func)(so, bp); 1490 } 1491 } 1492 1493 /* 1494 * Caller must hold the mutex. 1495 * Remove the T_DISCON_IND on sti_discon_ind_mp. 
1496 */ 1497 void 1498 so_flush_discon_ind(struct sonode *so) 1499 { 1500 mblk_t *bp; 1501 sotpi_info_t *sti = SOTOTPI(so); 1502 1503 ASSERT(MUTEX_HELD(&so->so_lock)); 1504 1505 /* 1506 * Remove T_DISCON_IND mblk at sti_discon_ind_mp. 1507 */ 1508 if ((bp = sti->sti_discon_ind_mp) != NULL) { 1509 sti->sti_discon_ind_mp = NULL; 1510 bp->b_prev = NULL; 1511 freemsg(bp); 1512 } 1513 } 1514 1515 /* 1516 * Caller must hold the mutex. 1517 * 1518 * This function is used to process the T_DISCON_IND message. It does 1519 * immediate processing when called from strsock_proto and delayed 1520 * processing of discon_ind saved on sti_discon_ind_mp when called from 1521 * so_drain_discon_ind. When a T_DISCON_IND message is saved in 1522 * sti_discon_ind_mp for delayed processing, this function is registered 1523 * as the callback function to process the message. 1524 * 1525 * SOASYNC_UNBIND should be held in this function, during the non-blocking 1526 * unbind operation, and should be released only after we receive the ACK 1527 * in strsock_proto, for the T_UNBIND_REQ sent here. Since SOLOCKED is not set, 1528 * no TPI messages would be sent down at this time. This is to prevent M_FLUSH 1529 * sent from either this function or tcp_unbind(), flushing away any TPI 1530 * message that is being sent down and stays in a lower module's queue. 1531 * 1532 * This function drops so_lock and grabs it again. 1533 */ 1534 static void 1535 strsock_discon_ind(struct sonode *so, mblk_t *discon_mp) 1536 { 1537 struct vnode *vp; 1538 struct stdata *stp; 1539 union T_primitives *tpr; 1540 struct T_unbind_req *ubr; 1541 mblk_t *mp; 1542 int error; 1543 sotpi_info_t *sti = SOTOTPI(so); 1544 1545 ASSERT(MUTEX_HELD(&so->so_lock)); 1546 ASSERT(discon_mp); 1547 ASSERT(discon_mp->b_rptr); 1548 1549 tpr = (union T_primitives *)discon_mp->b_rptr; 1550 ASSERT(tpr->type == T_DISCON_IND); 1551 1552 vp = SOTOV(so); 1553 stp = vp->v_stream; 1554 ASSERT(stp); 1555 1556 /* 1557 * Not a listener 1558 */ 1559 ASSERT((so->so_state & SS_ACCEPTCONN) == 0); 1560 1561 /* 1562 * This assumes that the name space for DISCON_reason 1563 * is the errno name space. 1564 */ 1565 soisdisconnected(so, tpr->discon_ind.DISCON_reason); 1566 sti->sti_laddr_valid = 0; 1567 sti->sti_faddr_valid = 0; 1568 1569 /* 1570 * Unbind with the transport without blocking. 1571 * If we've already received a T_DISCON_IND do not unbind. 1572 * 1573 * If there is no preallocated unbind message, we have already 1574 * unbound with the transport 1575 * 1576 * If the socket is not bound, no need to unbind. 1577 */ 1578 mp = sti->sti_unbind_mp; 1579 if (mp == NULL) { 1580 ASSERT(!(so->so_state & SS_ISBOUND)); 1581 mutex_exit(&so->so_lock); 1582 } else if (!(so->so_state & SS_ISBOUND)) { 1583 mutex_exit(&so->so_lock); 1584 } else { 1585 sti->sti_unbind_mp = NULL; 1586 1587 /* 1588 * Is another T_DISCON_IND being processed. 1589 */ 1590 ASSERT((so->so_flag & SOASYNC_UNBIND) == 0); 1591 1592 /* 1593 * Make strsock_proto ignore T_OK_ACK and T_ERROR_ACK for 1594 * this unbind. Set SOASYNC_UNBIND. This should be cleared 1595 * only after we receive the ACK in strsock_proto. 1596 */ 1597 so->so_flag |= SOASYNC_UNBIND; 1598 ASSERT(!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING))); 1599 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN); 1600 sti->sti_laddr_valid = 0; 1601 mutex_exit(&so->so_lock); 1602 1603 /* 1604 * Send down T_UNBIND_REQ ignoring flow control. 1605 * XXX Assumes that MSG_IGNFLOW implies that this thread 1606 * does not run service procedures. 
1607 */ 1608 ASSERT(DB_TYPE(mp) == M_PROTO); 1609 ubr = (struct T_unbind_req *)mp->b_rptr; 1610 mp->b_wptr += sizeof (*ubr); 1611 ubr->PRIM_type = T_UNBIND_REQ; 1612 1613 /* 1614 * Flush the read and write side (except stream head read queue) 1615 * and send down T_UNBIND_REQ. 1616 */ 1617 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW); 1618 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1619 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 1620 /* LINTED - warning: statement has no consequent: if */ 1621 if (error) { 1622 eprintsoline(so, error); 1623 } 1624 } 1625 1626 if (tpr->discon_ind.DISCON_reason != 0) 1627 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 1628 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 1629 strseteof(SOTOV(so), 1); 1630 /* 1631 * strseteof takes care of read side wakeups, 1632 * pollwakeups, and signals. 1633 */ 1634 dprintso(so, 1, ("T_DISCON_IND: error %d\n", so->so_error)); 1635 freemsg(discon_mp); 1636 1637 1638 pollwakeup(&stp->sd_pollist, POLLOUT); 1639 mutex_enter(&stp->sd_lock); 1640 1641 /* 1642 * Wake sleeping write 1643 */ 1644 if (stp->sd_flag & WSLEEP) { 1645 stp->sd_flag &= ~WSLEEP; 1646 cv_broadcast(&stp->sd_wrq->q_wait); 1647 } 1648 1649 /* 1650 * strsendsig can handle multiple signals with a 1651 * single call. Send SIGPOLL for S_OUTPUT event. 1652 */ 1653 if (stp->sd_sigflags & S_OUTPUT) 1654 strsendsig(stp->sd_siglist, S_OUTPUT, 0, 0); 1655 1656 mutex_exit(&stp->sd_lock); 1657 mutex_enter(&so->so_lock); 1658 } 1659 1660 /* 1661 * This routine is registered with the stream head to receive M_PROTO 1662 * and M_PCPROTO messages. 1663 * 1664 * Returns NULL if the message was consumed. 1665 * Returns an mblk to make that mblk be processed (and queued) by the stream 1666 * head. 1667 * 1668 * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and 1669 * *pollwakeups) for the stream head to take action on. Note that since 1670 * sockets always deliver SIGIO for every new piece of data this routine 1671 * never sets *firstmsgsigs; any signals are returned in *allmsgsigs. 1672 * 1673 * This routine handles all data related TPI messages independent of 1674 * the type of the socket i.e. it doesn't care if T_UNITDATA_IND message 1675 * arrive on a SOCK_STREAM. 1676 */ 1677 static mblk_t * 1678 strsock_proto(vnode_t *vp, mblk_t *mp, 1679 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 1680 strsigset_t *allmsgsigs, strpollset_t *pollwakeups) 1681 { 1682 union T_primitives *tpr; 1683 struct sonode *so; 1684 sotpi_info_t *sti; 1685 1686 so = VTOSO(vp); 1687 sti = SOTOTPI(so); 1688 1689 dprintso(so, 1, ("strsock_proto(%p, %p)\n", (void *)vp, (void *)mp)); 1690 1691 /* Set default return values */ 1692 *firstmsgsigs = *wakeups = *allmsgsigs = *pollwakeups = 0; 1693 1694 ASSERT(DB_TYPE(mp) == M_PROTO || 1695 DB_TYPE(mp) == M_PCPROTO); 1696 1697 if (MBLKL(mp) < sizeof (tpr->type)) { 1698 /* The message is too short to even contain the primitive */ 1699 zcmn_err(getzoneid(), CE_WARN, 1700 "sockfs: Too short TPI message received. Len = %ld\n", 1701 (ptrdiff_t)(MBLKL(mp))); 1702 freemsg(mp); 1703 return (NULL); 1704 } 1705 if (!__TPI_PRIM_ISALIGNED(mp->b_rptr)) { 1706 /* The read pointer is not aligned correctly for TPI */ 1707 zcmn_err(getzoneid(), CE_WARN, 1708 "sockfs: Unaligned TPI message received. 
rptr = %p\n",
		    (void *)mp->b_rptr);
		freemsg(mp);
		return (NULL);
	}
	tpr = (union T_primitives *)mp->b_rptr;
	dprintso(so, 1, ("strsock_proto: primitive %d\n", tpr->type));

	switch (tpr->type) {

	case T_DATA_IND:
		if (MBLKL(mp) < sizeof (struct T_data_ind)) {
			zcmn_err(getzoneid(), CE_WARN,
			    "sockfs: Too short T_DATA_IND. Len = %ld\n",
			    (ptrdiff_t)(MBLKL(mp)));
			freemsg(mp);
			return (NULL);
		}
		/*
		 * Ignore zero-length T_DATA_IND messages. These might be
		 * generated by some transports.
		 * This is needed to prevent read (which skips the M_PROTO
		 * part) to unexpectedly return 0 (or return EWOULDBLOCK
		 * on a non-blocking socket after select/poll has indicated
		 * that data is available).
		 */
		if (msgdsize(mp->b_cont) == 0) {
			dprintso(so, 0,
			    ("strsock_proto: zero length T_DATA_IND\n"));
			freemsg(mp);
			return (NULL);
		}
		*allmsgsigs = S_INPUT | S_RDNORM;
		*pollwakeups = POLLIN | POLLRDNORM;
		*wakeups = RSLEEP;
		return (mp);

	case T_UNITDATA_IND: {
		struct T_unitdata_ind *tudi = &tpr->unitdata_ind;
		void *addr;
		t_uscalar_t addrlen;

		if (MBLKL(mp) < sizeof (struct T_unitdata_ind)) {
			zcmn_err(getzoneid(), CE_WARN,
			    "sockfs: Too short T_UNITDATA_IND. Len = %ld\n",
			    (ptrdiff_t)(MBLKL(mp)));
			freemsg(mp);
			return (NULL);
		}

		/* Is this not a connected datagram socket? */
		if ((so->so_mode & SM_CONNREQUIRED) ||
		    !(so->so_state & SS_ISCONNECTED)) {
			/*
			 * Not a connected datagram socket. Look for
			 * the SO_UNIX_CLOSE option. If such an option is found
			 * discard the message (since it has no meaning
			 * unless connected).
			 */
			if (so->so_family == AF_UNIX && msgdsize(mp) == 0 &&
			    tudi->OPT_length != 0) {
				void *opt;
				t_uscalar_t optlen = tudi->OPT_length;

				opt = sogetoff(mp, tudi->OPT_offset,
				    optlen, __TPI_ALIGN_SIZE);
				if (opt == NULL) {
					/* The len/off falls outside mp */
					freemsg(mp);
					mutex_enter(&so->so_lock);
					soseterror(so, EPROTO);
					mutex_exit(&so->so_lock);
					zcmn_err(getzoneid(), CE_WARN,
					    "sockfs: T_unidata_ind with "
					    "invalid optlen/offset %u/%d\n",
					    optlen, tudi->OPT_offset);
					return (NULL);
				}
				if (so_getopt_unix_close(opt, optlen)) {
					freemsg(mp);
					return (NULL);
				}
			}
			*allmsgsigs = S_INPUT | S_RDNORM;
			*pollwakeups = POLLIN | POLLRDNORM;
			*wakeups = RSLEEP;
			if (audit_active)
				audit_sock(T_UNITDATA_IND, strvp2wq(vp),
				    mp, 0);
			return (mp);
		}

		/*
		 * A connected datagram socket. For AF_INET{,6} we verify that
		 * the source address matches the "connected to" address.
		 * The semantics of AF_UNIX sockets is to not verify
		 * the source address.
		 * Note that this source address verification is transport
		 * specific. Thus the real fix would be to extend TPI
		 * to allow T_CONN_REQ messages to be sent to connectionless
		 * transport providers and always let the transport provider
		 * do whatever filtering is needed.
		 *
		 * The verification/filtering semantics for transports
		 * other than AF_INET and AF_UNIX are unknown. The choice
		 * would be to either filter using bcmp or let all messages
		 * get through. This code does not filter other address
		 * families since this at least allows the application to
		 * work around any missing filtering.
		 *
		 * XXX Should we move filtering to UDP/ICMP???
		 * That would require passing e.g. a T_DISCON_REQ to UDP
		 * when the socket becomes unconnected.
		 */
		addrlen = tudi->SRC_length;
		/*
		 * The alignment restriction is really too strict but
		 * we want enough alignment to inspect the fields of
		 * a sockaddr_in.
		 */
		addr = sogetoff(mp, tudi->SRC_offset, addrlen,
		    __TPI_ALIGN_SIZE);
		if (addr == NULL) {
			freemsg(mp);
			mutex_enter(&so->so_lock);
			soseterror(so, EPROTO);
			mutex_exit(&so->so_lock);
			zcmn_err(getzoneid(), CE_WARN,
			    "sockfs: T_unidata_ind with invalid "
			    "addrlen/offset %u/%d\n",
			    addrlen, tudi->SRC_offset);
			return (NULL);
		}

		if (so->so_family == AF_INET) {
			/*
			 * For AF_INET we allow wildcarding both sin_addr
			 * and sin_port.
			 */
			struct sockaddr_in *faddr, *sin;

			/* Prevent sti_faddr_sa from changing while accessed */
			mutex_enter(&so->so_lock);
			ASSERT(sti->sti_faddr_len ==
			    (socklen_t)sizeof (struct sockaddr_in));
			faddr = (struct sockaddr_in *)sti->sti_faddr_sa;
			sin = (struct sockaddr_in *)addr;
			if (addrlen !=
			    (t_uscalar_t)sizeof (struct sockaddr_in) ||
			    (sin->sin_addr.s_addr != faddr->sin_addr.s_addr &&
			    faddr->sin_addr.s_addr != INADDR_ANY) ||
			    (so->so_type != SOCK_RAW &&
			    sin->sin_port != faddr->sin_port &&
			    faddr->sin_port != 0)) {
#ifdef DEBUG
				dprintso(so, 0,
				    ("sockfs: T_UNITDATA_IND mismatch: %s",
				    pr_addr(so->so_family,
				    (struct sockaddr *)addr, addrlen)));
				dprintso(so, 0, (" - %s\n",
				    pr_addr(so->so_family, sti->sti_faddr_sa,
				    (t_uscalar_t)sti->sti_faddr_len)));
#endif /* DEBUG */
				mutex_exit(&so->so_lock);
				freemsg(mp);
				return (NULL);
			}
			mutex_exit(&so->so_lock);
		} else if (so->so_family == AF_INET6) {
			/*
			 * For AF_INET6 we allow wildcarding both sin6_addr
			 * and sin6_port.
			 */
			struct sockaddr_in6 *faddr6, *sin6;
			static struct in6_addr zeroes; /* inits to all zeros */

			/* Prevent sti_faddr_sa from changing while accessed */
			mutex_enter(&so->so_lock);
			ASSERT(sti->sti_faddr_len ==
			    (socklen_t)sizeof (struct sockaddr_in6));
			faddr6 = (struct sockaddr_in6 *)sti->sti_faddr_sa;
			sin6 = (struct sockaddr_in6 *)addr;
			/* XXX could we get a mapped address ::ffff:0.0.0.0 ? */
			if (addrlen !=
			    (t_uscalar_t)sizeof (struct sockaddr_in6) ||
			    (!IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
			    &faddr6->sin6_addr) &&
			    !IN6_ARE_ADDR_EQUAL(&faddr6->sin6_addr, &zeroes)) ||
			    (so->so_type != SOCK_RAW &&
			    sin6->sin6_port != faddr6->sin6_port &&
			    faddr6->sin6_port != 0)) {
#ifdef DEBUG
				dprintso(so, 0,
				    ("sockfs: T_UNITDATA_IND mismatch: %s",
				    pr_addr(so->so_family,
				    (struct sockaddr *)addr, addrlen)));
				dprintso(so, 0, (" - %s\n",
				    pr_addr(so->so_family, sti->sti_faddr_sa,
				    (t_uscalar_t)sti->sti_faddr_len)));
#endif /* DEBUG */
				mutex_exit(&so->so_lock);
				freemsg(mp);
				return (NULL);
			}
			mutex_exit(&so->so_lock);
		} else if (so->so_family == AF_UNIX &&
		    msgdsize(mp->b_cont) == 0 &&
		    tudi->OPT_length != 0) {
			/*
			 * Attempt to extract AF_UNIX
			 * SO_UNIX_CLOSE indication from options.
1919 */ 1920 void *opt; 1921 t_uscalar_t optlen = tudi->OPT_length; 1922 1923 opt = sogetoff(mp, tudi->OPT_offset, 1924 optlen, __TPI_ALIGN_SIZE); 1925 if (opt == NULL) { 1926 /* The len/off falls outside mp */ 1927 freemsg(mp); 1928 mutex_enter(&so->so_lock); 1929 soseterror(so, EPROTO); 1930 mutex_exit(&so->so_lock); 1931 zcmn_err(getzoneid(), CE_WARN, 1932 "sockfs: T_unidata_ind with invalid " 1933 "optlen/offset %u/%d\n", 1934 optlen, tudi->OPT_offset); 1935 return (NULL); 1936 } 1937 /* 1938 * If we received a unix close indication mark the 1939 * socket and discard this message. 1940 */ 1941 if (so_getopt_unix_close(opt, optlen)) { 1942 mutex_enter(&so->so_lock); 1943 sobreakconn(so, ECONNRESET); 1944 mutex_exit(&so->so_lock); 1945 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 1946 freemsg(mp); 1947 *pollwakeups = POLLIN | POLLRDNORM; 1948 *allmsgsigs = S_INPUT | S_RDNORM; 1949 *wakeups = RSLEEP; 1950 return (NULL); 1951 } 1952 } 1953 *allmsgsigs = S_INPUT | S_RDNORM; 1954 *pollwakeups = POLLIN | POLLRDNORM; 1955 *wakeups = RSLEEP; 1956 return (mp); 1957 } 1958 1959 case T_OPTDATA_IND: { 1960 struct T_optdata_ind *tdi = &tpr->optdata_ind; 1961 1962 if (MBLKL(mp) < sizeof (struct T_optdata_ind)) { 1963 zcmn_err(getzoneid(), CE_WARN, 1964 "sockfs: Too short T_OPTDATA_IND. Len = %ld\n", 1965 (ptrdiff_t)(MBLKL(mp))); 1966 freemsg(mp); 1967 return (NULL); 1968 } 1969 /* 1970 * Allow zero-length messages carrying options. 1971 * This is used when carrying the SO_UNIX_CLOSE option. 1972 */ 1973 if (so->so_family == AF_UNIX && msgdsize(mp->b_cont) == 0 && 1974 tdi->OPT_length != 0) { 1975 /* 1976 * Attempt to extract AF_UNIX close indication 1977 * from the options. Ignore any other options - 1978 * those are handled once the message is removed 1979 * from the queue. 1980 * The close indication message should not carry data. 1981 */ 1982 void *opt; 1983 t_uscalar_t optlen = tdi->OPT_length; 1984 1985 opt = sogetoff(mp, tdi->OPT_offset, 1986 optlen, __TPI_ALIGN_SIZE); 1987 if (opt == NULL) { 1988 /* The len/off falls outside mp */ 1989 freemsg(mp); 1990 mutex_enter(&so->so_lock); 1991 soseterror(so, EPROTO); 1992 mutex_exit(&so->so_lock); 1993 zcmn_err(getzoneid(), CE_WARN, 1994 "sockfs: T_optdata_ind with invalid " 1995 "optlen/offset %u/%d\n", 1996 optlen, tdi->OPT_offset); 1997 return (NULL); 1998 } 1999 /* 2000 * If we received a close indication mark the 2001 * socket and discard this message. 2002 */ 2003 if (so_getopt_unix_close(opt, optlen)) { 2004 mutex_enter(&so->so_lock); 2005 socantsendmore(so); 2006 sti->sti_faddr_valid = 0; 2007 mutex_exit(&so->so_lock); 2008 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2009 freemsg(mp); 2010 return (NULL); 2011 } 2012 } 2013 *allmsgsigs = S_INPUT | S_RDNORM; 2014 *pollwakeups = POLLIN | POLLRDNORM; 2015 *wakeups = RSLEEP; 2016 return (mp); 2017 } 2018 2019 case T_EXDATA_IND: { 2020 mblk_t *mctl, *mdata; 2021 mblk_t *lbp; 2022 union T_primitives *tprp; 2023 struct stdata *stp; 2024 queue_t *qp; 2025 2026 if (MBLKL(mp) < sizeof (struct T_exdata_ind)) { 2027 zcmn_err(getzoneid(), CE_WARN, 2028 "sockfs: Too short T_EXDATA_IND. Len = %ld\n", 2029 (ptrdiff_t)(MBLKL(mp))); 2030 freemsg(mp); 2031 return (NULL); 2032 } 2033 /* 2034 * Ignore zero-length T_EXDATA_IND messages. These might be 2035 * generated by some transports. 2036 * 2037 * This is needed to prevent read (which skips the M_PROTO 2038 * part) to unexpectedly return 0 (or return EWOULDBLOCK 2039 * on a non-blocking socket after select/poll has indicated 2040 * that data is available). 
2041 */ 2042 dprintso(so, 1, 2043 ("T_EXDATA_IND(%p): counts %d/%d state %s\n", 2044 (void *)vp, sti->sti_oobsigcnt, sti->sti_oobcnt, 2045 pr_state(so->so_state, so->so_mode))); 2046 2047 if (msgdsize(mp->b_cont) == 0) { 2048 dprintso(so, 0, 2049 ("strsock_proto: zero length T_EXDATA_IND\n")); 2050 freemsg(mp); 2051 return (NULL); 2052 } 2053 2054 /* 2055 * Split into the T_EXDATA_IND and the M_DATA part. 2056 * We process these three pieces separately: 2057 * signal generation 2058 * handling T_EXDATA_IND 2059 * handling M_DATA component 2060 */ 2061 mctl = mp; 2062 mdata = mctl->b_cont; 2063 mctl->b_cont = NULL; 2064 mutex_enter(&so->so_lock); 2065 so_oob_sig(so, 0, allmsgsigs, pollwakeups); 2066 mctl = so_oob_exdata(so, mctl, allmsgsigs, pollwakeups); 2067 mdata = so_oob_data(so, mdata, allmsgsigs, pollwakeups); 2068 2069 stp = vp->v_stream; 2070 ASSERT(stp != NULL); 2071 qp = _RD(stp->sd_wrq); 2072 2073 mutex_enter(QLOCK(qp)); 2074 lbp = qp->q_last; 2075 2076 /* 2077 * We want to avoid queueing up a string of T_EXDATA_IND 2078 * messages with no intervening data messages at the stream 2079 * head. These messages contribute to the total message 2080 * count. Eventually this can lead to STREAMS flow control 2081 * and also cause TCP to advertise a zero window condition 2082 * to the peer. This can happen in the degenerate case where 2083 * the sender and receiver exchange only OOB data. The sender 2084 * only sends messages with MSG_OOB flag and the receiver 2085 * receives only MSG_OOB messages and does not use SO_OOBINLINE. 2086 * An example of this scenario has been reported in applications 2087 * that use OOB data to exchange heartbeats. Flow control 2088 * relief will never happen if the application only reads OOB 2089 * data which is done directly by sorecvoob() and the 2090 * T_EXDATA_IND messages at the streamhead won't be consumed. 2091 * Note that there is no correctness issue in compressing the 2092 * string of T_EXDATA_IND messages into a single T_EXDATA_IND 2093 * message. A single read that does not specify MSG_OOB will 2094 * read across all the marks in a loop in sotpi_recvmsg(). 2095 * Each mark is individually distinguishable only if the 2096 * T_EXDATA_IND messages are separated by data messages. 2097 */ 2098 if ((qp->q_first != NULL) && (DB_TYPE(lbp) == M_PROTO)) { 2099 tprp = (union T_primitives *)lbp->b_rptr; 2100 if ((tprp->type == T_EXDATA_IND) && 2101 !(so->so_options & SO_OOBINLINE)) { 2102 2103 /* 2104 * free the new M_PROTO message 2105 */ 2106 freemsg(mctl); 2107 2108 /* 2109 * adjust the OOB count and OOB signal count 2110 * just incremented for the new OOB data. 2111 */ 2112 sti->sti_oobcnt--; 2113 sti->sti_oobsigcnt--; 2114 mutex_exit(QLOCK(qp)); 2115 mutex_exit(&so->so_lock); 2116 return (NULL); 2117 } 2118 } 2119 mutex_exit(QLOCK(qp)); 2120 2121 /* 2122 * Pass the T_EXDATA_IND and the M_DATA back separately 2123 * by using b_next linkage. (The stream head will queue any 2124 * b_next linked messages separately.) This is needed 2125 * since MSGMARK applies to the last byte of the message, 2126 * hence we cannot have any M_DATA component attached 2127 * to the marked T_EXDATA_IND. Note that the stream head 2128 * will not consolidate M_DATA messages onto an MSGMARK'ed 2129 * message in order to preserve the constraint that 2130 * the T_EXDATA_IND is always a separate message. 
2131 */ 2132 ASSERT(mctl != NULL); 2133 mctl->b_next = mdata; 2134 mp = mctl; 2135 #ifdef DEBUG 2136 if (mdata == NULL) { 2137 dprintso(so, 1, 2138 ("after outofline T_EXDATA_IND(%p): " 2139 "counts %d/%d poll 0x%x sig 0x%x state %s\n", 2140 (void *)vp, sti->sti_oobsigcnt, 2141 sti->sti_oobcnt, *pollwakeups, *allmsgsigs, 2142 pr_state(so->so_state, so->so_mode))); 2143 } else { 2144 dprintso(so, 1, 2145 ("after inline T_EXDATA_IND(%p): " 2146 "counts %d/%d poll 0x%x sig 0x%x state %s\n", 2147 (void *)vp, sti->sti_oobsigcnt, 2148 sti->sti_oobcnt, *pollwakeups, *allmsgsigs, 2149 pr_state(so->so_state, so->so_mode))); 2150 } 2151 #endif /* DEBUG */ 2152 mutex_exit(&so->so_lock); 2153 *wakeups = RSLEEP; 2154 return (mp); 2155 } 2156 2157 case T_CONN_CON: { 2158 struct T_conn_con *conn_con; 2159 void *addr; 2160 t_uscalar_t addrlen; 2161 2162 /* 2163 * Verify the state, update the state to ISCONNECTED, 2164 * record the potentially new address in the message, 2165 * and drop the message. 2166 */ 2167 if (MBLKL(mp) < sizeof (struct T_conn_con)) { 2168 zcmn_err(getzoneid(), CE_WARN, 2169 "sockfs: Too short T_CONN_CON. Len = %ld\n", 2170 (ptrdiff_t)(MBLKL(mp))); 2171 freemsg(mp); 2172 return (NULL); 2173 } 2174 2175 mutex_enter(&so->so_lock); 2176 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 2177 SS_ISCONNECTING) { 2178 mutex_exit(&so->so_lock); 2179 dprintso(so, 1, 2180 ("T_CONN_CON: state %x\n", so->so_state)); 2181 freemsg(mp); 2182 return (NULL); 2183 } 2184 2185 conn_con = &tpr->conn_con; 2186 addrlen = conn_con->RES_length; 2187 /* 2188 * Allow the address to be of different size than sent down 2189 * in the T_CONN_REQ as long as it doesn't exceed the maxlen. 2190 * For AF_UNIX require the identical length. 2191 */ 2192 if (so->so_family == AF_UNIX ? 2193 addrlen != (t_uscalar_t)sizeof (sti->sti_ux_laddr) : 2194 addrlen > (t_uscalar_t)sti->sti_faddr_maxlen) { 2195 zcmn_err(getzoneid(), CE_WARN, 2196 "sockfs: T_conn_con with different " 2197 "length %u/%d\n", 2198 addrlen, conn_con->RES_length); 2199 soisdisconnected(so, EPROTO); 2200 sti->sti_laddr_valid = 0; 2201 sti->sti_faddr_valid = 0; 2202 mutex_exit(&so->so_lock); 2203 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 2204 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2205 strseteof(SOTOV(so), 1); 2206 freemsg(mp); 2207 /* 2208 * strseteof takes care of read side wakeups, 2209 * pollwakeups, and signals. 2210 */ 2211 *wakeups = WSLEEP; 2212 *allmsgsigs = S_OUTPUT; 2213 *pollwakeups = POLLOUT; 2214 return (NULL); 2215 } 2216 addr = sogetoff(mp, conn_con->RES_offset, addrlen, 1); 2217 if (addr == NULL) { 2218 zcmn_err(getzoneid(), CE_WARN, 2219 "sockfs: T_conn_con with invalid " 2220 "addrlen/offset %u/%d\n", 2221 addrlen, conn_con->RES_offset); 2222 mutex_exit(&so->so_lock); 2223 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 2224 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2225 strseteof(SOTOV(so), 1); 2226 freemsg(mp); 2227 /* 2228 * strseteof takes care of read side wakeups, 2229 * pollwakeups, and signals. 2230 */ 2231 *wakeups = WSLEEP; 2232 *allmsgsigs = S_OUTPUT; 2233 *pollwakeups = POLLOUT; 2234 return (NULL); 2235 } 2236 2237 /* 2238 * Save for getpeername. 
2239 */ 2240 if (so->so_family != AF_UNIX) { 2241 sti->sti_faddr_len = (socklen_t)addrlen; 2242 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen); 2243 bcopy(addr, sti->sti_faddr_sa, addrlen); 2244 sti->sti_faddr_valid = 1; 2245 } 2246 2247 if (so->so_peercred != NULL) 2248 crfree(so->so_peercred); 2249 so->so_peercred = msg_getcred(mp, &so->so_cpid); 2250 if (so->so_peercred != NULL) 2251 crhold(so->so_peercred); 2252 2253 /* Wakeup anybody sleeping in sowaitconnected */ 2254 soisconnected(so); 2255 mutex_exit(&so->so_lock); 2256 2257 /* 2258 * The socket is now available for sending data. 2259 */ 2260 *wakeups = WSLEEP; 2261 *allmsgsigs = S_OUTPUT; 2262 *pollwakeups = POLLOUT; 2263 freemsg(mp); 2264 return (NULL); 2265 } 2266 2267 /* 2268 * Extra processing in case of an SSL proxy, before queuing or 2269 * forwarding to the fallback endpoint 2270 */ 2271 case T_SSL_PROXY_CONN_IND: 2272 case T_CONN_IND: 2273 /* 2274 * Verify the min size and queue the message on 2275 * the sti_conn_ind_head/tail list. 2276 */ 2277 if (MBLKL(mp) < sizeof (struct T_conn_ind)) { 2278 zcmn_err(getzoneid(), CE_WARN, 2279 "sockfs: Too short T_CONN_IND. Len = %ld\n", 2280 (ptrdiff_t)(MBLKL(mp))); 2281 freemsg(mp); 2282 return (NULL); 2283 } 2284 2285 if (audit_active) 2286 audit_sock(T_CONN_IND, strvp2wq(vp), mp, 0); 2287 if (!(so->so_state & SS_ACCEPTCONN)) { 2288 zcmn_err(getzoneid(), CE_WARN, 2289 "sockfs: T_conn_ind on non-listening socket\n"); 2290 freemsg(mp); 2291 return (NULL); 2292 } 2293 2294 if (tpr->type == T_SSL_PROXY_CONN_IND && mp->b_cont == NULL) { 2295 /* No context: need to fall back */ 2296 struct sonode *fbso; 2297 stdata_t *fbstp; 2298 2299 tpr->type = T_CONN_IND; 2300 2301 fbso = kssl_find_fallback(sti->sti_kssl_ent); 2302 2303 /* 2304 * No fallback: the remote will timeout and 2305 * disconnect. 2306 */ 2307 if (fbso == NULL) { 2308 freemsg(mp); 2309 return (NULL); 2310 } 2311 fbstp = SOTOV(fbso)->v_stream; 2312 qreply(fbstp->sd_wrq->q_next, mp); 2313 return (NULL); 2314 } 2315 soqueueconnind(so, mp); 2316 *allmsgsigs = S_INPUT | S_RDNORM; 2317 *pollwakeups = POLLIN | POLLRDNORM; 2318 *wakeups = RSLEEP; 2319 return (NULL); 2320 2321 case T_ORDREL_IND: 2322 if (MBLKL(mp) < sizeof (struct T_ordrel_ind)) { 2323 zcmn_err(getzoneid(), CE_WARN, 2324 "sockfs: Too short T_ORDREL_IND. Len = %ld\n", 2325 (ptrdiff_t)(MBLKL(mp))); 2326 freemsg(mp); 2327 return (NULL); 2328 } 2329 2330 /* 2331 * Some providers send this when not fully connected. 2332 * SunLink X.25 needs to retrieve disconnect reason after 2333 * disconnect for compatibility. It uses T_ORDREL_IND 2334 * instead of T_DISCON_IND so that it may use the 2335 * endpoint after a connect failure to retrieve the 2336 * reason using an ioctl. Thus we explicitly clear 2337 * SS_ISCONNECTING here for SunLink X.25. 2338 * This is a needed TPI violation. 2339 */ 2340 mutex_enter(&so->so_lock); 2341 so->so_state &= ~SS_ISCONNECTING; 2342 socantrcvmore(so); 2343 mutex_exit(&so->so_lock); 2344 strseteof(SOTOV(so), 1); 2345 /* 2346 * strseteof takes care of read side wakeups, 2347 * pollwakeups, and signals. 2348 */ 2349 freemsg(mp); 2350 return (NULL); 2351 2352 case T_DISCON_IND: 2353 if (MBLKL(mp) < sizeof (struct T_discon_ind)) { 2354 zcmn_err(getzoneid(), CE_WARN, 2355 "sockfs: Too short T_DISCON_IND. Len = %ld\n", 2356 (ptrdiff_t)(MBLKL(mp))); 2357 freemsg(mp); 2358 return (NULL); 2359 } 2360 if (so->so_state & SS_ACCEPTCONN) { 2361 /* 2362 * This is a listener. 
Look for a queued T_CONN_IND 2363 * with a matching sequence number and remove it 2364 * from the list. 2365 * It is normal to not find the sequence number since 2366 * the soaccept might have already dequeued it 2367 * (in which case the T_CONN_RES will fail with 2368 * TBADSEQ). 2369 */ 2370 (void) soflushconnind(so, tpr->discon_ind.SEQ_number); 2371 freemsg(mp); 2372 return (0); 2373 } 2374 2375 /* 2376 * Not a listener 2377 * 2378 * If SS_CANTRCVMORE for AF_UNIX ignore the discon_reason. 2379 * Such a discon_ind appears when the peer has first done 2380 * a shutdown() followed by a close() in which case we just 2381 * want to record socantsendmore. 2382 * In this case sockfs first receives a T_ORDREL_IND followed 2383 * by a T_DISCON_IND. 2384 * Note that for other transports (e.g. TCP) we need to handle 2385 * the discon_ind in this case since it signals an error. 2386 */ 2387 mutex_enter(&so->so_lock); 2388 if ((so->so_state & SS_CANTRCVMORE) && 2389 (so->so_family == AF_UNIX)) { 2390 socantsendmore(so); 2391 sti->sti_faddr_valid = 0; 2392 mutex_exit(&so->so_lock); 2393 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2394 dprintso(so, 1, 2395 ("T_DISCON_IND: error %d\n", so->so_error)); 2396 freemsg(mp); 2397 /* 2398 * Set these variables for caller to process them. 2399 * For the else part where T_DISCON_IND is processed, 2400 * this will be done in the function being called 2401 * (strsock_discon_ind()) 2402 */ 2403 *wakeups = WSLEEP; 2404 *allmsgsigs = S_OUTPUT; 2405 *pollwakeups = POLLOUT; 2406 } else if (so->so_flag & (SOASYNC_UNBIND | SOLOCKED)) { 2407 /* 2408 * Deferred processing of T_DISCON_IND 2409 */ 2410 so_save_discon_ind(so, mp, strsock_discon_ind); 2411 mutex_exit(&so->so_lock); 2412 } else { 2413 /* 2414 * Process T_DISCON_IND now 2415 */ 2416 (void) strsock_discon_ind(so, mp); 2417 mutex_exit(&so->so_lock); 2418 } 2419 return (NULL); 2420 2421 case T_UDERROR_IND: { 2422 struct T_uderror_ind *tudi = &tpr->uderror_ind; 2423 void *addr; 2424 t_uscalar_t addrlen; 2425 int error; 2426 2427 dprintso(so, 0, 2428 ("T_UDERROR_IND: error %d\n", tudi->ERROR_type)); 2429 2430 if (MBLKL(mp) < sizeof (struct T_uderror_ind)) { 2431 zcmn_err(getzoneid(), CE_WARN, 2432 "sockfs: Too short T_UDERROR_IND. Len = %ld\n", 2433 (ptrdiff_t)(MBLKL(mp))); 2434 freemsg(mp); 2435 return (NULL); 2436 } 2437 /* Ignore on connection-oriented transports */ 2438 if (so->so_mode & SM_CONNREQUIRED) { 2439 freemsg(mp); 2440 eprintsoline(so, 0); 2441 zcmn_err(getzoneid(), CE_WARN, 2442 "sockfs: T_uderror_ind on connection-oriented " 2443 "transport\n"); 2444 return (NULL); 2445 } 2446 addrlen = tudi->DEST_length; 2447 addr = sogetoff(mp, tudi->DEST_offset, addrlen, 1); 2448 if (addr == NULL) { 2449 zcmn_err(getzoneid(), CE_WARN, 2450 "sockfs: T_uderror_ind with invalid " 2451 "addrlen/offset %u/%d\n", 2452 addrlen, tudi->DEST_offset); 2453 freemsg(mp); 2454 return (NULL); 2455 } 2456 2457 /* Verify source address for connected socket. 
*/ 2458 mutex_enter(&so->so_lock); 2459 if (so->so_state & SS_ISCONNECTED) { 2460 void *faddr; 2461 t_uscalar_t faddr_len; 2462 boolean_t match = B_FALSE; 2463 2464 switch (so->so_family) { 2465 case AF_INET: { 2466 /* Compare just IP address and port */ 2467 struct sockaddr_in *sin1, *sin2; 2468 2469 sin1 = (struct sockaddr_in *)sti->sti_faddr_sa; 2470 sin2 = (struct sockaddr_in *)addr; 2471 if (addrlen == sizeof (struct sockaddr_in) && 2472 sin1->sin_port == sin2->sin_port && 2473 sin1->sin_addr.s_addr == 2474 sin2->sin_addr.s_addr) 2475 match = B_TRUE; 2476 break; 2477 } 2478 case AF_INET6: { 2479 /* Compare just IP address and port. Not flow */ 2480 struct sockaddr_in6 *sin1, *sin2; 2481 2482 sin1 = (struct sockaddr_in6 *)sti->sti_faddr_sa; 2483 sin2 = (struct sockaddr_in6 *)addr; 2484 if (addrlen == sizeof (struct sockaddr_in6) && 2485 sin1->sin6_port == sin2->sin6_port && 2486 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 2487 &sin2->sin6_addr)) 2488 match = B_TRUE; 2489 break; 2490 } 2491 case AF_UNIX: 2492 faddr = &sti->sti_ux_faddr; 2493 faddr_len = 2494 (t_uscalar_t)sizeof (sti->sti_ux_faddr); 2495 if (faddr_len == addrlen && 2496 bcmp(addr, faddr, addrlen) == 0) 2497 match = B_TRUE; 2498 break; 2499 default: 2500 faddr = sti->sti_faddr_sa; 2501 faddr_len = (t_uscalar_t)sti->sti_faddr_len; 2502 if (faddr_len == addrlen && 2503 bcmp(addr, faddr, addrlen) == 0) 2504 match = B_TRUE; 2505 break; 2506 } 2507 2508 if (!match) { 2509 #ifdef DEBUG 2510 dprintso(so, 0, 2511 ("sockfs: T_UDERR_IND mismatch: %s - ", 2512 pr_addr(so->so_family, 2513 (struct sockaddr *)addr, addrlen))); 2514 dprintso(so, 0, ("%s\n", 2515 pr_addr(so->so_family, sti->sti_faddr_sa, 2516 sti->sti_faddr_len))); 2517 #endif /* DEBUG */ 2518 mutex_exit(&so->so_lock); 2519 freemsg(mp); 2520 return (NULL); 2521 } 2522 /* 2523 * Make the write error nonpersistent. If the error 2524 * is zero we use ECONNRESET. 2525 * This assumes that the name space for ERROR_type 2526 * is the errno name space. 2527 */ 2528 if (tudi->ERROR_type != 0) 2529 error = tudi->ERROR_type; 2530 else 2531 error = ECONNRESET; 2532 2533 soseterror(so, error); 2534 mutex_exit(&so->so_lock); 2535 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 2536 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2537 *wakeups = RSLEEP | WSLEEP; 2538 *allmsgsigs = S_INPUT | S_RDNORM | S_OUTPUT; 2539 *pollwakeups = POLLIN | POLLRDNORM | POLLOUT; 2540 freemsg(mp); 2541 return (NULL); 2542 } 2543 /* 2544 * If the application asked for delayed errors 2545 * record the T_UDERROR_IND sti_eaddr_mp and the reason in 2546 * sti_delayed_error for delayed error posting. If the reason 2547 * is zero use ECONNRESET. 2548 * Note that delayed error indications do not make sense for 2549 * AF_UNIX sockets since sendto checks that the destination 2550 * address is valid at the time of the sendto. 
2551 */ 2552 if (!(so->so_options & SO_DGRAM_ERRIND)) { 2553 mutex_exit(&so->so_lock); 2554 freemsg(mp); 2555 return (NULL); 2556 } 2557 if (sti->sti_eaddr_mp != NULL) 2558 freemsg(sti->sti_eaddr_mp); 2559 2560 sti->sti_eaddr_mp = mp; 2561 if (tudi->ERROR_type != 0) 2562 error = tudi->ERROR_type; 2563 else 2564 error = ECONNRESET; 2565 sti->sti_delayed_error = (ushort_t)error; 2566 mutex_exit(&so->so_lock); 2567 return (NULL); 2568 } 2569 2570 case T_ERROR_ACK: 2571 dprintso(so, 0, 2572 ("strsock_proto: T_ERROR_ACK for %d, error %d/%d\n", 2573 tpr->error_ack.ERROR_prim, 2574 tpr->error_ack.TLI_error, 2575 tpr->error_ack.UNIX_error)); 2576 2577 if (MBLKL(mp) < sizeof (struct T_error_ack)) { 2578 zcmn_err(getzoneid(), CE_WARN, 2579 "sockfs: Too short T_ERROR_ACK. Len = %ld\n", 2580 (ptrdiff_t)(MBLKL(mp))); 2581 freemsg(mp); 2582 return (NULL); 2583 } 2584 /* 2585 * Check if we were waiting for the async message 2586 */ 2587 mutex_enter(&so->so_lock); 2588 if ((so->so_flag & SOASYNC_UNBIND) && 2589 tpr->error_ack.ERROR_prim == T_UNBIND_REQ) { 2590 so_unlock_single(so, SOASYNC_UNBIND); 2591 mutex_exit(&so->so_lock); 2592 freemsg(mp); 2593 return (NULL); 2594 } 2595 mutex_exit(&so->so_lock); 2596 soqueueack(so, mp); 2597 return (NULL); 2598 2599 case T_OK_ACK: 2600 if (MBLKL(mp) < sizeof (struct T_ok_ack)) { 2601 zcmn_err(getzoneid(), CE_WARN, 2602 "sockfs: Too short T_OK_ACK. Len = %ld\n", 2603 (ptrdiff_t)(MBLKL(mp))); 2604 freemsg(mp); 2605 return (NULL); 2606 } 2607 /* 2608 * Check if we were waiting for the async message 2609 */ 2610 mutex_enter(&so->so_lock); 2611 if ((so->so_flag & SOASYNC_UNBIND) && 2612 tpr->ok_ack.CORRECT_prim == T_UNBIND_REQ) { 2613 dprintso(so, 1, 2614 ("strsock_proto: T_OK_ACK async unbind\n")); 2615 so_unlock_single(so, SOASYNC_UNBIND); 2616 mutex_exit(&so->so_lock); 2617 freemsg(mp); 2618 return (NULL); 2619 } 2620 mutex_exit(&so->so_lock); 2621 soqueueack(so, mp); 2622 return (NULL); 2623 2624 case T_INFO_ACK: 2625 if (MBLKL(mp) < sizeof (struct T_info_ack)) { 2626 zcmn_err(getzoneid(), CE_WARN, 2627 "sockfs: Too short T_INFO_ACK. Len = %ld\n", 2628 (ptrdiff_t)(MBLKL(mp))); 2629 freemsg(mp); 2630 return (NULL); 2631 } 2632 soqueueack(so, mp); 2633 return (NULL); 2634 2635 case T_CAPABILITY_ACK: 2636 /* 2637 * A T_capability_ack need only be large enough to hold 2638 * the PRIM_type and CAP_bits1 fields; checking for anything 2639 * larger might reject a correct response from an older 2640 * provider. 2641 */ 2642 if (MBLKL(mp) < 2 * sizeof (t_uscalar_t)) { 2643 zcmn_err(getzoneid(), CE_WARN, 2644 "sockfs: Too short T_CAPABILITY_ACK. Len = %ld\n", 2645 (ptrdiff_t)(MBLKL(mp))); 2646 freemsg(mp); 2647 return (NULL); 2648 } 2649 soqueueack(so, mp); 2650 return (NULL); 2651 2652 case T_BIND_ACK: 2653 if (MBLKL(mp) < sizeof (struct T_bind_ack)) { 2654 zcmn_err(getzoneid(), CE_WARN, 2655 "sockfs: Too short T_BIND_ACK. Len = %ld\n", 2656 (ptrdiff_t)(MBLKL(mp))); 2657 freemsg(mp); 2658 return (NULL); 2659 } 2660 soqueueack(so, mp); 2661 return (NULL); 2662 2663 case T_OPTMGMT_ACK: 2664 if (MBLKL(mp) < sizeof (struct T_optmgmt_ack)) { 2665 zcmn_err(getzoneid(), CE_WARN, 2666 "sockfs: Too short T_OPTMGMT_ACK. 
Len = %ld\n", 2667 (ptrdiff_t)(MBLKL(mp))); 2668 freemsg(mp); 2669 return (NULL); 2670 } 2671 soqueueack(so, mp); 2672 return (NULL); 2673 default: 2674 #ifdef DEBUG 2675 zcmn_err(getzoneid(), CE_WARN, 2676 "sockfs: unknown TPI primitive %d received\n", 2677 tpr->type); 2678 #endif /* DEBUG */ 2679 freemsg(mp); 2680 return (NULL); 2681 } 2682 } 2683 2684 /* 2685 * This routine is registered with the stream head to receive other 2686 * (non-data, and non-proto) messages. 2687 * 2688 * Returns NULL if the message was consumed. 2689 * Returns an mblk to make that mblk be processed by the stream head. 2690 * 2691 * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and 2692 * *pollwakeups) for the stream head to take action on. 2693 */ 2694 static mblk_t * 2695 strsock_misc(vnode_t *vp, mblk_t *mp, 2696 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 2697 strsigset_t *allmsgsigs, strpollset_t *pollwakeups) 2698 { 2699 struct sonode *so; 2700 sotpi_info_t *sti; 2701 2702 so = VTOSO(vp); 2703 sti = SOTOTPI(so); 2704 2705 dprintso(so, 1, ("strsock_misc(%p, %p, 0x%x)\n", 2706 (void *)vp, (void *)mp, DB_TYPE(mp))); 2707 2708 /* Set default return values */ 2709 *wakeups = *allmsgsigs = *firstmsgsigs = *pollwakeups = 0; 2710 2711 switch (DB_TYPE(mp)) { 2712 case M_PCSIG: 2713 /* 2714 * This assumes that an M_PCSIG for the urgent data arrives 2715 * before the corresponding T_EXDATA_IND. 2716 * 2717 * Note: Just like in SunOS 4.X and 4.4BSD a poll will be 2718 * awoken before the urgent data shows up. 2719 * For OOBINLINE this can result in select returning 2720 * only exceptions as opposed to except|read. 2721 */ 2722 if (*mp->b_rptr == SIGURG) { 2723 mutex_enter(&so->so_lock); 2724 dprintso(so, 1, 2725 ("SIGURG(%p): counts %d/%d state %s\n", 2726 (void *)vp, sti->sti_oobsigcnt, sti->sti_oobcnt, 2727 pr_state(so->so_state, so->so_mode))); 2728 so_oob_sig(so, 1, allmsgsigs, pollwakeups); 2729 dprintso(so, 1, 2730 ("after SIGURG(%p): counts %d/%d " 2731 " poll 0x%x sig 0x%x state %s\n", 2732 (void *)vp, sti->sti_oobsigcnt, sti->sti_oobcnt, 2733 *pollwakeups, *allmsgsigs, 2734 pr_state(so->so_state, so->so_mode))); 2735 mutex_exit(&so->so_lock); 2736 } 2737 freemsg(mp); 2738 return (NULL); 2739 2740 case M_SIG: 2741 case M_HANGUP: 2742 case M_UNHANGUP: 2743 case M_ERROR: 2744 /* M_ERRORs etc are ignored */ 2745 freemsg(mp); 2746 return (NULL); 2747 2748 case M_FLUSH: 2749 /* 2750 * Do not flush read queue. If the M_FLUSH 2751 * arrives because of an impending T_discon_ind 2752 * we still have to keep any queued data - this is part of 2753 * socket semantics. 2754 */ 2755 if (*mp->b_rptr & FLUSHW) { 2756 *mp->b_rptr &= ~FLUSHR; 2757 return (mp); 2758 } 2759 freemsg(mp); 2760 return (NULL); 2761 2762 default: 2763 return (mp); 2764 } 2765 } 2766 2767 2768 /* Register to receive signals for certain events */ 2769 int 2770 so_set_asyncsigs(vnode_t *vp, pid_t pgrp, int events, int mode, cred_t *cr) 2771 { 2772 struct strsigset ss; 2773 int32_t rval; 2774 2775 /* 2776 * Note that SOLOCKED will be set except for the call from soaccept(). 2777 */ 2778 ASSERT(!mutex_owned(&VTOSO(vp)->so_lock)); 2779 ss.ss_pid = pgrp; 2780 ss.ss_events = events; 2781 return (strioctl(vp, I_ESETSIG, (intptr_t)&ss, mode, K_TO_K, cr, 2782 &rval)); 2783 } 2784 2785 2786 /* Register for events matching the SS_ASYNC flag */ 2787 int 2788 so_set_events(struct sonode *so, vnode_t *vp, cred_t *cr) 2789 { 2790 int events = so->so_state & SS_ASYNC ? 
2791 S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT : 2792 S_RDBAND | S_BANDURG; 2793 2794 return (so_set_asyncsigs(vp, so->so_pgrp, events, 0, cr)); 2795 } 2796 2797 2798 /* Change the SS_ASYNC flag, and update signal delivery if needed */ 2799 int 2800 so_flip_async(struct sonode *so, vnode_t *vp, int mode, cred_t *cr) 2801 { 2802 ASSERT(mutex_owned(&so->so_lock)); 2803 if (so->so_pgrp != 0) { 2804 int error; 2805 int events = so->so_state & SS_ASYNC ? /* Old flag */ 2806 S_RDBAND | S_BANDURG : /* New sigs */ 2807 S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT; 2808 2809 so_lock_single(so); 2810 mutex_exit(&so->so_lock); 2811 2812 error = so_set_asyncsigs(vp, so->so_pgrp, events, mode, cr); 2813 2814 mutex_enter(&so->so_lock); 2815 so_unlock_single(so, SOLOCKED); 2816 if (error) 2817 return (error); 2818 } 2819 so->so_state ^= SS_ASYNC; 2820 return (0); 2821 } 2822 2823 /* 2824 * Set new pid/pgrp for SIGPOLL (or SIGIO for FIOASYNC mode), replacing 2825 * any existing one. If passed zero, just clear the existing one. 2826 */ 2827 int 2828 so_set_siggrp(struct sonode *so, vnode_t *vp, pid_t pgrp, int mode, cred_t *cr) 2829 { 2830 int events = so->so_state & SS_ASYNC ? 2831 S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT : 2832 S_RDBAND | S_BANDURG; 2833 int error; 2834 2835 ASSERT(mutex_owned(&so->so_lock)); 2836 2837 /* 2838 * Change socket process (group). 2839 * 2840 * strioctl (via so_set_asyncsigs) will perform permission check and 2841 * also keep a PID_HOLD to prevent the pid from being reused. 2842 */ 2843 so_lock_single(so); 2844 mutex_exit(&so->so_lock); 2845 2846 if (pgrp != 0) { 2847 dprintso(so, 1, ("setown: adding pgrp %d ev 0x%x\n", 2848 pgrp, events)); 2849 error = so_set_asyncsigs(vp, pgrp, events, mode, cr); 2850 if (error != 0) { 2851 eprintsoline(so, error); 2852 goto bad; 2853 } 2854 } 2855 /* Remove the previously registered process/group */ 2856 if (so->so_pgrp != 0) { 2857 dprintso(so, 1, ("setown: removing pgrp %d\n", so->so_pgrp)); 2858 error = so_set_asyncsigs(vp, so->so_pgrp, 0, mode, cr); 2859 if (error != 0) { 2860 eprintsoline(so, error); 2861 error = 0; 2862 } 2863 } 2864 mutex_enter(&so->so_lock); 2865 so_unlock_single(so, SOLOCKED); 2866 so->so_pgrp = pgrp; 2867 return (0); 2868 bad: 2869 mutex_enter(&so->so_lock); 2870 so_unlock_single(so, SOLOCKED); 2871 return (error); 2872 } 2873 2874 /* 2875 * Wrapper for getmsg. If the socket has been converted to a stream 2876 * pass the request to the stream head. 2877 */ 2878 int 2879 sock_getmsg( 2880 struct vnode *vp, 2881 struct strbuf *mctl, 2882 struct strbuf *mdata, 2883 uchar_t *prip, 2884 int *flagsp, 2885 int fmode, 2886 rval_t *rvp 2887 ) 2888 { 2889 struct sonode *so; 2890 2891 ASSERT(vp->v_type == VSOCK); 2892 /* 2893 * Use the stream head to find the real socket vnode. 2894 * This is needed when namefs sits above sockfs. Some 2895 * sockets (like SCTP) are not streams. 2896 */ 2897 if (!vp->v_stream) { 2898 return (ENOSTR); 2899 } 2900 ASSERT(vp->v_stream->sd_vnode); 2901 vp = vp->v_stream->sd_vnode; 2902 ASSERT(vn_matchops(vp, socket_vnodeops)); 2903 so = VTOSO(vp); 2904 2905 dprintso(so, 1, ("sock_getmsg(%p) %s\n", 2906 (void *)so, pr_state(so->so_state, so->so_mode))); 2907 2908 if (so->so_version == SOV_STREAM) { 2909 /* The imaginary "sockmod" has been popped - act as a stream */ 2910 return (strgetmsg(vp, mctl, mdata, prip, flagsp, fmode, rvp)); 2911 } 2912 eprintsoline(so, ENOSTR); 2913 return (ENOSTR); 2914 } 2915 2916 /* 2917 * Wrapper for putmsg. 
If the socket has been converted to a stream 2918 * pass the request to the stream head. 2919 * 2920 * Note that while a regular socket (SOV_SOCKSTREAM) does support the 2921 * streams ioctl set, it does not support putmsg and getmsg. 2922 * Allowing putmsg would prevent sockfs from tracking the state of 2923 * the socket/transport and would also invalidate the locking in sockfs. 2924 */ 2925 int 2926 sock_putmsg( 2927 struct vnode *vp, 2928 struct strbuf *mctl, 2929 struct strbuf *mdata, 2930 uchar_t pri, 2931 int flag, 2932 int fmode 2933 ) 2934 { 2935 struct sonode *so; 2936 2937 ASSERT(vp->v_type == VSOCK); 2938 /* 2939 * Use the stream head to find the real socket vnode. 2940 * This is needed when namefs sits above sockfs. 2941 */ 2942 if (!vp->v_stream) { 2943 return (ENOSTR); 2944 } 2945 ASSERT(vp->v_stream->sd_vnode); 2946 vp = vp->v_stream->sd_vnode; 2947 ASSERT(vn_matchops(vp, socket_vnodeops)); 2948 so = VTOSO(vp); 2949 2950 dprintso(so, 1, ("sock_putmsg(%p) %s\n", 2951 (void *)so, pr_state(so->so_state, so->so_mode))); 2952 2953 if (so->so_version == SOV_STREAM) { 2954 /* The imaginary "sockmod" has been popped - act as a stream */ 2955 return (strputmsg(vp, mctl, mdata, pri, flag, fmode)); 2956 } 2957 eprintsoline(so, ENOSTR); 2958 return (ENOSTR); 2959 } 2960 2961 /* 2962 * Special function called only from f_getfl(). 2963 * Returns FASYNC if the SS_ASYNC flag is set on a socket, else 0. 2964 * No locks are acquired here, so it is safe to use while uf_lock is held. 2965 * This exists solely for BSD fcntl() FASYNC compatibility. 2966 */ 2967 int 2968 sock_getfasync(vnode_t *vp) 2969 { 2970 struct sonode *so; 2971 2972 ASSERT(vp->v_type == VSOCK); 2973 /* 2974 * For the stream model, v_stream is used; for non-stream sockets, 2975 * v_stream is always NULL. 2976 */ 2977 if (vp->v_stream != NULL) 2978 so = VTOSO(vp->v_stream->sd_vnode); 2979 else 2980 so = VTOSO(vp); 2981 2982 if (so->so_version == SOV_STREAM || !(so->so_state & SS_ASYNC)) 2983 return (0); 2984 2985 return (FASYNC); 2986 } 2987