/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/sysmacros.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/file.h>
#include <sys/user.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/esunddi.h>
#include <sys/flock.h>
#include <sys/modctl.h>
#include <sys/vtrace.h>
#include <sys/strsun.h>
#include <sys/cmn_err.h>
#include <sys/proc.h>
#include <sys/ddi.h>

#include <sys/suntpi.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/socketvar.h>
#include <netinet/in.h>
#include <inet/common.h>
#include <inet/proto_set.h>

#include <sys/tiuser.h>
#define	_SUN_TPI_VERSION	2
#include <sys/tihdr.h>

#include <c2/audit.h>

#include <fs/sockfs/socktpi.h>
#include <fs/sockfs/socktpi_impl.h>

int so_default_version = SOV_SOCKSTREAM;

#ifdef DEBUG
/* Set sockdebug to print debug messages when SO_DEBUG is set */
int sockdebug = 0;

/* Set sockprinterr to print error messages when SO_DEBUG is set */
int sockprinterr = 0;

/*
 * Set so_default_options to SO_DEBUG if all sockets should be created
 * with SO_DEBUG set. This is needed to get debug printouts from the
 * socket() call itself.
 */
int so_default_options = 0;
#endif /* DEBUG */

#ifdef SOCK_TEST
/*
 * Set to number of ticks to limit cv_waits for code coverage testing.
 * Set to 1000 when SO_DEBUG is set to 2.
 */
clock_t sock_test_timelimit = 0;
#endif /* SOCK_TEST */

/*
 * For concurrency testing of e.g. opening /dev/ip which does not
 * handle T_INFO_REQ messages.
 */
int so_no_tinfo = 0;

/*
 * Timeout for getting a T_CAPABILITY_ACK - it is possible for a provider
 * to simply ignore the T_CAPABILITY_REQ.
 */
clock_t sock_capability_timeout = 2;	/* seconds */

static int do_tcapability(struct sonode *so, t_uscalar_t cap_bits1);
static void so_removehooks(struct sonode *so);

static mblk_t *strsock_proto(vnode_t *vp, mblk_t *mp,
		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
		strsigset_t *allmsgsigs, strpollset_t *pollwakeups);
static mblk_t *strsock_misc(vnode_t *vp, mblk_t *mp,
		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
		strsigset_t *allmsgsigs, strpollset_t *pollwakeups);

/*
 * Convert a socket to a stream. Invoked when the illusory sockmod
 * is popped from the stream.
 * Change the stream head back to default operation without losing
 * any messages (T_conn_ind's are moved to the stream head queue).
 */
int
so_sock2stream(struct sonode *so)
{
	struct vnode *vp = SOTOV(so);
	queue_t *rq;
	mblk_t *mp;
	int error = 0;
	sotpi_info_t *sti = SOTOTPI(so);

	ASSERT(MUTEX_HELD(&sti->sti_plumb_lock));

	mutex_enter(&so->so_lock);
	so_lock_single(so);

	ASSERT(so->so_version != SOV_STREAM);

	if (sti->sti_direct) {
		mblk_t **mpp;
		int rval;

		/*
		 * Tell the transport below that sockmod is being popped
		 */
		mutex_exit(&so->so_lock);
		error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K, CRED(),
		    &rval);
		mutex_enter(&so->so_lock);
		if (error != 0) {
			dprintso(so, 0, ("so_sock2stream(%p): "
			    "_SIOCSOCKFALLBACK failed\n", (void *)so));
			goto exit;
		}
		sti->sti_direct = 0;

		for (mpp = &sti->sti_conn_ind_head; (mp = *mpp) != NULL;
		    mpp = &mp->b_next) {
			struct T_conn_ind *conn_ind;

			/*
			 * strsock_proto() has already verified the length of
			 * this message block.
			 */
			ASSERT(MBLKL(mp) >= sizeof (struct T_conn_ind));

			conn_ind = (struct T_conn_ind *)mp->b_rptr;
			if (conn_ind->OPT_length == 0 &&
			    conn_ind->OPT_offset == 0)
				continue;

			if (DB_REF(mp) > 1) {
				mblk_t *newmp;
				size_t length;
				cred_t *cr;
				pid_t cpid;
				int error;	/* Dummy - error not returned */

				/*
				 * Copy the message block because it is used
				 * elsewhere, too.
				 * Can't use copyb since we want to wait
				 * yet allow for EINTR.
				 */
				/* Round up size for reuse */
				length = MAX(MBLKL(mp), 64);
				cr = msg_getcred(mp, &cpid);
				if (cr != NULL) {
					newmp = allocb_cred_wait(length, 0,
					    &error, cr, cpid);
				} else {
					newmp = allocb_wait(length, 0, 0,
					    &error);
				}
				if (newmp == NULL) {
					error = EINTR;
					goto exit;
				}
				bcopy(mp->b_rptr, newmp->b_wptr, length);
				newmp->b_wptr += length;
				newmp->b_next = mp->b_next;

				/*
				 * Link the new message block into the queue
				 * and free the old one.
				 */
				*mpp = newmp;
				mp->b_next = NULL;
				freemsg(mp);

				mp = newmp;
				conn_ind = (struct T_conn_ind *)mp->b_rptr;
			}

			/*
			 * Remove options added by TCP for accept fast-path.
			 */
			conn_ind->OPT_length = 0;
			conn_ind->OPT_offset = 0;
		}
	}

	so->so_version = SOV_STREAM;
	so->so_proto_handle = NULL;

	/*
	 * Remove the hooks in the stream head to avoid queuing more
	 * packets in sockfs.
	 */
	mutex_exit(&so->so_lock);
	so_removehooks(so);
	mutex_enter(&so->so_lock);

	/*
	 * Clear any state related to urgent data. Leave any T_EXDATA_IND
	 * on the queue - the behavior of urgent data after a switch is
	 * left undefined.
239 */ 240 so->so_error = sti->sti_delayed_error = 0; 241 freemsg(so->so_oobmsg); 242 so->so_oobmsg = NULL; 243 sti->sti_oobsigcnt = sti->sti_oobcnt = 0; 244 245 so->so_state &= ~(SS_RCVATMARK|SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA| 246 SS_SAVEDEOR); 247 ASSERT(so_verify_oobstate(so)); 248 249 freemsg(sti->sti_ack_mp); 250 sti->sti_ack_mp = NULL; 251 252 /* 253 * Flush the T_DISCON_IND on sti_discon_ind_mp. 254 */ 255 so_flush_discon_ind(so); 256 257 /* 258 * Move any queued T_CONN_IND messages to stream head queue. 259 */ 260 rq = RD(strvp2wq(vp)); 261 while ((mp = sti->sti_conn_ind_head) != NULL) { 262 sti->sti_conn_ind_head = mp->b_next; 263 mp->b_next = NULL; 264 if (sti->sti_conn_ind_head == NULL) { 265 ASSERT(sti->sti_conn_ind_tail == mp); 266 sti->sti_conn_ind_tail = NULL; 267 } 268 dprintso(so, 0, 269 ("so_sock2stream(%p): moving T_CONN_IND\n", (void *)so)); 270 271 /* Drop lock across put() */ 272 mutex_exit(&so->so_lock); 273 put(rq, mp); 274 mutex_enter(&so->so_lock); 275 } 276 277 exit: 278 ASSERT(MUTEX_HELD(&so->so_lock)); 279 so_unlock_single(so, SOLOCKED); 280 mutex_exit(&so->so_lock); 281 return (error); 282 } 283 284 /* 285 * Covert a stream back to a socket. This is invoked when the illusory 286 * sockmod is pushed on a stream (where the stream was "created" by 287 * popping the illusory sockmod). 288 * This routine can not recreate the socket state (certain aspects of 289 * it like urgent data state and the bound/connected addresses for AF_UNIX 290 * sockets can not be recreated by asking the transport for information). 291 * Thus this routine implicitly assumes that the socket is in an initial 292 * state (as if it was just created). It flushes any messages queued on the 293 * read queue to avoid dealing with e.g. TPI acks or T_exdata_ind messages. 294 */ 295 void 296 so_stream2sock(struct sonode *so) 297 { 298 struct vnode *vp = SOTOV(so); 299 sotpi_info_t *sti = SOTOTPI(so); 300 301 ASSERT(MUTEX_HELD(&sti->sti_plumb_lock)); 302 303 mutex_enter(&so->so_lock); 304 so_lock_single(so); 305 ASSERT(so->so_version == SOV_STREAM); 306 so->so_version = SOV_SOCKSTREAM; 307 sti->sti_pushcnt = 0; 308 mutex_exit(&so->so_lock); 309 310 /* 311 * Set a permenent error to force any thread in sorecvmsg to 312 * return (and drop SOREADLOCKED). Clear the error once 313 * we have SOREADLOCKED. 314 * This makes a read sleeping during the I_PUSH of sockmod return 315 * EIO. 316 */ 317 strsetrerror(SOTOV(so), EIO, 1, NULL); 318 319 /* 320 * Get the read lock before flushing data to avoid 321 * problems with the T_EXDATA_IND MSG_PEEK code in sorecvmsg. 322 */ 323 mutex_enter(&so->so_lock); 324 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 325 mutex_exit(&so->so_lock); 326 327 strsetrerror(SOTOV(so), 0, 0, NULL); 328 so_installhooks(so); 329 330 /* 331 * Flush everything on the read queue. 332 * This ensures that no T_CONN_IND remain and that no T_EXDATA_IND 333 * remain; those types of messages would confuse sockfs. 334 */ 335 strflushrq(vp, FLUSHALL); 336 mutex_enter(&so->so_lock); 337 338 /* 339 * Flush the T_DISCON_IND on sti_discon_ind_mp. 340 */ 341 so_flush_discon_ind(so); 342 so_unlock_read(so); /* Clear SOREADLOCKED */ 343 344 so_unlock_single(so, SOLOCKED); 345 mutex_exit(&so->so_lock); 346 } 347 348 /* 349 * Install the hooks in the stream head. 
350 */ 351 void 352 so_installhooks(struct sonode *so) 353 { 354 struct vnode *vp = SOTOV(so); 355 356 strsetrputhooks(vp, SH_SIGALLDATA | SH_IGN_ZEROLEN | SH_CONSOL_DATA, 357 strsock_proto, strsock_misc); 358 strsetwputhooks(vp, SH_SIGPIPE | SH_RECHECK_ERR, 0); 359 } 360 361 /* 362 * Remove the hooks in the stream head. 363 */ 364 static void 365 so_removehooks(struct sonode *so) 366 { 367 struct vnode *vp = SOTOV(so); 368 369 strsetrputhooks(vp, 0, NULL, NULL); 370 strsetwputhooks(vp, 0, STRTIMOUT); 371 /* 372 * Leave read behavior as it would have been for a normal 373 * stream i.e. a read of an M_PROTO will fail. 374 */ 375 } 376 377 void 378 so_basic_strinit(struct sonode *so) 379 { 380 struct vnode *vp = SOTOV(so); 381 struct stdata *stp; 382 mblk_t *mp; 383 sotpi_info_t *sti = SOTOTPI(so); 384 385 /* Preallocate an unbind_req message */ 386 mp = soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP, CRED()); 387 mutex_enter(&so->so_lock); 388 sti->sti_unbind_mp = mp; 389 #ifdef DEBUG 390 so->so_options = so_default_options; 391 #endif /* DEBUG */ 392 mutex_exit(&so->so_lock); 393 394 so_installhooks(so); 395 396 stp = vp->v_stream; 397 /* 398 * Have to keep minpsz at zero in order to allow write/send of zero 399 * bytes. 400 */ 401 mutex_enter(&stp->sd_lock); 402 if (stp->sd_qn_minpsz == 1) 403 stp->sd_qn_minpsz = 0; 404 mutex_exit(&stp->sd_lock); 405 } 406 407 /* 408 * Initialize the streams side of a socket including 409 * T_info_req/ack processing. If tso is not NULL its values are used thereby 410 * avoiding the T_INFO_REQ. 411 */ 412 int 413 so_strinit(struct sonode *so, struct sonode *tso) 414 { 415 sotpi_info_t *sti = SOTOTPI(so); 416 sotpi_info_t *tsti; 417 int error; 418 419 so_basic_strinit(so); 420 421 /* 422 * The T_CAPABILITY_REQ should be the first message sent down because 423 * at least TCP has a fast-path for this which avoids timeouts while 424 * waiting for the T_CAPABILITY_ACK under high system load. 425 */ 426 if (tso == NULL) { 427 error = do_tcapability(so, TC1_ACCEPTOR_ID | TC1_INFO); 428 if (error) 429 return (error); 430 } else { 431 tsti = SOTOTPI(tso); 432 433 mutex_enter(&so->so_lock); 434 sti->sti_tsdu_size = tsti->sti_tsdu_size; 435 sti->sti_etsdu_size = tsti->sti_etsdu_size; 436 sti->sti_addr_size = tsti->sti_addr_size; 437 sti->sti_opt_size = tsti->sti_opt_size; 438 sti->sti_tidu_size = tsti->sti_tidu_size; 439 sti->sti_serv_type = tsti->sti_serv_type; 440 so->so_mode = tso->so_mode & ~SM_ACCEPTOR_ID; 441 mutex_exit(&so->so_lock); 442 443 /* the following do_tcapability may update so->so_mode */ 444 if ((tsti->sti_serv_type != T_CLTS) && 445 (sti->sti_direct == 0)) { 446 error = do_tcapability(so, TC1_ACCEPTOR_ID); 447 if (error) 448 return (error); 449 } 450 } 451 /* 452 * If the addr_size is 0 we treat it as already bound 453 * and connected. This is used by the routing socket. 454 * We set the addr_size to something to allocate a the address 455 * structures. 456 */ 457 if (sti->sti_addr_size == 0) { 458 so->so_state |= SS_ISBOUND | SS_ISCONNECTED; 459 /* Address size can vary with address families. 
		if (so->so_family == AF_INET6)
			sti->sti_addr_size =
			    (t_scalar_t)sizeof (struct sockaddr_in6);
		else
			sti->sti_addr_size =
			    (t_scalar_t)sizeof (struct sockaddr_in);
		ASSERT(sti->sti_unbind_mp);
	}

	so_alloc_addr(so, sti->sti_addr_size);

	return (0);
}

static void
copy_tinfo(struct sonode *so, struct T_info_ack *tia)
{
	sotpi_info_t *sti = SOTOTPI(so);

	sti->sti_tsdu_size = tia->TSDU_size;
	sti->sti_etsdu_size = tia->ETSDU_size;
	sti->sti_addr_size = tia->ADDR_size;
	sti->sti_opt_size = tia->OPT_size;
	sti->sti_tidu_size = tia->TIDU_size;
	sti->sti_serv_type = tia->SERV_type;
	switch (tia->CURRENT_state) {
	case TS_UNBND:
		break;
	case TS_IDLE:
		so->so_state |= SS_ISBOUND;
		sti->sti_laddr_len = 0;
		sti->sti_laddr_valid = 0;
		break;
	case TS_DATA_XFER:
		so->so_state |= SS_ISBOUND|SS_ISCONNECTED;
		sti->sti_laddr_len = 0;
		sti->sti_faddr_len = 0;
		sti->sti_laddr_valid = 0;
		sti->sti_faddr_valid = 0;
		break;
	}

	/*
	 * Heuristics for determining the socket mode flags
	 * (SM_ATOMIC, SM_CONNREQUIRED, SM_ADDR, SM_FDPASSING,
	 * SM_EXDATA, SM_OPTDATA, and SM_BYTESTREAM)
	 * from the info ack.
	 */
	if (sti->sti_serv_type == T_CLTS) {
		so->so_mode |= SM_ATOMIC | SM_ADDR;
	} else {
		so->so_mode |= SM_CONNREQUIRED;
		if (sti->sti_etsdu_size != 0 && sti->sti_etsdu_size != -2)
			so->so_mode |= SM_EXDATA;
	}
	if (so->so_type == SOCK_SEQPACKET || so->so_type == SOCK_RAW) {
		/* Semantics are to discard tail end of messages */
		so->so_mode |= SM_ATOMIC;
	}
	if (so->so_family == AF_UNIX) {
		so->so_mode |= SM_FDPASSING | SM_OPTDATA;
		if (sti->sti_addr_size == -1) {
			/* MAXPATHLEN + soun_family + nul termination */
			sti->sti_addr_size = (t_scalar_t)(MAXPATHLEN +
			    sizeof (short) + 1);
		}
		if (so->so_type == SOCK_STREAM) {
			/*
			 * Make it into a byte-stream transport.
			 * SOCK_SEQPACKET sockets are unchanged.
			 */
			sti->sti_tsdu_size = 0;
		}
	} else if (sti->sti_addr_size == -1) {
		/*
		 * Logic extracted from sockmod - have to pick some max address
		 * length in order to preallocate the addresses.
		 */
		sti->sti_addr_size = SOA_DEFSIZE;
	}
	if (sti->sti_tsdu_size == 0)
		so->so_mode |= SM_BYTESTREAM;
}

static int
check_tinfo(struct sonode *so)
{
	sotpi_info_t *sti = SOTOTPI(so);

	/* Consistency checks */
	if (so->so_type == SOCK_DGRAM && sti->sti_serv_type != T_CLTS) {
		eprintso(so, ("service type and socket type mismatch\n"));
		eprintsoline(so, EPROTO);
		return (EPROTO);
	}
	if (so->so_type == SOCK_STREAM && sti->sti_serv_type == T_CLTS) {
		eprintso(so, ("service type and socket type mismatch\n"));
		eprintsoline(so, EPROTO);
		return (EPROTO);
	}
	if (so->so_type == SOCK_SEQPACKET && sti->sti_serv_type == T_CLTS) {
		eprintso(so, ("service type and socket type mismatch\n"));
		eprintsoline(so, EPROTO);
		return (EPROTO);
	}
	if (so->so_family == AF_INET &&
	    sti->sti_addr_size != (t_scalar_t)sizeof (struct sockaddr_in)) {
		eprintso(so,
		    ("AF_INET must have sockaddr_in address length. Got %d\n",
		    sti->sti_addr_size));
		eprintsoline(so, EMSGSIZE);
		return (EMSGSIZE);
	}
	if (so->so_family == AF_INET6 &&
	    sti->sti_addr_size != (t_scalar_t)sizeof (struct sockaddr_in6)) {
		eprintso(so,
		    ("AF_INET6 must have sockaddr_in6 address length. Got %d\n",
		    sti->sti_addr_size));
		eprintsoline(so, EMSGSIZE);
		return (EMSGSIZE);
	}

	dprintso(so, 1, (
	    "tinfo: serv %d tsdu %d, etsdu %d, addr %d, opt %d, tidu %d\n",
	    sti->sti_serv_type, sti->sti_tsdu_size, sti->sti_etsdu_size,
	    sti->sti_addr_size, sti->sti_opt_size,
	    sti->sti_tidu_size));
	dprintso(so, 1, ("tinfo: so_state %s\n",
	    pr_state(so->so_state, so->so_mode)));
	return (0);
}

/*
 * Send down T_info_req and wait for the ack.
 * Record interesting T_info_ack values in the sonode.
 */
static int
do_tinfo(struct sonode *so)
{
	struct T_info_req tir;
	mblk_t *mp;
	int error;

	ASSERT(MUTEX_NOT_HELD(&so->so_lock));

	if (so_no_tinfo) {
		SOTOTPI(so)->sti_addr_size = 0;
		return (0);
	}

	dprintso(so, 1, ("do_tinfo(%p)\n", (void *)so));

	/* Send T_INFO_REQ */
	tir.PRIM_type = T_INFO_REQ;
	mp = soallocproto1(&tir, sizeof (tir),
	    sizeof (struct T_info_req) + sizeof (struct T_info_ack),
	    _ALLOC_INTR, CRED());
	if (mp == NULL) {
		eprintsoline(so, ENOBUFS);
		return (ENOBUFS);
	}
	/* T_INFO_REQ has to be M_PCPROTO */
	DB_TYPE(mp) = M_PCPROTO;

	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
	if (error) {
		eprintsoline(so, error);
		return (error);
	}
	mutex_enter(&so->so_lock);
	/* Wait for T_INFO_ACK */
	if ((error = sowaitprim(so, T_INFO_REQ, T_INFO_ACK,
	    (t_uscalar_t)sizeof (struct T_info_ack), &mp, 0))) {
		mutex_exit(&so->so_lock);
		eprintsoline(so, error);
		return (error);
	}

	ASSERT(mp);
	copy_tinfo(so, (struct T_info_ack *)mp->b_rptr);
	mutex_exit(&so->so_lock);
	freemsg(mp);
	return (check_tinfo(so));
}

/*
 * Send down T_capability_req and wait for the ack.
 * Record interesting T_capability_ack values in the sonode.
 */
static int
do_tcapability(struct sonode *so, t_uscalar_t cap_bits1)
{
	struct T_capability_req tcr;
	struct T_capability_ack *tca;
	mblk_t *mp;
	int error;
	sotpi_info_t *sti = SOTOTPI(so);

	ASSERT(cap_bits1 != 0);
	ASSERT((cap_bits1 & ~(TC1_ACCEPTOR_ID | TC1_INFO)) == 0);
	ASSERT(MUTEX_NOT_HELD(&so->so_lock));

	if (sti->sti_provinfo->tpi_capability == PI_NO)
		return (do_tinfo(so));

	if (so_no_tinfo) {
		sti->sti_addr_size = 0;
		if ((cap_bits1 &= ~TC1_INFO) == 0)
			return (0);
	}

	dprintso(so, 1, ("do_tcapability(%p)\n", (void *)so));

	/* Send T_CAPABILITY_REQ */
	tcr.PRIM_type = T_CAPABILITY_REQ;
	tcr.CAP_bits1 = cap_bits1;
	mp = soallocproto1(&tcr, sizeof (tcr),
	    sizeof (struct T_capability_req) + sizeof (struct T_capability_ack),
	    _ALLOC_INTR, CRED());
	if (mp == NULL) {
		eprintsoline(so, ENOBUFS);
		return (ENOBUFS);
	}
	/* T_CAPABILITY_REQ should be M_PCPROTO here */
	DB_TYPE(mp) = M_PCPROTO;

	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
	if (error) {
		eprintsoline(so, error);
		return (error);
	}
	mutex_enter(&so->so_lock);
	/* Wait for T_CAPABILITY_ACK */
	if ((error = sowaitprim(so, T_CAPABILITY_REQ, T_CAPABILITY_ACK,
	    (t_uscalar_t)sizeof (*tca), &mp, sock_capability_timeout * hz))) {
		mutex_exit(&so->so_lock);
		PI_PROVLOCK(sti->sti_provinfo);
		if (sti->sti_provinfo->tpi_capability == PI_DONTKNOW)
			sti->sti_provinfo->tpi_capability = PI_NO;
		PI_PROVUNLOCK(sti->sti_provinfo);
		ASSERT((so->so_mode & SM_ACCEPTOR_ID) == 0);
		if (cap_bits1 & TC1_INFO) {
			/*
			 * If the T_CAPABILITY_REQ timed out and then a
			 * T_INFO_REQ gets a protocol error, most likely
			 * the capability was slow (vs. unsupported). Return
			 * ENOSR for this case as a best guess.
			 */
			if (error == ETIME) {
				return ((error = do_tinfo(so)) == EPROTO ?
				    ENOSR : error);
			}
			return (do_tinfo(so));
		}
		return (0);
	}

	ASSERT(mp);
	tca = (struct T_capability_ack *)mp->b_rptr;

	ASSERT((cap_bits1 & TC1_INFO) == (tca->CAP_bits1 & TC1_INFO));
	so_proc_tcapability_ack(so, tca);

	cap_bits1 = tca->CAP_bits1;

	mutex_exit(&so->so_lock);
	freemsg(mp);

	if (cap_bits1 & TC1_INFO)
		return (check_tinfo(so));

	return (0);
}

/*
 * Process a T_CAPABILITY_ACK
 */
void
so_proc_tcapability_ack(struct sonode *so, struct T_capability_ack *tca)
{
	sotpi_info_t *sti = SOTOTPI(so);

	if (sti->sti_provinfo->tpi_capability == PI_DONTKNOW) {
		PI_PROVLOCK(sti->sti_provinfo);
		sti->sti_provinfo->tpi_capability = PI_YES;
		PI_PROVUNLOCK(sti->sti_provinfo);
	}

	if (tca->CAP_bits1 & TC1_ACCEPTOR_ID) {
		sti->sti_acceptor_id = tca->ACCEPTOR_id;
		so->so_mode |= SM_ACCEPTOR_ID;
	}

	if (tca->CAP_bits1 & TC1_INFO)
		copy_tinfo(so, &tca->INFO_ack);
}

/*
 * Retrieve socket error, clear error if not peek.
 */
int
sogeterr(struct sonode *so, boolean_t clear_err)
{
	int error;

	ASSERT(MUTEX_HELD(&so->so_lock));

	error = so->so_error;
	if (clear_err)
		so->so_error = 0;

	return (error);
}

/*
 * This routine is registered with the stream head to retrieve read
 * side errors.
 * It does not clear the socket error for a peeking read side operation.
 * If the error is to be cleared it sets *clearerr.
 */
int
sogetrderr(vnode_t *vp, int ispeek, int *clearerr)
{
	struct sonode *so = VTOSO(vp);
	int error;

	mutex_enter(&so->so_lock);
	if (ispeek) {
		error = so->so_error;
		*clearerr = 0;
	} else {
		error = so->so_error;
		so->so_error = 0;
		*clearerr = 1;
	}
	mutex_exit(&so->so_lock);
	return (error);
}

/*
 * This routine is registered with the stream head to retrieve write
 * side errors.
 * It does not clear the socket error for a peeking read side operation.
 * If the error is to be cleared it sets *clearerr.
 */
int
sogetwrerr(vnode_t *vp, int ispeek, int *clearerr)
{
	struct sonode *so = VTOSO(vp);
	int error;

	mutex_enter(&so->so_lock);
	if (so->so_state & SS_CANTSENDMORE) {
		error = EPIPE;
		*clearerr = 0;
	} else {
		error = so->so_error;
		if (ispeek) {
			*clearerr = 0;
		} else {
			so->so_error = 0;
			*clearerr = 1;
		}
	}
	mutex_exit(&so->so_lock);
	return (error);
}

/*
 * Set a nonpersistent read and write error on the socket.
 * Used when there is a T_uderror_ind for a connected socket.
 * The caller also needs to call strsetrerror and strsetwerror
 * after dropping the lock.
 */
void
soseterror(struct sonode *so, int error)
{
	ASSERT(error != 0);

	ASSERT(MUTEX_HELD(&so->so_lock));
	so->so_error = (ushort_t)error;
}

void
soisconnecting(struct sonode *so)
{
	ASSERT(MUTEX_HELD(&so->so_lock));
	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;
	cv_broadcast(&so->so_state_cv);
}

void
soisconnected(struct sonode *so)
{
	ASSERT(MUTEX_HELD(&so->so_lock));
	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTED;
	cv_broadcast(&so->so_state_cv);
}

/*
 * The caller also needs to call strsetrerror, strsetwerror and strseteof.
 */
void
soisdisconnected(struct sonode *so, int error)
{
	ASSERT(MUTEX_HELD(&so->so_lock));
	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE);
	so->so_error = (ushort_t)error;
	if (so->so_peercred != NULL) {
		crfree(so->so_peercred);
		so->so_peercred = NULL;
	}
	cv_broadcast(&so->so_state_cv);
}

/*
 * For connected AF_UNIX SOCK_DGRAM sockets when the peer closes.
 * Does not affect write side.
 * The caller also has to call strsetrerror.
 */
static void
sobreakconn(struct sonode *so, int error)
{
	ASSERT(MUTEX_HELD(&so->so_lock));
	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_error = (ushort_t)error;
	cv_broadcast(&so->so_state_cv);
}

/*
 * Can no longer send.
 * Caller must also call strsetwerror.
 *
 * We mark the peer address as no longer valid for getpeername, but
 * leave it around for so_unix_close to notify the peer (the
 * transport has no addressing held at that layer).
 */
void
socantsendmore(struct sonode *so)
{
	ASSERT(MUTEX_HELD(&so->so_lock));
	so->so_state |= SS_CANTSENDMORE;
	cv_broadcast(&so->so_state_cv);
}

/*
 * The caller must call strseteof(,1) as well as this routine
 * to change the socket state.
 */
void
socantrcvmore(struct sonode *so)
{
	ASSERT(MUTEX_HELD(&so->so_lock));
	so->so_state |= SS_CANTRCVMORE;
	cv_broadcast(&so->so_state_cv);
}

/*
 * The caller has sent down a "request_prim" primitive and wants to wait for
 * an ack ("ack_prim") or a T_ERROR_ACK for it.
 * The specified "ack_prim" can be a T_OK_ACK.
 *
 * Assumes that all the TPI acks are M_PCPROTO messages.
 *
 * Note that the socket is single-threaded (using so_lock_single)
 * for all operations that generate TPI ack messages. Since
 * only TPI ack messages are M_PCPROTO we should never receive
 * anything except either the ack we are expecting or a T_ERROR_ACK
 * for the same primitive.
 */
int
sowaitprim(struct sonode *so, t_scalar_t request_prim, t_scalar_t ack_prim,
    t_uscalar_t min_size, mblk_t **mpp, clock_t wait)
{
	mblk_t *mp;
	union T_primitives *tpr;
	int error;

	dprintso(so, 1, ("sowaitprim(%p, %d, %d, %d, %p, %lu)\n",
	    (void *)so, request_prim, ack_prim, min_size, (void *)mpp, wait));

	ASSERT(MUTEX_HELD(&so->so_lock));

	error = sowaitack(so, &mp, wait);
	if (error)
		return (error);

	dprintso(so, 1, ("got msg %p\n", (void *)mp));
	if (DB_TYPE(mp) != M_PCPROTO ||
	    MBLKL(mp) < sizeof (tpr->type)) {
		freemsg(mp);
		eprintsoline(so, EPROTO);
		return (EPROTO);
	}
	tpr = (union T_primitives *)mp->b_rptr;
	/*
	 * Did we get the primitive that we were asking for?
	 * For T_OK_ACK we also check that it matches the request primitive.
	 */
	if (tpr->type == ack_prim &&
	    (ack_prim != T_OK_ACK ||
	    tpr->ok_ack.CORRECT_prim == request_prim)) {
		if (MBLKL(mp) >= (ssize_t)min_size) {
			/* Found what we are looking for */
			*mpp = mp;
			return (0);
		}
		/* Too short */
		freemsg(mp);
		eprintsoline(so, EPROTO);
		return (EPROTO);
	}

	if (tpr->type == T_ERROR_ACK &&
	    tpr->error_ack.ERROR_prim == request_prim) {
		/* Error to the primitive we were looking for */
		if (tpr->error_ack.TLI_error == TSYSERR) {
			error = tpr->error_ack.UNIX_error;
		} else {
			error = proto_tlitosyserr(tpr->error_ack.TLI_error);
		}
		dprintso(so, 0, ("error_ack for %d: %d/%d ->%d\n",
		    tpr->error_ack.ERROR_prim, tpr->error_ack.TLI_error,
		    tpr->error_ack.UNIX_error, error));
		freemsg(mp);
		return (error);
	}
	/*
	 * Wrong primitive or T_ERROR_ACK for the wrong primitive
	 */
#ifdef DEBUG
	if (tpr->type == T_ERROR_ACK) {
		dprintso(so, 0, ("error_ack for %d: %d/%d\n",
		    tpr->error_ack.ERROR_prim, tpr->error_ack.TLI_error,
		    tpr->error_ack.UNIX_error));
	} else if (tpr->type == T_OK_ACK) {
		dprintso(so, 0, ("ok_ack for %d, expected %d for %d\n",
		    tpr->ok_ack.CORRECT_prim, ack_prim, request_prim));
	} else {
		dprintso(so, 0,
		    ("unexpected primitive %d, expected %d for %d\n",
		    tpr->type, ack_prim, request_prim));
	}
#endif /* DEBUG */

	freemsg(mp);
	eprintsoline(so, EPROTO);
	return (EPROTO);
}

/*
 * Wait for a T_OK_ACK for the specified primitive.
 */
int
sowaitokack(struct sonode *so, t_scalar_t request_prim)
{
	mblk_t *mp;
	int error;

	error = sowaitprim(so, request_prim, T_OK_ACK,
	    (t_uscalar_t)sizeof (struct T_ok_ack), &mp, 0);
	if (error)
		return (error);
	freemsg(mp);
	return (0);
}
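
/*
 * Typical usage of the request/ack helpers above. This is an illustrative
 * summary of the pattern already used by do_tinfo() and do_tcapability();
 * T_XXX_REQ/T_XXX_ACK, req, size and ack_size are placeholders for whatever
 * primitive pair and sizes the caller needs (error handling omitted):
 *
 *	mp = soallocproto1(&req, sizeof (req), size, _ALLOC_INTR, CRED());
 *	DB_TYPE(mp) = M_PCPROTO;
 *	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
 *	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
 *	mutex_enter(&so->so_lock);
 *	error = sowaitprim(so, T_XXX_REQ, T_XXX_ACK, ack_size, &mp, 0);
 *	mutex_exit(&so->so_lock);
 *	freemsg(mp);
 */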
/*
 * Queue a received TPI ack message on sti_ack_mp.
 */
void
soqueueack(struct sonode *so, mblk_t *mp)
{
	sotpi_info_t *sti = SOTOTPI(so);

	if (DB_TYPE(mp) != M_PCPROTO) {
		zcmn_err(getzoneid(), CE_WARN,
		    "sockfs: received unexpected M_PROTO TPI ack. Prim %d\n",
		    *(t_scalar_t *)mp->b_rptr);
		freemsg(mp);
		return;
	}

	mutex_enter(&so->so_lock);
	if (sti->sti_ack_mp != NULL) {
		dprintso(so, 1, ("sti_ack_mp already set\n"));
		freemsg(sti->sti_ack_mp);
		sti->sti_ack_mp = NULL;
	}
	sti->sti_ack_mp = mp;
	cv_broadcast(&sti->sti_ack_cv);
	mutex_exit(&so->so_lock);
}

/*
 * Wait for a TPI ack ignoring signals and errors.
 */
int
sowaitack(struct sonode *so, mblk_t **mpp, clock_t wait)
{
	sotpi_info_t *sti = SOTOTPI(so);

	ASSERT(MUTEX_HELD(&so->so_lock));

	while (sti->sti_ack_mp == NULL) {
#ifdef SOCK_TEST
		if (wait == 0 && sock_test_timelimit != 0)
			wait = sock_test_timelimit;
#endif
		if (wait != 0) {
			/*
			 * Only wait for the time limit.
			 */
			if (cv_reltimedwait(&sti->sti_ack_cv, &so->so_lock,
			    wait, TR_CLOCK_TICK) == -1) {
				eprintsoline(so, ETIME);
				return (ETIME);
			}
		} else
			cv_wait(&sti->sti_ack_cv, &so->so_lock);
	}
	*mpp = sti->sti_ack_mp;
#ifdef DEBUG
	{
		union T_primitives *tpr;
		mblk_t *mp = *mpp;

		tpr = (union T_primitives *)mp->b_rptr;
		ASSERT(DB_TYPE(mp) == M_PCPROTO);
		ASSERT(tpr->type == T_OK_ACK ||
		    tpr->type == T_ERROR_ACK ||
		    tpr->type == T_BIND_ACK ||
		    tpr->type == T_CAPABILITY_ACK ||
		    tpr->type == T_INFO_ACK ||
		    tpr->type == T_OPTMGMT_ACK);
	}
#endif /* DEBUG */
	sti->sti_ack_mp = NULL;
	return (0);
}

/*
 * Queue a received T_CONN_IND message on sti_conn_ind_head/tail.
 */
void
soqueueconnind(struct sonode *so, mblk_t *mp)
{
	sotpi_info_t *sti = SOTOTPI(so);

	if (DB_TYPE(mp) != M_PROTO) {
		zcmn_err(getzoneid(), CE_WARN,
		    "sockfs: received unexpected M_PCPROTO T_CONN_IND\n");
		freemsg(mp);
		return;
	}

	mutex_enter(&so->so_lock);
	ASSERT(mp->b_next == NULL);
	if (sti->sti_conn_ind_head == NULL) {
		sti->sti_conn_ind_head = mp;
	} else {
		ASSERT(sti->sti_conn_ind_tail->b_next == NULL);
		sti->sti_conn_ind_tail->b_next = mp;
	}
	sti->sti_conn_ind_tail = mp;
	/* Wakeup a single consumer of the T_CONN_IND */
	cv_signal(&so->so_acceptq_cv);
	mutex_exit(&so->so_lock);
}

/*
 * Wait for a T_CONN_IND.
 * Don't wait if nonblocking.
 * Accept signals and socket errors.
 */
int
sowaitconnind(struct sonode *so, int fmode, mblk_t **mpp)
{
	mblk_t *mp;
	sotpi_info_t *sti = SOTOTPI(so);
	int error = 0;

	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
	mutex_enter(&so->so_lock);
check_error:
	if (so->so_error) {
		error = sogeterr(so, B_TRUE);
		if (error) {
			mutex_exit(&so->so_lock);
			return (error);
		}
	}

	if (sti->sti_conn_ind_head == NULL) {
		if (fmode & (FNDELAY|FNONBLOCK)) {
			error = EWOULDBLOCK;
			goto done;
		}

		if (so->so_state & SS_CLOSING) {
			error = EINTR;
			goto done;
		}

		if (!cv_wait_sig_swap(&so->so_acceptq_cv, &so->so_lock)) {
			error = EINTR;
			goto done;
		}
		goto check_error;
	}
	mp = sti->sti_conn_ind_head;
	sti->sti_conn_ind_head = mp->b_next;
	mp->b_next = NULL;
	if (sti->sti_conn_ind_head == NULL) {
		ASSERT(sti->sti_conn_ind_tail == mp);
		sti->sti_conn_ind_tail = NULL;
	}
	*mpp = mp;
done:
	mutex_exit(&so->so_lock);
	return (error);
}

/*
 * Flush a T_CONN_IND matching the sequence number from the list.
 * Return zero if found; non-zero otherwise.
 * This is called very infrequently thus it is ok to do a linear search.
 */
int
soflushconnind(struct sonode *so, t_scalar_t seqno)
{
	mblk_t *prevmp, *mp;
	struct T_conn_ind *tci;
	sotpi_info_t *sti = SOTOTPI(so);

	mutex_enter(&so->so_lock);
	for (prevmp = NULL, mp = sti->sti_conn_ind_head; mp != NULL;
	    prevmp = mp, mp = mp->b_next) {
		tci = (struct T_conn_ind *)mp->b_rptr;
		if (tci->SEQ_number == seqno) {
			dprintso(so, 1,
			    ("t_discon_ind: found T_CONN_IND %d\n", seqno));
			/* Deleting last? */
			if (sti->sti_conn_ind_tail == mp) {
				sti->sti_conn_ind_tail = prevmp;
			}
			if (prevmp == NULL) {
				/* Deleting first */
				sti->sti_conn_ind_head = mp->b_next;
			} else {
				prevmp->b_next = mp->b_next;
			}
			mp->b_next = NULL;

			ASSERT((sti->sti_conn_ind_head == NULL &&
			    sti->sti_conn_ind_tail == NULL) ||
			    (sti->sti_conn_ind_head != NULL &&
			    sti->sti_conn_ind_tail != NULL));

			so->so_error = ECONNABORTED;
			mutex_exit(&so->so_lock);

			freemsg(mp);
			return (0);
		}
	}
	mutex_exit(&so->so_lock);
	dprintso(so, 1, ("t_discon_ind: NOT found T_CONN_IND %d\n", seqno));
	return (-1);
}

/*
 * Wait until the socket is connected or there is an error.
 * fmode should contain any nonblocking flags. nosig should be
 * set if the caller does not want the wait to be interrupted by a signal.
 */
int
sowaitconnected(struct sonode *so, int fmode, int nosig)
{
	int error;

	ASSERT(MUTEX_HELD(&so->so_lock));

	while ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) ==
	    SS_ISCONNECTING && so->so_error == 0) {

		dprintso(so, 1, ("waiting for SS_ISCONNECTED on %p\n",
		    (void *)so));
		if (fmode & (FNDELAY|FNONBLOCK))
			return (EINPROGRESS);

		if (so->so_state & SS_CLOSING)
			return (EINTR);

		if (nosig)
			cv_wait(&so->so_state_cv, &so->so_lock);
		else if (!cv_wait_sig_swap(&so->so_state_cv, &so->so_lock)) {
			/*
			 * Return EINTR and let the application use
			 * nonblocking techniques for detecting when
			 * the connection has been established.
			 */
			return (EINTR);
		}
		dprintso(so, 1, ("awoken on %p\n", (void *)so));
	}

	if (so->so_error != 0) {
		error = sogeterr(so, B_TRUE);
		ASSERT(error != 0);
		dprintso(so, 1, ("sowaitconnected: error %d\n", error));
		return (error);
	}
	if (!(so->so_state & SS_ISCONNECTED)) {
		/*
		 * Could have received a T_ORDREL_IND or a T_DISCON_IND with
		 * zero errno. Or another thread could have consumed so_error
		 * e.g. by calling read.
		 */
		error = ECONNREFUSED;
		dprintso(so, 1, ("sowaitconnected: error %d\n", error));
		return (error);
	}
	return (0);
}


/*
 * Handle the signal generation aspect of urgent data.
 */
static void
so_oob_sig(struct sonode *so, int extrasig,
    strsigset_t *signals, strpollset_t *pollwakeups)
{
	sotpi_info_t *sti = SOTOTPI(so);

	ASSERT(MUTEX_HELD(&so->so_lock));

	ASSERT(so_verify_oobstate(so));
	ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt);
	if (sti->sti_oobsigcnt > sti->sti_oobcnt) {
		/*
		 * Signal has already been generated once for this
		 * urgent "event". However, since TCP can receive updated
		 * urgent pointers we still generate a signal.
		 */
		ASSERT(so->so_state & SS_OOBPEND);
		if (extrasig) {
			*signals |= S_RDBAND;
			*pollwakeups |= POLLRDBAND;
		}
		return;
	}

	sti->sti_oobsigcnt++;
	ASSERT(sti->sti_oobsigcnt > 0);	/* Wraparound */
	ASSERT(sti->sti_oobsigcnt > sti->sti_oobcnt);

	/*
	 * Record (for select/poll) that urgent data is pending.
	 */
	so->so_state |= SS_OOBPEND;
	/*
	 * New urgent data on the way so forget about any old
	 * urgent data.
	 */
	so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);
	if (so->so_oobmsg != NULL) {
		dprintso(so, 1, ("sock: discarding old oob\n"));
		freemsg(so->so_oobmsg);
		so->so_oobmsg = NULL;
	}
	*signals |= S_RDBAND;
	*pollwakeups |= POLLRDBAND;
	ASSERT(so_verify_oobstate(so));
}

/*
 * Handle the processing of the T_EXDATA_IND with urgent data.
 * Returns the T_EXDATA_IND if it should be queued on the read queue.
 */
/* ARGSUSED2 */
static mblk_t *
so_oob_exdata(struct sonode *so, mblk_t *mp,
    strsigset_t *signals, strpollset_t *pollwakeups)
{
	sotpi_info_t *sti = SOTOTPI(so);

	ASSERT(MUTEX_HELD(&so->so_lock));

	ASSERT(so_verify_oobstate(so));

	ASSERT(sti->sti_oobsigcnt > sti->sti_oobcnt);

	sti->sti_oobcnt++;
	ASSERT(sti->sti_oobcnt > 0);	/* wraparound? */
	ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt);

	/*
	 * Set MSGMARK for SIOCATMARK.
	 */
	mp->b_flag |= MSGMARK;

	ASSERT(so_verify_oobstate(so));
	return (mp);
}

/*
 * Handle the processing of the actual urgent data.
 * Returns the data mblk if it should be queued on the read queue.
 */
static mblk_t *
so_oob_data(struct sonode *so, mblk_t *mp,
    strsigset_t *signals, strpollset_t *pollwakeups)
{
	sotpi_info_t *sti = SOTOTPI(so);

	ASSERT(MUTEX_HELD(&so->so_lock));

	ASSERT(so_verify_oobstate(so));

	ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt);
	ASSERT(mp != NULL);
	/*
	 * For OOBINLINE we keep the data in the T_EXDATA_IND.
	 * Otherwise we store it in so_oobmsg.
	 */
	ASSERT(so->so_oobmsg == NULL);
	if (so->so_options & SO_OOBINLINE) {
		*pollwakeups |= POLLIN | POLLRDNORM | POLLRDBAND;
		*signals |= S_INPUT | S_RDNORM;
	} else {
		*pollwakeups |= POLLRDBAND;
		so->so_state |= SS_HAVEOOBDATA;
		so->so_oobmsg = mp;
		mp = NULL;
	}
	ASSERT(so_verify_oobstate(so));
	return (mp);
}

/*
 * Caller must hold the mutex.
 * For delayed processing, save the T_DISCON_IND received
 * from below on sti_discon_ind_mp.
 * When the message is processed the framework will call:
 *	(*func)(so, mp);
 */
static void
so_save_discon_ind(struct sonode *so,
    mblk_t *mp,
    void (*func)(struct sonode *so, mblk_t *))
{
	sotpi_info_t *sti = SOTOTPI(so);

	ASSERT(MUTEX_HELD(&so->so_lock));

	/*
	 * Discard new T_DISCON_IND if we have already received another.
	 * Currently the earlier message can either be on sti_discon_ind_mp
	 * or being processed.
	 */
	if (sti->sti_discon_ind_mp != NULL || (so->so_flag & SOASYNC_UNBIND)) {
		zcmn_err(getzoneid(), CE_WARN,
		    "sockfs: received unexpected additional T_DISCON_IND\n");
		freemsg(mp);
		return;
	}
	mp->b_prev = (mblk_t *)func;
	mp->b_next = NULL;
	sti->sti_discon_ind_mp = mp;
}

/*
 * Caller must hold the mutex and make sure that either SOLOCKED
 * or SOASYNC_UNBIND is set. Called from so_unlock_single().
 * Perform delayed processing of T_DISCON_IND message on sti_discon_ind_mp.
 * Need to ensure that strsock_proto() will not end up sleeping for
 * SOASYNC_UNBIND while executing this function.
 */
void
so_drain_discon_ind(struct sonode *so)
{
	mblk_t *bp;
	void (*func)(struct sonode *so, mblk_t *);
	sotpi_info_t *sti = SOTOTPI(so);

	ASSERT(MUTEX_HELD(&so->so_lock));
	ASSERT(so->so_flag & (SOLOCKED|SOASYNC_UNBIND));

	/* Process T_DISCON_IND on sti_discon_ind_mp */
	if ((bp = sti->sti_discon_ind_mp) != NULL) {
		sti->sti_discon_ind_mp = NULL;
		func = (void (*)())bp->b_prev;
		bp->b_prev = NULL;

		/*
		 * This (*func) is supposed to generate a message downstream
		 * and we need to have a flag set until the corresponding
		 * upstream message reaches stream head.
		 * When processing T_DISCON_IND in strsock_discon_ind
		 * we hold SOASYNC_UNBIND when sending T_UNBIND_REQ down and
		 * drop the flag after we get the ACK in strsock_proto.
		 */
		(void) (*func)(so, bp);
	}
}

/*
 * Caller must hold the mutex.
 * Remove the T_DISCON_IND on sti_discon_ind_mp.
 */
void
so_flush_discon_ind(struct sonode *so)
{
	mblk_t *bp;
	sotpi_info_t *sti = SOTOTPI(so);

	ASSERT(MUTEX_HELD(&so->so_lock));

	/*
	 * Remove T_DISCON_IND mblk at sti_discon_ind_mp.
	 */
	if ((bp = sti->sti_discon_ind_mp) != NULL) {
		sti->sti_discon_ind_mp = NULL;
		bp->b_prev = NULL;
		freemsg(bp);
	}
}
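
/*
 * Note on the helpers above: so_save_discon_ind() parks a T_DISCON_IND on
 * sti_discon_ind_mp and records its processing callback in the (otherwise
 * unused) b_prev pointer of that mblk; so_drain_discon_ind() later recovers
 * the callback from b_prev and invokes it, roughly:
 *
 *	func = (void (*)())bp->b_prev;
 *	bp->b_prev = NULL;
 *	(*func)(so, bp);
 *
 * so the callback (strsock_discon_ind() below) sees the same sonode and mblk
 * it would have seen had the message been processed immediately.
 */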
1505 * 1506 * SOASYNC_UNBIND should be held in this function, during the non-blocking 1507 * unbind operation, and should be released only after we receive the ACK 1508 * in strsock_proto, for the T_UNBIND_REQ sent here. Since SOLOCKED is not set, 1509 * no TPI messages would be sent down at this time. This is to prevent M_FLUSH 1510 * sent from either this function or tcp_unbind(), flushing away any TPI 1511 * message that is being sent down and stays in a lower module's queue. 1512 * 1513 * This function drops so_lock and grabs it again. 1514 */ 1515 static void 1516 strsock_discon_ind(struct sonode *so, mblk_t *discon_mp) 1517 { 1518 struct vnode *vp; 1519 struct stdata *stp; 1520 union T_primitives *tpr; 1521 struct T_unbind_req *ubr; 1522 mblk_t *mp; 1523 int error; 1524 sotpi_info_t *sti = SOTOTPI(so); 1525 1526 ASSERT(MUTEX_HELD(&so->so_lock)); 1527 ASSERT(discon_mp); 1528 ASSERT(discon_mp->b_rptr); 1529 1530 tpr = (union T_primitives *)discon_mp->b_rptr; 1531 ASSERT(tpr->type == T_DISCON_IND); 1532 1533 vp = SOTOV(so); 1534 stp = vp->v_stream; 1535 ASSERT(stp); 1536 1537 /* 1538 * Not a listener 1539 */ 1540 ASSERT((so->so_state & SS_ACCEPTCONN) == 0); 1541 1542 /* 1543 * This assumes that the name space for DISCON_reason 1544 * is the errno name space. 1545 */ 1546 soisdisconnected(so, tpr->discon_ind.DISCON_reason); 1547 sti->sti_laddr_valid = 0; 1548 sti->sti_faddr_valid = 0; 1549 1550 /* 1551 * Unbind with the transport without blocking. 1552 * If we've already received a T_DISCON_IND do not unbind. 1553 * 1554 * If there is no preallocated unbind message, we have already 1555 * unbound with the transport 1556 * 1557 * If the socket is not bound, no need to unbind. 1558 */ 1559 mp = sti->sti_unbind_mp; 1560 if (mp == NULL) { 1561 ASSERT(!(so->so_state & SS_ISBOUND)); 1562 mutex_exit(&so->so_lock); 1563 } else if (!(so->so_state & SS_ISBOUND)) { 1564 mutex_exit(&so->so_lock); 1565 } else { 1566 sti->sti_unbind_mp = NULL; 1567 1568 /* 1569 * Is another T_DISCON_IND being processed. 1570 */ 1571 ASSERT((so->so_flag & SOASYNC_UNBIND) == 0); 1572 1573 /* 1574 * Make strsock_proto ignore T_OK_ACK and T_ERROR_ACK for 1575 * this unbind. Set SOASYNC_UNBIND. This should be cleared 1576 * only after we receive the ACK in strsock_proto. 1577 */ 1578 so->so_flag |= SOASYNC_UNBIND; 1579 ASSERT(!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING))); 1580 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN); 1581 sti->sti_laddr_valid = 0; 1582 mutex_exit(&so->so_lock); 1583 1584 /* 1585 * Send down T_UNBIND_REQ ignoring flow control. 1586 * XXX Assumes that MSG_IGNFLOW implies that this thread 1587 * does not run service procedures. 1588 */ 1589 ASSERT(DB_TYPE(mp) == M_PROTO); 1590 ubr = (struct T_unbind_req *)mp->b_rptr; 1591 mp->b_wptr += sizeof (*ubr); 1592 ubr->PRIM_type = T_UNBIND_REQ; 1593 1594 /* 1595 * Flush the read and write side (except stream head read queue) 1596 * and send down T_UNBIND_REQ. 1597 */ 1598 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW); 1599 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1600 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 1601 /* LINTED - warning: statement has no consequent: if */ 1602 if (error) { 1603 eprintsoline(so, error); 1604 } 1605 } 1606 1607 if (tpr->discon_ind.DISCON_reason != 0) 1608 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 1609 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 1610 strseteof(SOTOV(so), 1); 1611 /* 1612 * strseteof takes care of read side wakeups, 1613 * pollwakeups, and signals. 
	dprintso(so, 1, ("T_DISCON_IND: error %d\n", so->so_error));
	freemsg(discon_mp);


	pollwakeup(&stp->sd_pollist, POLLOUT);
	mutex_enter(&stp->sd_lock);

	/*
	 * Wake sleeping write
	 */
	if (stp->sd_flag & WSLEEP) {
		stp->sd_flag &= ~WSLEEP;
		cv_broadcast(&stp->sd_wrq->q_wait);
	}

	/*
	 * strsendsig can handle multiple signals with a
	 * single call. Send SIGPOLL for S_OUTPUT event.
	 */
	if (stp->sd_sigflags & S_OUTPUT)
		strsendsig(stp->sd_siglist, S_OUTPUT, 0, 0);

	mutex_exit(&stp->sd_lock);
	mutex_enter(&so->so_lock);
}

/*
 * This routine is registered with the stream head to receive M_PROTO
 * and M_PCPROTO messages.
 *
 * Returns NULL if the message was consumed.
 * Returns an mblk to make that mblk be processed (and queued) by the stream
 * head.
 *
 * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and
 * *pollwakeups) for the stream head to take action on. Note that since
 * sockets always deliver SIGIO for every new piece of data this routine
 * never sets *firstmsgsigs; any signals are returned in *allmsgsigs.
 *
 * This routine handles all data related TPI messages independent of
 * the type of the socket, i.e. it doesn't care if a T_UNITDATA_IND message
 * arrives on a SOCK_STREAM.
 */
static mblk_t *
strsock_proto(vnode_t *vp, mblk_t *mp,
    strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
    strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
{
	union T_primitives *tpr;
	struct sonode *so;
	sotpi_info_t *sti;
	uint32_t auditing = AU_AUDITING();

	so = VTOSO(vp);
	sti = SOTOTPI(so);

	dprintso(so, 1, ("strsock_proto(%p, %p)\n", (void *)vp, (void *)mp));

	/* Set default return values */
	*firstmsgsigs = *wakeups = *allmsgsigs = *pollwakeups = 0;

	ASSERT(DB_TYPE(mp) == M_PROTO ||
	    DB_TYPE(mp) == M_PCPROTO);

	if (MBLKL(mp) < sizeof (tpr->type)) {
		/* The message is too short to even contain the primitive */
		zcmn_err(getzoneid(), CE_WARN,
		    "sockfs: Too short TPI message received. Len = %ld\n",
		    (ptrdiff_t)(MBLKL(mp)));
		freemsg(mp);
		return (NULL);
	}
	if (!__TPI_PRIM_ISALIGNED(mp->b_rptr)) {
		/* The read pointer is not aligned correctly for TPI */
		zcmn_err(getzoneid(), CE_WARN,
		    "sockfs: Unaligned TPI message received. rptr = %p\n",
		    (void *)mp->b_rptr);
		freemsg(mp);
		return (NULL);
	}
	tpr = (union T_primitives *)mp->b_rptr;
	dprintso(so, 1, ("strsock_proto: primitive %d\n", tpr->type));

	switch (tpr->type) {

	case T_DATA_IND:
		if (MBLKL(mp) < sizeof (struct T_data_ind)) {
			zcmn_err(getzoneid(), CE_WARN,
			    "sockfs: Too short T_DATA_IND. Len = %ld\n",
			    (ptrdiff_t)(MBLKL(mp)));
			freemsg(mp);
			return (NULL);
		}
		/*
		 * Ignore zero-length T_DATA_IND messages. These might be
		 * generated by some transports.
		 * This is needed to prevent read (which skips the M_PROTO
		 * part) from unexpectedly returning 0 (or returning
		 * EWOULDBLOCK on a non-blocking socket after select/poll
		 * has indicated that data is available).
		 */
		if (msgdsize(mp->b_cont) == 0) {
			dprintso(so, 0,
			    ("strsock_proto: zero length T_DATA_IND\n"));
			freemsg(mp);
			return (NULL);
		}
		*allmsgsigs = S_INPUT | S_RDNORM;
		*pollwakeups = POLLIN | POLLRDNORM;
		*wakeups = RSLEEP;
		return (mp);

	case T_UNITDATA_IND: {
		struct T_unitdata_ind *tudi = &tpr->unitdata_ind;
		void *addr;
		t_uscalar_t addrlen;

		if (MBLKL(mp) < sizeof (struct T_unitdata_ind)) {
			zcmn_err(getzoneid(), CE_WARN,
			    "sockfs: Too short T_UNITDATA_IND. Len = %ld\n",
			    (ptrdiff_t)(MBLKL(mp)));
			freemsg(mp);
			return (NULL);
		}

		/* Is this not a connected datagram socket? */
		if ((so->so_mode & SM_CONNREQUIRED) ||
		    !(so->so_state & SS_ISCONNECTED)) {
			/*
			 * Not a connected datagram socket. Look for
			 * the SO_UNIX_CLOSE option. If such an option is found
			 * discard the message (since it has no meaning
			 * unless connected).
			 */
			if (so->so_family == AF_UNIX && msgdsize(mp) == 0 &&
			    tudi->OPT_length != 0) {
				void *opt;
				t_uscalar_t optlen = tudi->OPT_length;

				opt = sogetoff(mp, tudi->OPT_offset,
				    optlen, __TPI_ALIGN_SIZE);
				if (opt == NULL) {
					/* The len/off falls outside mp */
					freemsg(mp);
					mutex_enter(&so->so_lock);
					soseterror(so, EPROTO);
					mutex_exit(&so->so_lock);
					zcmn_err(getzoneid(), CE_WARN,
					    "sockfs: T_unidata_ind with "
					    "invalid optlen/offset %u/%d\n",
					    optlen, tudi->OPT_offset);
					return (NULL);
				}
				if (so_getopt_unix_close(opt, optlen)) {
					freemsg(mp);
					return (NULL);
				}
			}
			*allmsgsigs = S_INPUT | S_RDNORM;
			*pollwakeups = POLLIN | POLLRDNORM;
			*wakeups = RSLEEP;
			if (auditing)
				audit_sock(T_UNITDATA_IND, strvp2wq(vp),
				    mp, 0);
			return (mp);
		}

		/*
		 * A connected datagram socket. For AF_INET{,6} we verify that
		 * the source address matches the "connected to" address.
		 * The semantics of AF_UNIX sockets are to not verify
		 * the source address.
		 * Note that this source address verification is transport
		 * specific. Thus the real fix would be to extend TPI
		 * to allow T_CONN_REQ messages to be sent to connectionless
		 * transport providers and always let the transport provider
		 * do whatever filtering is needed.
		 *
		 * The verification/filtering semantics for transports
		 * other than AF_INET and AF_UNIX are unknown. The choice
		 * would be to either filter using bcmp or let all messages
		 * get through. This code does not filter other address
		 * families since this at least allows the application to
		 * work around any missing filtering.
		 *
		 * XXX Should we move filtering to UDP/ICMP???
		 * That would require passing e.g. a T_DISCON_REQ to UDP
		 * when the socket becomes unconnected.
		 */
		addrlen = tudi->SRC_length;
		/*
		 * The alignment restriction is really too strict but
		 * we want enough alignment to inspect the fields of
		 * a sockaddr_in.
		 */
		addr = sogetoff(mp, tudi->SRC_offset, addrlen,
		    __TPI_ALIGN_SIZE);
		if (addr == NULL) {
			freemsg(mp);
			mutex_enter(&so->so_lock);
			soseterror(so, EPROTO);
			mutex_exit(&so->so_lock);
			zcmn_err(getzoneid(), CE_WARN,
			    "sockfs: T_unidata_ind with invalid "
			    "addrlen/offset %u/%d\n",
			    addrlen, tudi->SRC_offset);
			return (NULL);
		}

		if (so->so_family == AF_INET) {
			/*
			 * For AF_INET we allow wildcarding both sin_addr
			 * and sin_port.
			 */
			struct sockaddr_in *faddr, *sin;

			/* Prevent sti_faddr_sa from changing while accessed */
			mutex_enter(&so->so_lock);
			ASSERT(sti->sti_faddr_len ==
			    (socklen_t)sizeof (struct sockaddr_in));
			faddr = (struct sockaddr_in *)sti->sti_faddr_sa;
			sin = (struct sockaddr_in *)addr;
			if (addrlen !=
			    (t_uscalar_t)sizeof (struct sockaddr_in) ||
			    (sin->sin_addr.s_addr != faddr->sin_addr.s_addr &&
			    faddr->sin_addr.s_addr != INADDR_ANY) ||
			    (so->so_type != SOCK_RAW &&
			    sin->sin_port != faddr->sin_port &&
			    faddr->sin_port != 0)) {
#ifdef DEBUG
				dprintso(so, 0,
				    ("sockfs: T_UNITDATA_IND mismatch: %s",
				    pr_addr(so->so_family,
				    (struct sockaddr *)addr, addrlen)));
				dprintso(so, 0, (" - %s\n",
				    pr_addr(so->so_family, sti->sti_faddr_sa,
				    (t_uscalar_t)sti->sti_faddr_len)));
#endif /* DEBUG */
				mutex_exit(&so->so_lock);
				freemsg(mp);
				return (NULL);
			}
			mutex_exit(&so->so_lock);
		} else if (so->so_family == AF_INET6) {
			/*
			 * For AF_INET6 we allow wildcarding both sin6_addr
			 * and sin6_port.
			 */
			struct sockaddr_in6 *faddr6, *sin6;
			static struct in6_addr zeroes; /* inits to all zeros */

			/* Prevent sti_faddr_sa from changing while accessed */
			mutex_enter(&so->so_lock);
			ASSERT(sti->sti_faddr_len ==
			    (socklen_t)sizeof (struct sockaddr_in6));
			faddr6 = (struct sockaddr_in6 *)sti->sti_faddr_sa;
			sin6 = (struct sockaddr_in6 *)addr;
			/* XXX could we get a mapped address ::ffff:0.0.0.0 ? */
			if (addrlen !=
			    (t_uscalar_t)sizeof (struct sockaddr_in6) ||
			    (!IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
			    &faddr6->sin6_addr) &&
			    !IN6_ARE_ADDR_EQUAL(&faddr6->sin6_addr, &zeroes)) ||
			    (so->so_type != SOCK_RAW &&
			    sin6->sin6_port != faddr6->sin6_port &&
			    faddr6->sin6_port != 0)) {
#ifdef DEBUG
				dprintso(so, 0,
				    ("sockfs: T_UNITDATA_IND mismatch: %s",
				    pr_addr(so->so_family,
				    (struct sockaddr *)addr, addrlen)));
				dprintso(so, 0, (" - %s\n",
				    pr_addr(so->so_family, sti->sti_faddr_sa,
				    (t_uscalar_t)sti->sti_faddr_len)));
#endif /* DEBUG */
				mutex_exit(&so->so_lock);
				freemsg(mp);
				return (NULL);
			}
			mutex_exit(&so->so_lock);
		} else if (so->so_family == AF_UNIX &&
		    msgdsize(mp->b_cont) == 0 &&
		    tudi->OPT_length != 0) {
			/*
			 * Attempt to extract AF_UNIX
			 * SO_UNIX_CLOSE indication from options.
			 */
			void *opt;
			t_uscalar_t optlen = tudi->OPT_length;

			opt = sogetoff(mp, tudi->OPT_offset,
			    optlen, __TPI_ALIGN_SIZE);
			if (opt == NULL) {
				/* The len/off falls outside mp */
				freemsg(mp);
				mutex_enter(&so->so_lock);
				soseterror(so, EPROTO);
				mutex_exit(&so->so_lock);
				zcmn_err(getzoneid(), CE_WARN,
				    "sockfs: T_unidata_ind with invalid "
				    "optlen/offset %u/%d\n",
				    optlen, tudi->OPT_offset);
				return (NULL);
			}
			/*
			 * If we received a unix close indication mark the
			 * socket and discard this message.
			 */
			if (so_getopt_unix_close(opt, optlen)) {
				mutex_enter(&so->so_lock);
				sobreakconn(so, ECONNRESET);
				mutex_exit(&so->so_lock);
				strsetrerror(SOTOV(so), 0, 0, sogetrderr);
				freemsg(mp);
				*pollwakeups = POLLIN | POLLRDNORM;
				*allmsgsigs = S_INPUT | S_RDNORM;
				*wakeups = RSLEEP;
				return (NULL);
			}
		}
		*allmsgsigs = S_INPUT | S_RDNORM;
		*pollwakeups = POLLIN | POLLRDNORM;
		*wakeups = RSLEEP;
		return (mp);
	}

	case T_OPTDATA_IND: {
		struct T_optdata_ind *tdi = &tpr->optdata_ind;

		if (MBLKL(mp) < sizeof (struct T_optdata_ind)) {
			zcmn_err(getzoneid(), CE_WARN,
			    "sockfs: Too short T_OPTDATA_IND. Len = %ld\n",
			    (ptrdiff_t)(MBLKL(mp)));
			freemsg(mp);
			return (NULL);
		}
		/*
		 * Allow zero-length messages carrying options.
		 * This is used when carrying the SO_UNIX_CLOSE option.
		 */
		if (so->so_family == AF_UNIX && msgdsize(mp->b_cont) == 0 &&
		    tdi->OPT_length != 0) {
			/*
			 * Attempt to extract AF_UNIX close indication
			 * from the options. Ignore any other options -
			 * those are handled once the message is removed
			 * from the queue.
			 * The close indication message should not carry data.
			 */
			void *opt;
			t_uscalar_t optlen = tdi->OPT_length;

			opt = sogetoff(mp, tdi->OPT_offset,
			    optlen, __TPI_ALIGN_SIZE);
			if (opt == NULL) {
				/* The len/off falls outside mp */
				freemsg(mp);
				mutex_enter(&so->so_lock);
				soseterror(so, EPROTO);
				mutex_exit(&so->so_lock);
				zcmn_err(getzoneid(), CE_WARN,
				    "sockfs: T_optdata_ind with invalid "
				    "optlen/offset %u/%d\n",
				    optlen, tdi->OPT_offset);
				return (NULL);
			}
			/*
			 * If we received a close indication mark the
			 * socket and discard this message.
			 */
			if (so_getopt_unix_close(opt, optlen)) {
				mutex_enter(&so->so_lock);
				socantsendmore(so);
				sti->sti_faddr_valid = 0;
				mutex_exit(&so->so_lock);
				strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
				freemsg(mp);
				return (NULL);
			}
		}
		*allmsgsigs = S_INPUT | S_RDNORM;
		*pollwakeups = POLLIN | POLLRDNORM;
		*wakeups = RSLEEP;
		return (mp);
	}

	case T_EXDATA_IND: {
		mblk_t *mctl, *mdata;
		mblk_t *lbp;
		union T_primitives *tprp;
		struct stdata *stp;
		queue_t *qp;

		if (MBLKL(mp) < sizeof (struct T_exdata_ind)) {
			zcmn_err(getzoneid(), CE_WARN,
			    "sockfs: Too short T_EXDATA_IND. Len = %ld\n",
			    (ptrdiff_t)(MBLKL(mp)));
			freemsg(mp);
			return (NULL);
		}
		/*
		 * Ignore zero-length T_EXDATA_IND messages. These might be
		 * generated by some transports.
		 *
		 * This is needed to prevent read (which skips the M_PROTO
		 * part) from unexpectedly returning 0 (or returning
		 * EWOULDBLOCK on a non-blocking socket after select/poll
		 * has indicated that data is available).
		 */
2023 */ 2024 dprintso(so, 1, 2025 ("T_EXDATA_IND(%p): counts %d/%d state %s\n", 2026 (void *)vp, sti->sti_oobsigcnt, sti->sti_oobcnt, 2027 pr_state(so->so_state, so->so_mode))); 2028 2029 if (msgdsize(mp->b_cont) == 0) { 2030 dprintso(so, 0, 2031 ("strsock_proto: zero length T_EXDATA_IND\n")); 2032 freemsg(mp); 2033 return (NULL); 2034 } 2035 2036 /* 2037 * Split into the T_EXDATA_IND and the M_DATA part. 2038 * We process these three pieces separately: 2039 * signal generation 2040 * handling T_EXDATA_IND 2041 * handling M_DATA component 2042 */ 2043 mctl = mp; 2044 mdata = mctl->b_cont; 2045 mctl->b_cont = NULL; 2046 mutex_enter(&so->so_lock); 2047 so_oob_sig(so, 0, allmsgsigs, pollwakeups); 2048 mctl = so_oob_exdata(so, mctl, allmsgsigs, pollwakeups); 2049 mdata = so_oob_data(so, mdata, allmsgsigs, pollwakeups); 2050 2051 stp = vp->v_stream; 2052 ASSERT(stp != NULL); 2053 qp = _RD(stp->sd_wrq); 2054 2055 mutex_enter(QLOCK(qp)); 2056 lbp = qp->q_last; 2057 2058 /* 2059 * We want to avoid queueing up a string of T_EXDATA_IND 2060 * messages with no intervening data messages at the stream 2061 * head. These messages contribute to the total message 2062 * count. Eventually this can lead to STREAMS flow control 2063 * and also cause TCP to advertise a zero window condition 2064 * to the peer. This can happen in the degenerate case where 2065 * the sender and receiver exchange only OOB data. The sender 2066 * only sends messages with MSG_OOB flag and the receiver 2067 * receives only MSG_OOB messages and does not use SO_OOBINLINE. 2068 * An example of this scenario has been reported in applications 2069 * that use OOB data to exchange heartbeats. Flow control 2070 * relief will never happen if the application only reads OOB 2071 * data, which is done directly by sorecvoob(), and the 2072 * T_EXDATA_IND messages at the stream head won't be consumed. 2073 * Note that there is no correctness issue in compressing the 2074 * string of T_EXDATA_IND messages into a single T_EXDATA_IND 2075 * message. A single read that does not specify MSG_OOB will 2076 * read across all the marks in a loop in sotpi_recvmsg(). 2077 * Each mark is individually distinguishable only if the 2078 * T_EXDATA_IND messages are separated by data messages. 2079 */ 2080 if ((qp->q_first != NULL) && (DB_TYPE(lbp) == M_PROTO)) { 2081 tprp = (union T_primitives *)lbp->b_rptr; 2082 if ((tprp->type == T_EXDATA_IND) && 2083 !(so->so_options & SO_OOBINLINE)) { 2084 2085 /* 2086 * free the new M_PROTO message 2087 */ 2088 freemsg(mctl); 2089 2090 /* 2091 * adjust the OOB count and OOB signal count 2092 * just incremented for the new OOB data. 2093 */ 2094 sti->sti_oobcnt--; 2095 sti->sti_oobsigcnt--; 2096 mutex_exit(QLOCK(qp)); 2097 mutex_exit(&so->so_lock); 2098 return (NULL); 2099 } 2100 } 2101 mutex_exit(QLOCK(qp)); 2102 2103 /* 2104 * Pass the T_EXDATA_IND and the M_DATA back separately 2105 * by using b_next linkage. (The stream head will queue any 2106 * b_next linked messages separately.) This is needed 2107 * since MSGMARK applies to the last byte of the message, 2108 * and hence we cannot have any M_DATA component attached 2109 * to the marked T_EXDATA_IND. Note that the stream head 2110 * will not consolidate M_DATA messages onto an MSGMARK'ed 2111 * message in order to preserve the constraint that 2112 * the T_EXDATA_IND always is a separate message. 
2113 */ 2114 ASSERT(mctl != NULL); 2115 mctl->b_next = mdata; 2116 mp = mctl; 2117 #ifdef DEBUG 2118 if (mdata == NULL) { 2119 dprintso(so, 1, 2120 ("after outofline T_EXDATA_IND(%p): " 2121 "counts %d/%d poll 0x%x sig 0x%x state %s\n", 2122 (void *)vp, sti->sti_oobsigcnt, 2123 sti->sti_oobcnt, *pollwakeups, *allmsgsigs, 2124 pr_state(so->so_state, so->so_mode))); 2125 } else { 2126 dprintso(so, 1, 2127 ("after inline T_EXDATA_IND(%p): " 2128 "counts %d/%d poll 0x%x sig 0x%x state %s\n", 2129 (void *)vp, sti->sti_oobsigcnt, 2130 sti->sti_oobcnt, *pollwakeups, *allmsgsigs, 2131 pr_state(so->so_state, so->so_mode))); 2132 } 2133 #endif /* DEBUG */ 2134 mutex_exit(&so->so_lock); 2135 *wakeups = RSLEEP; 2136 return (mp); 2137 } 2138 2139 case T_CONN_CON: { 2140 struct T_conn_con *conn_con; 2141 void *addr; 2142 t_uscalar_t addrlen; 2143 2144 /* 2145 * Verify the state, update the state to ISCONNECTED, 2146 * record the potentially new address in the message, 2147 * and drop the message. 2148 */ 2149 if (MBLKL(mp) < sizeof (struct T_conn_con)) { 2150 zcmn_err(getzoneid(), CE_WARN, 2151 "sockfs: Too short T_CONN_CON. Len = %ld\n", 2152 (ptrdiff_t)(MBLKL(mp))); 2153 freemsg(mp); 2154 return (NULL); 2155 } 2156 2157 mutex_enter(&so->so_lock); 2158 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 2159 SS_ISCONNECTING) { 2160 mutex_exit(&so->so_lock); 2161 dprintso(so, 1, 2162 ("T_CONN_CON: state %x\n", so->so_state)); 2163 freemsg(mp); 2164 return (NULL); 2165 } 2166 2167 conn_con = &tpr->conn_con; 2168 addrlen = conn_con->RES_length; 2169 /* 2170 * Allow the address to be of different size than sent down 2171 * in the T_CONN_REQ as long as it doesn't exceed the maxlen. 2172 * For AF_UNIX require the identical length. 2173 */ 2174 if (so->so_family == AF_UNIX ? 2175 addrlen != (t_uscalar_t)sizeof (sti->sti_ux_laddr) : 2176 addrlen > (t_uscalar_t)sti->sti_faddr_maxlen) { 2177 zcmn_err(getzoneid(), CE_WARN, 2178 "sockfs: T_conn_con with different " 2179 "length %u/%d\n", 2180 addrlen, conn_con->RES_length); 2181 soisdisconnected(so, EPROTO); 2182 sti->sti_laddr_valid = 0; 2183 sti->sti_faddr_valid = 0; 2184 mutex_exit(&so->so_lock); 2185 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 2186 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2187 strseteof(SOTOV(so), 1); 2188 freemsg(mp); 2189 /* 2190 * strseteof takes care of read side wakeups, 2191 * pollwakeups, and signals. 2192 */ 2193 *wakeups = WSLEEP; 2194 *allmsgsigs = S_OUTPUT; 2195 *pollwakeups = POLLOUT; 2196 return (NULL); 2197 } 2198 addr = sogetoff(mp, conn_con->RES_offset, addrlen, 1); 2199 if (addr == NULL) { 2200 zcmn_err(getzoneid(), CE_WARN, 2201 "sockfs: T_conn_con with invalid " 2202 "addrlen/offset %u/%d\n", 2203 addrlen, conn_con->RES_offset); 2204 mutex_exit(&so->so_lock); 2205 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 2206 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2207 strseteof(SOTOV(so), 1); 2208 freemsg(mp); 2209 /* 2210 * strseteof takes care of read side wakeups, 2211 * pollwakeups, and signals. 2212 */ 2213 *wakeups = WSLEEP; 2214 *allmsgsigs = S_OUTPUT; 2215 *pollwakeups = POLLOUT; 2216 return (NULL); 2217 } 2218 2219 /* 2220 * Save for getpeername. 
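 * The address recorded here is what a later getpeername() returns. For AF_UNIX the peer's address was already captured at connect time, so only the peer credentials below need to be updated.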
2221 */ 2222 if (so->so_family != AF_UNIX) { 2223 sti->sti_faddr_len = (socklen_t)addrlen; 2224 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen); 2225 bcopy(addr, sti->sti_faddr_sa, addrlen); 2226 sti->sti_faddr_valid = 1; 2227 } 2228 2229 if (so->so_peercred != NULL) 2230 crfree(so->so_peercred); 2231 so->so_peercred = msg_getcred(mp, &so->so_cpid); 2232 if (so->so_peercred != NULL) 2233 crhold(so->so_peercred); 2234 2235 /* Wakeup anybody sleeping in sowaitconnected */ 2236 soisconnected(so); 2237 mutex_exit(&so->so_lock); 2238 2239 /* 2240 * The socket is now available for sending data. 2241 */ 2242 *wakeups = WSLEEP; 2243 *allmsgsigs = S_OUTPUT; 2244 *pollwakeups = POLLOUT; 2245 freemsg(mp); 2246 return (NULL); 2247 } 2248 2249 case T_CONN_IND: 2250 /* 2251 * Verify the min size and queue the message on 2252 * the sti_conn_ind_head/tail list. 2253 */ 2254 if (MBLKL(mp) < sizeof (struct T_conn_ind)) { 2255 zcmn_err(getzoneid(), CE_WARN, 2256 "sockfs: Too short T_CONN_IND. Len = %ld\n", 2257 (ptrdiff_t)(MBLKL(mp))); 2258 freemsg(mp); 2259 return (NULL); 2260 } 2261 2262 if (auditing) 2263 audit_sock(T_CONN_IND, strvp2wq(vp), mp, 0); 2264 if (!(so->so_state & SS_ACCEPTCONN)) { 2265 zcmn_err(getzoneid(), CE_WARN, 2266 "sockfs: T_conn_ind on non-listening socket\n"); 2267 freemsg(mp); 2268 return (NULL); 2269 } 2270 2271 soqueueconnind(so, mp); 2272 *allmsgsigs = S_INPUT | S_RDNORM; 2273 *pollwakeups = POLLIN | POLLRDNORM; 2274 *wakeups = RSLEEP; 2275 return (NULL); 2276 2277 case T_ORDREL_IND: 2278 if (MBLKL(mp) < sizeof (struct T_ordrel_ind)) { 2279 zcmn_err(getzoneid(), CE_WARN, 2280 "sockfs: Too short T_ORDREL_IND. Len = %ld\n", 2281 (ptrdiff_t)(MBLKL(mp))); 2282 freemsg(mp); 2283 return (NULL); 2284 } 2285 2286 /* 2287 * Some providers send this when not fully connected. 2288 * SunLink X.25 needs to retrieve disconnect reason after 2289 * disconnect for compatibility. It uses T_ORDREL_IND 2290 * instead of T_DISCON_IND so that it may use the 2291 * endpoint after a connect failure to retrieve the 2292 * reason using an ioctl. Thus we explicitly clear 2293 * SS_ISCONNECTING here for SunLink X.25. 2294 * This is a needed TPI violation. 2295 */ 2296 mutex_enter(&so->so_lock); 2297 so->so_state &= ~SS_ISCONNECTING; 2298 socantrcvmore(so); 2299 mutex_exit(&so->so_lock); 2300 strseteof(SOTOV(so), 1); 2301 /* 2302 * strseteof takes care of read side wakeups, 2303 * pollwakeups, and signals. 2304 */ 2305 freemsg(mp); 2306 return (NULL); 2307 2308 case T_DISCON_IND: 2309 if (MBLKL(mp) < sizeof (struct T_discon_ind)) { 2310 zcmn_err(getzoneid(), CE_WARN, 2311 "sockfs: Too short T_DISCON_IND. Len = %ld\n", 2312 (ptrdiff_t)(MBLKL(mp))); 2313 freemsg(mp); 2314 return (NULL); 2315 } 2316 if (so->so_state & SS_ACCEPTCONN) { 2317 /* 2318 * This is a listener. Look for a queued T_CONN_IND 2319 * with a matching sequence number and remove it 2320 * from the list. 2321 * It is normal to not find the sequence number since 2322 * the soaccept might have already dequeued it 2323 * (in which case the T_CONN_RES will fail with 2324 * TBADSEQ). 2325 */ 2326 (void) soflushconnind(so, tpr->discon_ind.SEQ_number); 2327 freemsg(mp); 2328 return (0); 2329 } 2330 2331 /* 2332 * Not a listener 2333 * 2334 * If SS_CANTRCVMORE for AF_UNIX ignore the discon_reason. 2335 * Such a discon_ind appears when the peer has first done 2336 * a shutdown() followed by a close() in which case we just 2337 * want to record socantsendmore. 2338 * In this case sockfs first receives a T_ORDREL_IND followed 2339 * by a T_DISCON_IND. 
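 * The preceding T_ORDREL_IND already did the socantrcvmore() side, so all that is left to record here is that no more data can be sent.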
2340 * Note that for other transports (e.g. TCP) we need to handle 2341 * the discon_ind in this case since it signals an error. 2342 */ 2343 mutex_enter(&so->so_lock); 2344 if ((so->so_state & SS_CANTRCVMORE) && 2345 (so->so_family == AF_UNIX)) { 2346 socantsendmore(so); 2347 sti->sti_faddr_valid = 0; 2348 mutex_exit(&so->so_lock); 2349 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2350 dprintso(so, 1, 2351 ("T_DISCON_IND: error %d\n", so->so_error)); 2352 freemsg(mp); 2353 /* 2354 * Set these variables for caller to process them. 2355 * For the else part where T_DISCON_IND is processed, 2356 * this will be done in the function being called 2357 * (strsock_discon_ind()) 2358 */ 2359 *wakeups = WSLEEP; 2360 *allmsgsigs = S_OUTPUT; 2361 *pollwakeups = POLLOUT; 2362 } else if (so->so_flag & (SOASYNC_UNBIND | SOLOCKED)) { 2363 /* 2364 * Deferred processing of T_DISCON_IND 2365 */ 2366 so_save_discon_ind(so, mp, strsock_discon_ind); 2367 mutex_exit(&so->so_lock); 2368 } else { 2369 /* 2370 * Process T_DISCON_IND now 2371 */ 2372 (void) strsock_discon_ind(so, mp); 2373 mutex_exit(&so->so_lock); 2374 } 2375 return (NULL); 2376 2377 case T_UDERROR_IND: { 2378 struct T_uderror_ind *tudi = &tpr->uderror_ind; 2379 void *addr; 2380 t_uscalar_t addrlen; 2381 int error; 2382 2383 dprintso(so, 0, 2384 ("T_UDERROR_IND: error %d\n", tudi->ERROR_type)); 2385 2386 if (MBLKL(mp) < sizeof (struct T_uderror_ind)) { 2387 zcmn_err(getzoneid(), CE_WARN, 2388 "sockfs: Too short T_UDERROR_IND. Len = %ld\n", 2389 (ptrdiff_t)(MBLKL(mp))); 2390 freemsg(mp); 2391 return (NULL); 2392 } 2393 /* Ignore on connection-oriented transports */ 2394 if (so->so_mode & SM_CONNREQUIRED) { 2395 freemsg(mp); 2396 eprintsoline(so, 0); 2397 zcmn_err(getzoneid(), CE_WARN, 2398 "sockfs: T_uderror_ind on connection-oriented " 2399 "transport\n"); 2400 return (NULL); 2401 } 2402 addrlen = tudi->DEST_length; 2403 addr = sogetoff(mp, tudi->DEST_offset, addrlen, 1); 2404 if (addr == NULL) { 2405 zcmn_err(getzoneid(), CE_WARN, 2406 "sockfs: T_uderror_ind with invalid " 2407 "addrlen/offset %u/%d\n", 2408 addrlen, tudi->DEST_offset); 2409 freemsg(mp); 2410 return (NULL); 2411 } 2412 2413 /* Verify source address for connected socket. */ 2414 mutex_enter(&so->so_lock); 2415 if (so->so_state & SS_ISCONNECTED) { 2416 void *faddr; 2417 t_uscalar_t faddr_len; 2418 boolean_t match = B_FALSE; 2419 2420 switch (so->so_family) { 2421 case AF_INET: { 2422 /* Compare just IP address and port */ 2423 struct sockaddr_in *sin1, *sin2; 2424 2425 sin1 = (struct sockaddr_in *)sti->sti_faddr_sa; 2426 sin2 = (struct sockaddr_in *)addr; 2427 if (addrlen == sizeof (struct sockaddr_in) && 2428 sin1->sin_port == sin2->sin_port && 2429 sin1->sin_addr.s_addr == 2430 sin2->sin_addr.s_addr) 2431 match = B_TRUE; 2432 break; 2433 } 2434 case AF_INET6: { 2435 /* Compare just IP address and port. 
Not the flow label. */ 2436 struct sockaddr_in6 *sin1, *sin2; 2437 2438 sin1 = (struct sockaddr_in6 *)sti->sti_faddr_sa; 2439 sin2 = (struct sockaddr_in6 *)addr; 2440 if (addrlen == sizeof (struct sockaddr_in6) && 2441 sin1->sin6_port == sin2->sin6_port && 2442 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 2443 &sin2->sin6_addr)) 2444 match = B_TRUE; 2445 break; 2446 } 2447 case AF_UNIX: 2448 faddr = &sti->sti_ux_faddr; 2449 faddr_len = 2450 (t_uscalar_t)sizeof (sti->sti_ux_faddr); 2451 if (faddr_len == addrlen && 2452 bcmp(addr, faddr, addrlen) == 0) 2453 match = B_TRUE; 2454 break; 2455 default: 2456 faddr = sti->sti_faddr_sa; 2457 faddr_len = (t_uscalar_t)sti->sti_faddr_len; 2458 if (faddr_len == addrlen && 2459 bcmp(addr, faddr, addrlen) == 0) 2460 match = B_TRUE; 2461 break; 2462 } 2463 2464 if (!match) { 2465 #ifdef DEBUG 2466 dprintso(so, 0, 2467 ("sockfs: T_UDERR_IND mismatch: %s - ", 2468 pr_addr(so->so_family, 2469 (struct sockaddr *)addr, addrlen))); 2470 dprintso(so, 0, ("%s\n", 2471 pr_addr(so->so_family, sti->sti_faddr_sa, 2472 sti->sti_faddr_len))); 2473 #endif /* DEBUG */ 2474 mutex_exit(&so->so_lock); 2475 freemsg(mp); 2476 return (NULL); 2477 } 2478 /* 2479 * Make the write error nonpersistent. If the error 2480 * is zero we use ECONNRESET. 2481 * This assumes that the name space for ERROR_type 2482 * is the errno name space. 2483 */ 2484 if (tudi->ERROR_type != 0) 2485 error = tudi->ERROR_type; 2486 else 2487 error = ECONNRESET; 2488 2489 soseterror(so, error); 2490 mutex_exit(&so->so_lock); 2491 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 2492 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2493 *wakeups = RSLEEP | WSLEEP; 2494 *allmsgsigs = S_INPUT | S_RDNORM | S_OUTPUT; 2495 *pollwakeups = POLLIN | POLLRDNORM | POLLOUT; 2496 freemsg(mp); 2497 return (NULL); 2498 } 2499 /* 2500 * If the application asked for delayed errors, 2501 * record the T_UDERROR_IND in sti_eaddr_mp and the reason in 2502 * sti_delayed_error for delayed error posting. If the reason 2503 * is zero, use ECONNRESET. 2504 * Note that delayed error indications do not make sense for 2505 * AF_UNIX sockets since sendto checks that the destination 2506 * address is valid at the time of the sendto. 2507 */ 2508 if (!(so->so_options & SO_DGRAM_ERRIND)) { 2509 mutex_exit(&so->so_lock); 2510 freemsg(mp); 2511 return (NULL); 2512 } 2513 if (sti->sti_eaddr_mp != NULL) 2514 freemsg(sti->sti_eaddr_mp); 2515 2516 sti->sti_eaddr_mp = mp; 2517 if (tudi->ERROR_type != 0) 2518 error = tudi->ERROR_type; 2519 else 2520 error = ECONNRESET; 2521 sti->sti_delayed_error = (ushort_t)error; 2522 mutex_exit(&so->so_lock); 2523 return (NULL); 2524 } 2525 2526 case T_ERROR_ACK: 2527 dprintso(so, 0, 2528 ("strsock_proto: T_ERROR_ACK for %d, error %d/%d\n", 2529 tpr->error_ack.ERROR_prim, 2530 tpr->error_ack.TLI_error, 2531 tpr->error_ack.UNIX_error)); 2532 2533 if (MBLKL(mp) < sizeof (struct T_error_ack)) { 2534 zcmn_err(getzoneid(), CE_WARN, 2535 "sockfs: Too short T_ERROR_ACK. 
Len = %ld\n", 2536 (ptrdiff_t)(MBLKL(mp))); 2537 freemsg(mp); 2538 return (NULL); 2539 } 2540 /* 2541 * Check if we were waiting for the async message 2542 */ 2543 mutex_enter(&so->so_lock); 2544 if ((so->so_flag & SOASYNC_UNBIND) && 2545 tpr->error_ack.ERROR_prim == T_UNBIND_REQ) { 2546 so_unlock_single(so, SOASYNC_UNBIND); 2547 mutex_exit(&so->so_lock); 2548 freemsg(mp); 2549 return (NULL); 2550 } 2551 mutex_exit(&so->so_lock); 2552 soqueueack(so, mp); 2553 return (NULL); 2554 2555 case T_OK_ACK: 2556 if (MBLKL(mp) < sizeof (struct T_ok_ack)) { 2557 zcmn_err(getzoneid(), CE_WARN, 2558 "sockfs: Too short T_OK_ACK. Len = %ld\n", 2559 (ptrdiff_t)(MBLKL(mp))); 2560 freemsg(mp); 2561 return (NULL); 2562 } 2563 /* 2564 * Check if we were waiting for the async message 2565 */ 2566 mutex_enter(&so->so_lock); 2567 if ((so->so_flag & SOASYNC_UNBIND) && 2568 tpr->ok_ack.CORRECT_prim == T_UNBIND_REQ) { 2569 dprintso(so, 1, 2570 ("strsock_proto: T_OK_ACK async unbind\n")); 2571 so_unlock_single(so, SOASYNC_UNBIND); 2572 mutex_exit(&so->so_lock); 2573 freemsg(mp); 2574 return (NULL); 2575 } 2576 mutex_exit(&so->so_lock); 2577 soqueueack(so, mp); 2578 return (NULL); 2579 2580 case T_INFO_ACK: 2581 if (MBLKL(mp) < sizeof (struct T_info_ack)) { 2582 zcmn_err(getzoneid(), CE_WARN, 2583 "sockfs: Too short T_INFO_ACK. Len = %ld\n", 2584 (ptrdiff_t)(MBLKL(mp))); 2585 freemsg(mp); 2586 return (NULL); 2587 } 2588 soqueueack(so, mp); 2589 return (NULL); 2590 2591 case T_CAPABILITY_ACK: 2592 /* 2593 * A T_capability_ack need only be large enough to hold 2594 * the PRIM_type and CAP_bits1 fields; checking for anything 2595 * larger might reject a correct response from an older 2596 * provider. 2597 */ 2598 if (MBLKL(mp) < 2 * sizeof (t_uscalar_t)) { 2599 zcmn_err(getzoneid(), CE_WARN, 2600 "sockfs: Too short T_CAPABILITY_ACK. Len = %ld\n", 2601 (ptrdiff_t)(MBLKL(mp))); 2602 freemsg(mp); 2603 return (NULL); 2604 } 2605 soqueueack(so, mp); 2606 return (NULL); 2607 2608 case T_BIND_ACK: 2609 if (MBLKL(mp) < sizeof (struct T_bind_ack)) { 2610 zcmn_err(getzoneid(), CE_WARN, 2611 "sockfs: Too short T_BIND_ACK. Len = %ld\n", 2612 (ptrdiff_t)(MBLKL(mp))); 2613 freemsg(mp); 2614 return (NULL); 2615 } 2616 soqueueack(so, mp); 2617 return (NULL); 2618 2619 case T_OPTMGMT_ACK: 2620 if (MBLKL(mp) < sizeof (struct T_optmgmt_ack)) { 2621 zcmn_err(getzoneid(), CE_WARN, 2622 "sockfs: Too short T_OPTMGMT_ACK. Len = %ld\n", 2623 (ptrdiff_t)(MBLKL(mp))); 2624 freemsg(mp); 2625 return (NULL); 2626 } 2627 soqueueack(so, mp); 2628 return (NULL); 2629 default: 2630 #ifdef DEBUG 2631 zcmn_err(getzoneid(), CE_WARN, 2632 "sockfs: unknown TPI primitive %d received\n", 2633 tpr->type); 2634 #endif /* DEBUG */ 2635 freemsg(mp); 2636 return (NULL); 2637 } 2638 } 2639 2640 /* 2641 * This routine is registered with the stream head to receive other 2642 * (non-data, and non-proto) messages. 2643 * 2644 * Returns NULL if the message was consumed. 2645 * Returns an mblk to make that mblk be processed by the stream head. 2646 * 2647 * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and 2648 * *pollwakeups) for the stream head to take action on. 
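 * The messages seen here are M_PCSIG (SIGURG accounting), M_SIG/M_HANGUP/M_UNHANGUP/M_ERROR (discarded), and M_FLUSH (read-side flushes are suppressed); everything else is returned for default stream head processing.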
2649 */ 2650 static mblk_t * 2651 strsock_misc(vnode_t *vp, mblk_t *mp, 2652 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 2653 strsigset_t *allmsgsigs, strpollset_t *pollwakeups) 2654 { 2655 struct sonode *so; 2656 sotpi_info_t *sti; 2657 2658 so = VTOSO(vp); 2659 sti = SOTOTPI(so); 2660 2661 dprintso(so, 1, ("strsock_misc(%p, %p, 0x%x)\n", 2662 (void *)vp, (void *)mp, DB_TYPE(mp))); 2663 2664 /* Set default return values */ 2665 *wakeups = *allmsgsigs = *firstmsgsigs = *pollwakeups = 0; 2666 2667 switch (DB_TYPE(mp)) { 2668 case M_PCSIG: 2669 /* 2670 * This assumes that an M_PCSIG for the urgent data arrives 2671 * before the corresponding T_EXDATA_IND. 2672 * 2673 * Note: Just like in SunOS 4.X and 4.4BSD a poll will be 2674 * awoken before the urgent data shows up. 2675 * For OOBINLINE this can result in select returning 2676 * only exceptions as opposed to except|read. 2677 */ 2678 if (*mp->b_rptr == SIGURG) { 2679 mutex_enter(&so->so_lock); 2680 dprintso(so, 1, 2681 ("SIGURG(%p): counts %d/%d state %s\n", 2682 (void *)vp, sti->sti_oobsigcnt, sti->sti_oobcnt, 2683 pr_state(so->so_state, so->so_mode))); 2684 so_oob_sig(so, 1, allmsgsigs, pollwakeups); 2685 dprintso(so, 1, 2686 ("after SIGURG(%p): counts %d/%d " 2687 " poll 0x%x sig 0x%x state %s\n", 2688 (void *)vp, sti->sti_oobsigcnt, sti->sti_oobcnt, 2689 *pollwakeups, *allmsgsigs, 2690 pr_state(so->so_state, so->so_mode))); 2691 mutex_exit(&so->so_lock); 2692 } 2693 freemsg(mp); 2694 return (NULL); 2695 2696 case M_SIG: 2697 case M_HANGUP: 2698 case M_UNHANGUP: 2699 case M_ERROR: 2700 /* M_ERRORs etc are ignored */ 2701 freemsg(mp); 2702 return (NULL); 2703 2704 case M_FLUSH: 2705 /* 2706 * Do not flush read queue. If the M_FLUSH 2707 * arrives because of an impending T_discon_ind 2708 * we still have to keep any queued data - this is part of 2709 * socket semantics. 2710 */ 2711 if (*mp->b_rptr & FLUSHW) { 2712 *mp->b_rptr &= ~FLUSHR; 2713 return (mp); 2714 } 2715 freemsg(mp); 2716 return (NULL); 2717 2718 default: 2719 return (mp); 2720 } 2721 } 2722 2723 2724 /* Register to receive signals for certain events */ 2725 int 2726 so_set_asyncsigs(vnode_t *vp, pid_t pgrp, int events, int mode, cred_t *cr) 2727 { 2728 struct strsigset ss; 2729 int32_t rval; 2730 2731 /* 2732 * Note that SOLOCKED will be set except for the call from soaccept(). 2733 */ 2734 ASSERT(!mutex_owned(&VTOSO(vp)->so_lock)); 2735 ss.ss_pid = pgrp; 2736 ss.ss_events = events; 2737 return (strioctl(vp, I_ESETSIG, (intptr_t)&ss, mode, K_TO_K, cr, 2738 &rval)); 2739 } 2740 2741 2742 /* Register for events matching the SS_ASYNC flag */ 2743 int 2744 so_set_events(struct sonode *so, vnode_t *vp, cred_t *cr) 2745 { 2746 int events = so->so_state & SS_ASYNC ? 2747 S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT : 2748 S_RDBAND | S_BANDURG; 2749 2750 return (so_set_asyncsigs(vp, so->so_pgrp, events, 0, cr)); 2751 } 2752 2753 2754 /* Change the SS_ASYNC flag, and update signal delivery if needed */ 2755 int 2756 so_flip_async(struct sonode *so, vnode_t *vp, int mode, cred_t *cr) 2757 { 2758 ASSERT(mutex_owned(&so->so_lock)); 2759 if (so->so_pgrp != 0) { 2760 int error; 2761 int events = so->so_state & SS_ASYNC ? 
/* Old flag */ 2762 S_RDBAND | S_BANDURG : /* New sigs */ 2763 S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT; 2764 2765 so_lock_single(so); 2766 mutex_exit(&so->so_lock); 2767 2768 error = so_set_asyncsigs(vp, so->so_pgrp, events, mode, cr); 2769 2770 mutex_enter(&so->so_lock); 2771 so_unlock_single(so, SOLOCKED); 2772 if (error) 2773 return (error); 2774 } 2775 so->so_state ^= SS_ASYNC; 2776 return (0); 2777 } 2778 2779 /* 2780 * Set new pid/pgrp for SIGPOLL (or SIGIO for FIOASYNC mode), replacing 2781 * any existing one. If passed zero, just clear the existing one. 2782 */ 2783 int 2784 so_set_siggrp(struct sonode *so, vnode_t *vp, pid_t pgrp, int mode, cred_t *cr) 2785 { 2786 int events = so->so_state & SS_ASYNC ? 2787 S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT : 2788 S_RDBAND | S_BANDURG; 2789 int error; 2790 2791 ASSERT(mutex_owned(&so->so_lock)); 2792 2793 /* 2794 * Change socket process (group). 2795 * 2796 * strioctl (via so_set_asyncsigs) will perform permission check and 2797 * also keep a PID_HOLD to prevent the pid from being reused. 2798 */ 2799 so_lock_single(so); 2800 mutex_exit(&so->so_lock); 2801 2802 if (pgrp != 0) { 2803 dprintso(so, 1, ("setown: adding pgrp %d ev 0x%x\n", 2804 pgrp, events)); 2805 error = so_set_asyncsigs(vp, pgrp, events, mode, cr); 2806 if (error != 0) { 2807 eprintsoline(so, error); 2808 goto bad; 2809 } 2810 } 2811 /* Remove the previously registered process/group */ 2812 if (so->so_pgrp != 0) { 2813 dprintso(so, 1, ("setown: removing pgrp %d\n", so->so_pgrp)); 2814 error = so_set_asyncsigs(vp, so->so_pgrp, 0, mode, cr); 2815 if (error != 0) { 2816 eprintsoline(so, error); 2817 error = 0; 2818 } 2819 } 2820 mutex_enter(&so->so_lock); 2821 so_unlock_single(so, SOLOCKED); 2822 so->so_pgrp = pgrp; 2823 return (0); 2824 bad: 2825 mutex_enter(&so->so_lock); 2826 so_unlock_single(so, SOLOCKED); 2827 return (error); 2828 } 2829 2830 /* 2831 * Wrapper for getmsg. If the socket has been converted to a stream, 2832 * pass the request to the stream head. 2833 */ 2834 int 2835 sock_getmsg( 2836 struct vnode *vp, 2837 struct strbuf *mctl, 2838 struct strbuf *mdata, 2839 uchar_t *prip, 2840 int *flagsp, 2841 int fmode, 2842 rval_t *rvp 2843 ) 2844 { 2845 struct sonode *so; 2846 2847 ASSERT(vp->v_type == VSOCK); 2848 /* 2849 * Use the stream head to find the real socket vnode. 2850 * This is needed when namefs sits above sockfs. Some 2851 * sockets (like SCTP) are not streams. 2852 */ 2853 if (!vp->v_stream) { 2854 return (ENOSTR); 2855 } 2856 ASSERT(vp->v_stream->sd_vnode); 2857 vp = vp->v_stream->sd_vnode; 2858 ASSERT(vn_matchops(vp, socket_vnodeops)); 2859 so = VTOSO(vp); 2860 2861 dprintso(so, 1, ("sock_getmsg(%p) %s\n", 2862 (void *)so, pr_state(so->so_state, so->so_mode))); 2863 2864 if (so->so_version == SOV_STREAM) { 2865 /* The imaginary "sockmod" has been popped - act as a stream */ 2866 return (strgetmsg(vp, mctl, mdata, prip, flagsp, fmode, rvp)); 2867 } 2868 eprintsoline(so, ENOSTR); 2869 return (ENOSTR); 2870 } 2871 2872 /* 2873 * Wrapper for putmsg. If the socket has been converted to a stream, 2874 * pass the request to the stream head. 2875 * 2876 * Note that while a regular socket (SOV_SOCKSTREAM) does support the 2877 * STREAMS ioctl set, it does not support putmsg and getmsg. 2878 * Allowing putmsg would prevent sockfs from tracking the state of 2879 * the socket/transport and would also invalidate the locking in sockfs. 
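 * As with sock_getmsg() above, a socket that has not been converted to SOV_STREAM therefore gets ENOSTR.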
*/ 2881 int 2882 sock_putmsg( 2883 struct vnode *vp, 2884 struct strbuf *mctl, 2885 struct strbuf *mdata, 2886 uchar_t pri, 2887 int flag, 2888 int fmode 2889 ) 2890 { 2891 struct sonode *so; 2892 2893 ASSERT(vp->v_type == VSOCK); 2894 /* 2895 * Use the stream head to find the real socket vnode. 2896 * This is needed when namefs sits above sockfs. 2897 */ 2898 if (!vp->v_stream) { 2899 return (ENOSTR); 2900 } 2901 ASSERT(vp->v_stream->sd_vnode); 2902 vp = vp->v_stream->sd_vnode; 2903 ASSERT(vn_matchops(vp, socket_vnodeops)); 2904 so = VTOSO(vp); 2905 2906 dprintso(so, 1, ("sock_putmsg(%p) %s\n", 2907 (void *)so, pr_state(so->so_state, so->so_mode))); 2908 2909 if (so->so_version == SOV_STREAM) { 2910 /* The imaginary "sockmod" has been popped - act as a stream */ 2911 return (strputmsg(vp, mctl, mdata, pri, flag, fmode)); 2912 } 2913 eprintsoline(so, ENOSTR); 2914 return (ENOSTR); 2915 } 2916 2917 /* 2918 * Special function called only from f_getfl(). 2919 * Returns FASYNC if the SS_ASYNC flag is set on a socket, else 0. 2920 * No locks are acquired here, so it is safe to use while uf_lock is held. 2921 * This exists solely for BSD fcntl() FASYNC compatibility. 2922 */ 2923 int 2924 sock_getfasync(vnode_t *vp) 2925 { 2926 struct sonode *so; 2927 2928 ASSERT(vp->v_type == VSOCK); 2929 /* 2930 * For the stream model v_stream is used; for non-stream sockets 2931 * v_stream is always NULL. 2932 */ 2933 if (vp->v_stream != NULL) 2934 so = VTOSO(vp->v_stream->sd_vnode); 2935 else 2936 so = VTOSO(vp); 2937 2938 if (so->so_version == SOV_STREAM || !(so->so_state & SS_ASYNC)) 2939 return (0); 2940 2941 return (FASYNC); 2942 } 2943