1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/inttypes.h> 31 #include <sys/t_lock.h> 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/buf.h> 35 #include <sys/conf.h> 36 #include <sys/cred.h> 37 #include <sys/kmem.h> 38 #include <sys/sysmacros.h> 39 #include <sys/vfs.h> 40 #include <sys/vnode.h> 41 #include <sys/debug.h> 42 #include <sys/errno.h> 43 #include <sys/time.h> 44 #include <sys/file.h> 45 #include <sys/user.h> 46 #include <sys/stream.h> 47 #include <sys/strsubr.h> 48 #include <sys/esunddi.h> 49 #include <sys/flock.h> 50 #include <sys/modctl.h> 51 #include <sys/vtrace.h> 52 #include <sys/strsun.h> 53 #include <sys/cmn_err.h> 54 #include <sys/proc.h> 55 #include <sys/ddi.h> 56 #include <sys/kmem_impl.h> 57 58 #include <sys/suntpi.h> 59 #include <sys/socket.h> 60 #include <sys/sockio.h> 61 #include <sys/socketvar.h> 62 #include <netinet/in.h> 63 64 #include <sys/tiuser.h> 65 #define _SUN_TPI_VERSION 2 66 #include <sys/tihdr.h> 67 68 #include <c2/audit.h> 69 70 int so_default_version = SOV_SOCKSTREAM; 71 72 #ifdef DEBUG 73 /* Set sockdebug to print debug messages when SO_DEBUG is set */ 74 int sockdebug = 0; 75 76 /* Set sockprinterr to print error messages when SO_DEBUG is set */ 77 int sockprinterr = 0; 78 79 /* 80 * Set so_default_options to SO_DEBUG is all sockets should be created 81 * with SO_DEBUG set. This is needed to get debug printouts from the 82 * socket() call itself. 83 */ 84 int so_default_options = 0; 85 #endif /* DEBUG */ 86 87 #ifdef SOCK_TEST 88 /* 89 * Set to number of ticks to limit cv_waits for code coverage testing. 90 * Set to 1000 when SO_DEBUG is set to 2. 91 */ 92 clock_t sock_test_timelimit = 0; 93 #endif /* SOCK_TEST */ 94 95 /* 96 * For concurrency testing of e.g. opening /dev/ip which does not 97 * handle T_INFO_REQ messages. 98 */ 99 int so_no_tinfo = 0; 100 101 /* 102 * Timeout for getting a T_CAPABILITY_ACK - it is possible for a provider 103 * to simply ignore the T_CAPABILITY_REQ. 
104 */ 105 clock_t sock_capability_timeout = 2; /* seconds */ 106 107 static int do_tcapability(struct sonode *so, t_uscalar_t cap_bits1); 108 static void so_removehooks(struct sonode *so); 109 110 static mblk_t *strsock_proto(vnode_t *vp, mblk_t *mp, 111 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 112 strsigset_t *allmsgsigs, strpollset_t *pollwakeups); 113 static mblk_t *strsock_misc(vnode_t *vp, mblk_t *mp, 114 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 115 strsigset_t *allmsgsigs, strpollset_t *pollwakeups); 116 117 static int tlitosyserr(int terr); 118 119 /* 120 * Convert a socket to a stream. Invoked when the illusory sockmod 121 * is popped from the stream. 122 * Change the stream head back to default operation without losing 123 * any messages (T_conn_ind's are moved to the stream head queue). 124 */ 125 int 126 so_sock2stream(struct sonode *so) 127 { 128 struct vnode *vp = SOTOV(so); 129 queue_t *rq; 130 mblk_t *mp; 131 int error = 0; 132 133 ASSERT(MUTEX_HELD(&so->so_plumb_lock)); 134 135 mutex_enter(&so->so_lock); 136 so_lock_single(so); 137 138 ASSERT(so->so_version != SOV_STREAM); 139 140 if (so->so_state & SS_DIRECT) { 141 mblk_t **mpp; 142 int rval; 143 144 /* 145 * Tell the transport below that sockmod is being popped 146 */ 147 mutex_exit(&so->so_lock); 148 error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K, CRED(), 149 &rval); 150 mutex_enter(&so->so_lock); 151 if (error != 0) { 152 dprintso(so, 0, ("so_sock2stream(%p): " 153 "_SIOCSOCKFALLBACK failed\n", so)); 154 goto exit; 155 } 156 so->so_state &= ~SS_DIRECT; 157 158 for (mpp = &so->so_conn_ind_head; (mp = *mpp) != NULL; 159 mpp = &mp->b_next) { 160 struct T_conn_ind *conn_ind; 161 162 /* 163 * strsock_proto() has already verified the length of 164 * this message block. 165 */ 166 ASSERT(MBLKL(mp) >= sizeof (struct T_conn_ind)); 167 168 conn_ind = (struct T_conn_ind *)mp->b_rptr; 169 if (conn_ind->OPT_length == 0 && 170 conn_ind->OPT_offset == 0) 171 continue; 172 173 if (DB_REF(mp) > 1) { 174 mblk_t *newmp; 175 size_t length; 176 cred_t *cr; 177 178 /* 179 * Copy the message block because it is used 180 * elsewhere, too. 181 */ 182 length = MBLKL(mp); 183 newmp = soallocproto(length, _ALLOC_INTR); 184 if (newmp == NULL) { 185 error = EINTR; 186 goto exit; 187 } 188 bcopy(mp->b_rptr, newmp->b_wptr, length); 189 newmp->b_wptr += length; 190 newmp->b_next = mp->b_next; 191 cr = DB_CRED(mp); 192 if (cr != NULL) 193 mblk_setcred(newmp, cr); 194 DB_CPID(newmp) = DB_CPID(mp); 195 196 /* 197 * Link the new message block into the queue 198 * and free the old one. 199 */ 200 *mpp = newmp; 201 mp->b_next = NULL; 202 freemsg(mp); 203 204 mp = newmp; 205 conn_ind = (struct T_conn_ind *)mp->b_rptr; 206 } 207 208 /* 209 * Remove options added by TCP for accept fast-path. 210 */ 211 conn_ind->OPT_length = 0; 212 conn_ind->OPT_offset = 0; 213 } 214 } 215 216 so->so_version = SOV_STREAM; 217 so->so_priv = NULL; 218 219 /* 220 * Remove the hooks in the stream head to avoid queuing more 221 * packets in sockfs. 222 */ 223 mutex_exit(&so->so_lock); 224 so_removehooks(so); 225 mutex_enter(&so->so_lock); 226 227 /* 228 * Clear any state related to urgent data. Leave any T_EXDATA_IND 229 * on the queue - the behavior of urgent data after a switch is 230 * left undefined. 
231 */ 232 so->so_error = so->so_delayed_error = 0; 233 freemsg(so->so_oobmsg); 234 so->so_oobmsg = NULL; 235 so->so_oobsigcnt = so->so_oobcnt = 0; 236 237 so->so_state &= ~(SS_RCVATMARK|SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA| 238 SS_HASCONNIND|SS_SAVEDEOR); 239 ASSERT(so_verify_oobstate(so)); 240 241 freemsg(so->so_ack_mp); 242 so->so_ack_mp = NULL; 243 244 /* 245 * Flush the T_DISCON_IND on so_discon_ind_mp. 246 */ 247 so_flush_discon_ind(so); 248 249 /* 250 * Move any queued T_CONN_IND messages to stream head queue. 251 */ 252 rq = RD(strvp2wq(vp)); 253 while ((mp = so->so_conn_ind_head) != NULL) { 254 so->so_conn_ind_head = mp->b_next; 255 mp->b_next = NULL; 256 if (so->so_conn_ind_head == NULL) { 257 ASSERT(so->so_conn_ind_tail == mp); 258 so->so_conn_ind_tail = NULL; 259 } 260 dprintso(so, 0, 261 ("so_sock2stream(%p): moving T_CONN_IND\n", 262 so)); 263 264 /* Drop lock across put() */ 265 mutex_exit(&so->so_lock); 266 put(rq, mp); 267 mutex_enter(&so->so_lock); 268 } 269 270 exit: 271 ASSERT(MUTEX_HELD(&so->so_lock)); 272 so_unlock_single(so, SOLOCKED); 273 mutex_exit(&so->so_lock); 274 return (error); 275 } 276 277 /* 278 * Covert a stream back to a socket. This is invoked when the illusory 279 * sockmod is pushed on a stream (where the stream was "created" by 280 * popping the illusory sockmod). 281 * This routine can not recreate the socket state (certain aspects of 282 * it like urgent data state and the bound/connected addresses for AF_UNIX 283 * sockets can not be recreated by asking the transport for information). 284 * Thus this routine implicitly assumes that the socket is in an initial 285 * state (as if it was just created). It flushes any messages queued on the 286 * read queue to avoid dealing with e.g. TPI acks or T_exdata_ind messages. 287 */ 288 void 289 so_stream2sock(struct sonode *so) 290 { 291 struct vnode *vp = SOTOV(so); 292 293 ASSERT(MUTEX_HELD(&so->so_plumb_lock)); 294 295 mutex_enter(&so->so_lock); 296 so_lock_single(so); 297 ASSERT(so->so_version == SOV_STREAM); 298 so->so_version = SOV_SOCKSTREAM; 299 so->so_pushcnt = 0; 300 mutex_exit(&so->so_lock); 301 302 /* 303 * Set a permenent error to force any thread in sorecvmsg to 304 * return (and drop SOREADLOCKED). Clear the error once 305 * we have SOREADLOCKED. 306 * This makes a read sleeping during the I_PUSH of sockmod return 307 * EIO. 308 */ 309 strsetrerror(SOTOV(so), EIO, 1, NULL); 310 311 /* 312 * Get the read lock before flushing data to avoid 313 * problems with the T_EXDATA_IND MSG_PEEK code in sorecvmsg. 314 */ 315 mutex_enter(&so->so_lock); 316 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 317 mutex_exit(&so->so_lock); 318 319 strsetrerror(SOTOV(so), 0, 0, NULL); 320 so_installhooks(so); 321 322 /* 323 * Flush everything on the read queue. 324 * This ensures that no T_CONN_IND remain and that no T_EXDATA_IND 325 * remain; those types of messages would confuse sockfs. 326 */ 327 strflushrq(vp, FLUSHALL); 328 mutex_enter(&so->so_lock); 329 330 /* 331 * Flush the T_DISCON_IND on so_discon_ind_mp. 332 */ 333 so_flush_discon_ind(so); 334 so_unlock_read(so); /* Clear SOREADLOCKED */ 335 336 so_unlock_single(so, SOLOCKED); 337 mutex_exit(&so->so_lock); 338 } 339 340 /* 341 * Install the hooks in the stream head. 
342 */ 343 void 344 so_installhooks(struct sonode *so) 345 { 346 struct vnode *vp = SOTOV(so); 347 348 strsetrputhooks(vp, SH_SIGALLDATA | SH_IGN_ZEROLEN | SH_CONSOL_DATA, 349 strsock_proto, strsock_misc); 350 strsetwputhooks(vp, SH_SIGPIPE | SH_RECHECK_ERR, 0); 351 } 352 353 /* 354 * Remove the hooks in the stream head. 355 */ 356 static void 357 so_removehooks(struct sonode *so) 358 { 359 struct vnode *vp = SOTOV(so); 360 361 strsetrputhooks(vp, 0, NULL, NULL); 362 strsetwputhooks(vp, 0, STRTIMOUT); 363 /* 364 * Leave read behavior as it would have been for a normal 365 * stream i.e. a read of an M_PROTO will fail. 366 */ 367 } 368 369 /* 370 * Initialize the streams side of a socket including 371 * T_info_req/ack processing. If tso is not NULL its values are used thereby 372 * avoiding the T_INFO_REQ. 373 */ 374 int 375 so_strinit(struct sonode *so, struct sonode *tso) 376 { 377 struct vnode *vp = SOTOV(so); 378 struct stdata *stp; 379 mblk_t *mp; 380 int error; 381 382 dprintso(so, 1, ("so_strinit(%p)\n", so)); 383 384 /* Preallocate an unbind_req message */ 385 mp = soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP); 386 mutex_enter(&so->so_lock); 387 so->so_unbind_mp = mp; 388 #ifdef DEBUG 389 so->so_options = so_default_options; 390 #endif /* DEBUG */ 391 mutex_exit(&so->so_lock); 392 393 so_installhooks(so); 394 395 /* 396 * The T_CAPABILITY_REQ should be the first message sent down because 397 * at least TCP has a fast-path for this which avoids timeouts while 398 * waiting for the T_CAPABILITY_ACK under high system load. 399 */ 400 if (tso == NULL) { 401 error = do_tcapability(so, TC1_ACCEPTOR_ID | TC1_INFO); 402 if (error) 403 return (error); 404 } else { 405 mutex_enter(&so->so_lock); 406 so->so_tsdu_size = tso->so_tsdu_size; 407 so->so_etsdu_size = tso->so_etsdu_size; 408 so->so_addr_size = tso->so_addr_size; 409 so->so_opt_size = tso->so_opt_size; 410 so->so_tidu_size = tso->so_tidu_size; 411 so->so_serv_type = tso->so_serv_type; 412 so->so_mode = tso->so_mode & ~SM_ACCEPTOR_ID; 413 mutex_exit(&so->so_lock); 414 415 /* the following do_tcapability may update so->so_mode */ 416 if ((tso->so_serv_type != T_CLTS) && 417 !(so->so_state & SS_DIRECT)) { 418 error = do_tcapability(so, TC1_ACCEPTOR_ID); 419 if (error) 420 return (error); 421 } 422 } 423 /* 424 * If the addr_size is 0 we treat it as already bound 425 * and connected. This is used by the routing socket. 426 * We set the addr_size to something to allocate a the address 427 * structures. 428 */ 429 if (so->so_addr_size == 0) { 430 so->so_state |= SS_ISBOUND | SS_ISCONNECTED; 431 /* Address size can vary with address families. */ 432 if (so->so_family == AF_INET6) 433 so->so_addr_size = 434 (t_scalar_t)sizeof (struct sockaddr_in6); 435 else 436 so->so_addr_size = 437 (t_scalar_t)sizeof (struct sockaddr_in); 438 ASSERT(so->so_unbind_mp); 439 } 440 /* 441 * Allocate the addresses. 442 */ 443 ASSERT(so->so_laddr_sa == NULL && so->so_faddr_sa == NULL); 444 ASSERT(so->so_laddr_len == 0 && so->so_faddr_len == 0); 445 so->so_laddr_maxlen = so->so_faddr_maxlen = 446 P2ROUNDUP(so->so_addr_size, KMEM_ALIGN); 447 so->so_laddr_sa = kmem_alloc(so->so_laddr_maxlen * 2, KM_SLEEP); 448 so->so_faddr_sa = (struct sockaddr *)((caddr_t)so->so_laddr_sa 449 + so->so_laddr_maxlen); 450 451 if (so->so_family == AF_UNIX) { 452 /* 453 * Initialize AF_UNIX related fields. 
454 */ 455 bzero(&so->so_ux_laddr, sizeof (so->so_ux_laddr)); 456 bzero(&so->so_ux_faddr, sizeof (so->so_ux_faddr)); 457 } 458 459 stp = vp->v_stream; 460 /* 461 * Have to keep minpsz at zero in order to allow write/send of zero 462 * bytes. 463 */ 464 mutex_enter(&stp->sd_lock); 465 if (stp->sd_qn_minpsz == 1) 466 stp->sd_qn_minpsz = 0; 467 mutex_exit(&stp->sd_lock); 468 469 return (0); 470 } 471 472 static void 473 copy_tinfo(struct sonode *so, struct T_info_ack *tia) 474 { 475 so->so_tsdu_size = tia->TSDU_size; 476 so->so_etsdu_size = tia->ETSDU_size; 477 so->so_addr_size = tia->ADDR_size; 478 so->so_opt_size = tia->OPT_size; 479 so->so_tidu_size = tia->TIDU_size; 480 so->so_serv_type = tia->SERV_type; 481 switch (tia->CURRENT_state) { 482 case TS_UNBND: 483 break; 484 case TS_IDLE: 485 so->so_state |= SS_ISBOUND; 486 so->so_laddr_len = 0; 487 so->so_state &= ~SS_LADDR_VALID; 488 break; 489 case TS_DATA_XFER: 490 so->so_state |= SS_ISBOUND|SS_ISCONNECTED; 491 so->so_laddr_len = 0; 492 so->so_faddr_len = 0; 493 so->so_state &= ~(SS_LADDR_VALID | SS_FADDR_VALID); 494 break; 495 } 496 497 /* 498 * Heuristics for determining the socket mode flags 499 * (SM_ATOMIC, SM_CONNREQUIRED, SM_ADDR, SM_FDPASSING, 500 * and SM_EXDATA, SM_OPTDATA, and SM_BYTESTREAM) 501 * from the info ack. 502 */ 503 if (so->so_serv_type == T_CLTS) { 504 so->so_mode |= SM_ATOMIC | SM_ADDR; 505 } else { 506 so->so_mode |= SM_CONNREQUIRED; 507 if (so->so_etsdu_size != 0 && so->so_etsdu_size != -2) 508 so->so_mode |= SM_EXDATA; 509 } 510 if (so->so_type == SOCK_SEQPACKET || so->so_type == SOCK_RAW) { 511 /* Semantics are to discard tail end of messages */ 512 so->so_mode |= SM_ATOMIC; 513 } 514 if (so->so_family == AF_UNIX) { 515 so->so_mode |= SM_FDPASSING | SM_OPTDATA; 516 if (so->so_addr_size == -1) { 517 /* MAXPATHLEN + soun_family + nul termination */ 518 so->so_addr_size = (t_scalar_t)(MAXPATHLEN + 519 sizeof (short) + 1); 520 } 521 if (so->so_type == SOCK_STREAM) { 522 /* 523 * Make it into a byte-stream transport. 524 * SOCK_SEQPACKET sockets are unchanged. 525 */ 526 so->so_tsdu_size = 0; 527 } 528 } else if (so->so_addr_size == -1) { 529 /* 530 * Logic extracted from sockmod - have to pick some max address 531 * length in order to preallocate the addresses. 532 */ 533 so->so_addr_size = SOA_DEFSIZE; 534 } 535 if (so->so_tsdu_size == 0) 536 so->so_mode |= SM_BYTESTREAM; 537 } 538 539 static int 540 check_tinfo(struct sonode *so) 541 { 542 /* Consistency checks */ 543 if (so->so_type == SOCK_DGRAM && so->so_serv_type != T_CLTS) { 544 eprintso(so, ("service type and socket type mismatch\n")); 545 eprintsoline(so, EPROTO); 546 return (EPROTO); 547 } 548 if (so->so_type == SOCK_STREAM && so->so_serv_type == T_CLTS) { 549 eprintso(so, ("service type and socket type mismatch\n")); 550 eprintsoline(so, EPROTO); 551 return (EPROTO); 552 } 553 if (so->so_type == SOCK_SEQPACKET && so->so_serv_type == T_CLTS) { 554 eprintso(so, ("service type and socket type mismatch\n")); 555 eprintsoline(so, EPROTO); 556 return (EPROTO); 557 } 558 if (so->so_family == AF_INET && 559 so->so_addr_size != (t_scalar_t)sizeof (struct sockaddr_in)) { 560 eprintso(so, 561 ("AF_INET must have sockaddr_in address length. Got %d\n", 562 so->so_addr_size)); 563 eprintsoline(so, EMSGSIZE); 564 return (EMSGSIZE); 565 } 566 if (so->so_family == AF_INET6 && 567 so->so_addr_size != (t_scalar_t)sizeof (struct sockaddr_in6)) { 568 eprintso(so, 569 ("AF_INET6 must have sockaddr_in6 address length. 
Got %d\n", 570 so->so_addr_size)); 571 eprintsoline(so, EMSGSIZE); 572 return (EMSGSIZE); 573 } 574 575 dprintso(so, 1, ( 576 "tinfo: serv %d tsdu %d, etsdu %d, addr %d, opt %d, tidu %d\n", 577 so->so_serv_type, so->so_tsdu_size, so->so_etsdu_size, 578 so->so_addr_size, so->so_opt_size, 579 so->so_tidu_size)); 580 dprintso(so, 1, ("tinfo: so_state %s\n", 581 pr_state(so->so_state, so->so_mode))); 582 return (0); 583 } 584 585 /* 586 * Send down T_info_req and wait for the ack. 587 * Record interesting T_info_ack values in the sonode. 588 */ 589 static int 590 do_tinfo(struct sonode *so) 591 { 592 struct T_info_req tir; 593 mblk_t *mp; 594 int error; 595 596 ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 597 598 if (so_no_tinfo) { 599 so->so_addr_size = 0; 600 return (0); 601 } 602 603 dprintso(so, 1, ("do_tinfo(%p)\n", so)); 604 605 /* Send T_INFO_REQ */ 606 tir.PRIM_type = T_INFO_REQ; 607 mp = soallocproto1(&tir, sizeof (tir), 608 sizeof (struct T_info_req) + sizeof (struct T_info_ack), 609 _ALLOC_INTR); 610 if (mp == NULL) { 611 eprintsoline(so, ENOBUFS); 612 return (ENOBUFS); 613 } 614 /* T_INFO_REQ has to be M_PCPROTO */ 615 DB_TYPE(mp) = M_PCPROTO; 616 617 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 618 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 619 if (error) { 620 eprintsoline(so, error); 621 return (error); 622 } 623 mutex_enter(&so->so_lock); 624 /* Wait for T_INFO_ACK */ 625 if ((error = sowaitprim(so, T_INFO_REQ, T_INFO_ACK, 626 (t_uscalar_t)sizeof (struct T_info_ack), &mp, 0))) { 627 mutex_exit(&so->so_lock); 628 eprintsoline(so, error); 629 return (error); 630 } 631 632 ASSERT(mp); 633 copy_tinfo(so, (struct T_info_ack *)mp->b_rptr); 634 mutex_exit(&so->so_lock); 635 freemsg(mp); 636 return (check_tinfo(so)); 637 } 638 639 /* 640 * Send down T_capability_req and wait for the ack. 641 * Record interesting T_capability_ack values in the sonode. 
642 */ 643 static int 644 do_tcapability(struct sonode *so, t_uscalar_t cap_bits1) 645 { 646 struct T_capability_req tcr; 647 struct T_capability_ack *tca; 648 mblk_t *mp; 649 int error; 650 651 ASSERT(cap_bits1 != 0); 652 ASSERT((cap_bits1 & ~(TC1_ACCEPTOR_ID | TC1_INFO)) == 0); 653 ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 654 655 if (so->so_provinfo->tpi_capability == PI_NO) 656 return (do_tinfo(so)); 657 658 if (so_no_tinfo) { 659 so->so_addr_size = 0; 660 if ((cap_bits1 &= ~TC1_INFO) == 0) 661 return (0); 662 } 663 664 dprintso(so, 1, ("do_tcapability(%p)\n", so)); 665 666 /* Send T_CAPABILITY_REQ */ 667 tcr.PRIM_type = T_CAPABILITY_REQ; 668 tcr.CAP_bits1 = cap_bits1; 669 mp = soallocproto1(&tcr, sizeof (tcr), 670 sizeof (struct T_capability_req) + sizeof (struct T_capability_ack), 671 _ALLOC_INTR); 672 if (mp == NULL) { 673 eprintsoline(so, ENOBUFS); 674 return (ENOBUFS); 675 } 676 /* T_CAPABILITY_REQ should be M_PCPROTO here */ 677 DB_TYPE(mp) = M_PCPROTO; 678 679 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 680 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 681 if (error) { 682 eprintsoline(so, error); 683 return (error); 684 } 685 mutex_enter(&so->so_lock); 686 /* Wait for T_CAPABILITY_ACK */ 687 if ((error = sowaitprim(so, T_CAPABILITY_REQ, T_CAPABILITY_ACK, 688 (t_uscalar_t)sizeof (*tca), &mp, sock_capability_timeout * hz))) { 689 mutex_exit(&so->so_lock); 690 PI_PROVLOCK(so->so_provinfo); 691 if (so->so_provinfo->tpi_capability == PI_DONTKNOW) 692 so->so_provinfo->tpi_capability = PI_NO; 693 PI_PROVUNLOCK(so->so_provinfo); 694 ASSERT((so->so_mode & SM_ACCEPTOR_ID) == 0); 695 if (cap_bits1 & TC1_INFO) { 696 /* 697 * If the T_CAPABILITY_REQ timed out and then a 698 * T_INFO_REQ gets a protocol error, most likely 699 * the capability was slow (vs. unsupported). Return 700 * ENOSR for this case as a best guess. 701 */ 702 if (error == ETIME) { 703 return ((error = do_tinfo(so)) == EPROTO ? 704 ENOSR : error); 705 } 706 return (do_tinfo(so)); 707 } 708 return (0); 709 } 710 711 if (so->so_provinfo->tpi_capability == PI_DONTKNOW) { 712 PI_PROVLOCK(so->so_provinfo); 713 so->so_provinfo->tpi_capability = PI_YES; 714 PI_PROVUNLOCK(so->so_provinfo); 715 } 716 717 ASSERT(mp); 718 tca = (struct T_capability_ack *)mp->b_rptr; 719 720 ASSERT((cap_bits1 & TC1_INFO) == (tca->CAP_bits1 & TC1_INFO)); 721 722 cap_bits1 = tca->CAP_bits1; 723 724 if (cap_bits1 & TC1_ACCEPTOR_ID) { 725 so->so_acceptor_id = tca->ACCEPTOR_id; 726 so->so_mode |= SM_ACCEPTOR_ID; 727 } 728 729 if (cap_bits1 & TC1_INFO) 730 copy_tinfo(so, &tca->INFO_ack); 731 732 mutex_exit(&so->so_lock); 733 freemsg(mp); 734 735 if (cap_bits1 & TC1_INFO) 736 return (check_tinfo(so)); 737 738 return (0); 739 } 740 741 /* 742 * Retrieve and clear the socket error. 743 */ 744 int 745 sogeterr(struct sonode *so) 746 { 747 int error; 748 749 ASSERT(MUTEX_HELD(&so->so_lock)); 750 751 error = so->so_error; 752 so->so_error = 0; 753 754 return (error); 755 } 756 757 /* 758 * This routine is registered with the stream head to retrieve read 759 * side errors. 760 * It does not clear the socket error for a peeking read side operation. 761 * It the error is to be cleared it sets *clearerr. 
762 */ 763 int 764 sogetrderr(vnode_t *vp, int ispeek, int *clearerr) 765 { 766 struct sonode *so = VTOSO(vp); 767 int error; 768 769 mutex_enter(&so->so_lock); 770 if (ispeek) { 771 error = so->so_error; 772 *clearerr = 0; 773 } else { 774 error = so->so_error; 775 so->so_error = 0; 776 *clearerr = 1; 777 } 778 mutex_exit(&so->so_lock); 779 return (error); 780 } 781 782 /* 783 * This routine is registered with the stream head to retrieve write 784 * side errors. 785 * It does not clear the socket error for a peeking read side operation. 786 * It the error is to be cleared it sets *clearerr. 787 */ 788 int 789 sogetwrerr(vnode_t *vp, int ispeek, int *clearerr) 790 { 791 struct sonode *so = VTOSO(vp); 792 int error; 793 794 mutex_enter(&so->so_lock); 795 if (so->so_state & SS_CANTSENDMORE) { 796 error = EPIPE; 797 *clearerr = 0; 798 } else { 799 error = so->so_error; 800 if (ispeek) { 801 *clearerr = 0; 802 } else { 803 so->so_error = 0; 804 *clearerr = 1; 805 } 806 } 807 mutex_exit(&so->so_lock); 808 return (error); 809 } 810 811 /* 812 * Set a nonpersistent read and write error on the socket. 813 * Used when there is a T_uderror_ind for a connected socket. 814 * The caller also needs to call strsetrerror and strsetwerror 815 * after dropping the lock. 816 */ 817 void 818 soseterror(struct sonode *so, int error) 819 { 820 ASSERT(error != 0); 821 822 ASSERT(MUTEX_HELD(&so->so_lock)); 823 so->so_error = (ushort_t)error; 824 } 825 826 void 827 soisconnecting(struct sonode *so) 828 { 829 ASSERT(MUTEX_HELD(&so->so_lock)); 830 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); 831 so->so_state |= SS_ISCONNECTING; 832 cv_broadcast(&so->so_state_cv); 833 } 834 835 void 836 soisconnected(struct sonode *so) 837 { 838 ASSERT(MUTEX_HELD(&so->so_lock)); 839 so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING); 840 so->so_state |= SS_ISCONNECTED; 841 cv_broadcast(&so->so_state_cv); 842 } 843 844 /* 845 * The caller also needs to call strsetrerror, strsetwerror and strseteof. 846 */ 847 void 848 soisdisconnected(struct sonode *so, int error) 849 { 850 ASSERT(MUTEX_HELD(&so->so_lock)); 851 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING| 852 SS_LADDR_VALID|SS_FADDR_VALID); 853 so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE); 854 so->so_error = (ushort_t)error; 855 if (so->so_peercred != NULL) { 856 crfree(so->so_peercred); 857 so->so_peercred = NULL; 858 } 859 cv_broadcast(&so->so_state_cv); 860 } 861 862 /* 863 * For connected AF_UNIX SOCK_DGRAM sockets when the peer closes. 864 * Does not affect write side. 865 * The caller also has to call strsetrerror. 866 */ 867 static void 868 sobreakconn(struct sonode *so, int error) 869 { 870 ASSERT(MUTEX_HELD(&so->so_lock)); 871 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); 872 so->so_error = (ushort_t)error; 873 cv_broadcast(&so->so_state_cv); 874 } 875 876 /* 877 * Can no longer send. 878 * Caller must also call strsetwerror. 879 * 880 * We mark the peer address as no longer valid for getpeername, but 881 * leave it around for so_unix_close to notify the peer (that 882 * transport has no addressing held at that layer). 883 */ 884 void 885 socantsendmore(struct sonode *so) 886 { 887 ASSERT(MUTEX_HELD(&so->so_lock)); 888 so->so_state = so->so_state & ~SS_FADDR_VALID | SS_CANTSENDMORE; 889 cv_broadcast(&so->so_state_cv); 890 } 891 892 /* 893 * The caller must call strseteof(,1) as well as this routine 894 * to change the socket state. 
895 */ 896 void 897 socantrcvmore(struct sonode *so) 898 { 899 ASSERT(MUTEX_HELD(&so->so_lock)); 900 so->so_state |= SS_CANTRCVMORE; 901 cv_broadcast(&so->so_state_cv); 902 } 903 904 /* 905 * The caller has sent down a "request_prim" primitive and wants to wait for 906 * an ack ("ack_prim") or an T_ERROR_ACK for it. 907 * The specified "ack_prim" can be a T_OK_ACK. 908 * 909 * Assumes that all the TPI acks are M_PCPROTO messages. 910 * 911 * Note that the socket is single-threaded (using so_lock_single) 912 * for all operations that generate TPI ack messages. Since 913 * only TPI ack messages are M_PCPROTO we should never receive 914 * anything except either the ack we are expecting or a T_ERROR_ACK 915 * for the same primitive. 916 */ 917 int 918 sowaitprim(struct sonode *so, t_scalar_t request_prim, t_scalar_t ack_prim, 919 t_uscalar_t min_size, mblk_t **mpp, clock_t wait) 920 { 921 mblk_t *mp; 922 union T_primitives *tpr; 923 int error; 924 925 dprintso(so, 1, ("sowaitprim(%p, %d, %d, %d, %p, %lu)\n", 926 so, request_prim, ack_prim, min_size, mpp, wait)); 927 928 ASSERT(MUTEX_HELD(&so->so_lock)); 929 930 error = sowaitack(so, &mp, wait); 931 if (error) 932 return (error); 933 934 dprintso(so, 1, ("got msg %p\n", mp)); 935 if (DB_TYPE(mp) != M_PCPROTO || 936 MBLKL(mp) < sizeof (tpr->type)) { 937 freemsg(mp); 938 eprintsoline(so, EPROTO); 939 return (EPROTO); 940 } 941 tpr = (union T_primitives *)mp->b_rptr; 942 /* 943 * Did we get the primitive that we were asking for? 944 * For T_OK_ACK we also check that it matches the request primitive. 945 */ 946 if (tpr->type == ack_prim && 947 (ack_prim != T_OK_ACK || 948 tpr->ok_ack.CORRECT_prim == request_prim)) { 949 if (MBLKL(mp) >= (ssize_t)min_size) { 950 /* Found what we are looking for */ 951 *mpp = mp; 952 return (0); 953 } 954 /* Too short */ 955 freemsg(mp); 956 eprintsoline(so, EPROTO); 957 return (EPROTO); 958 } 959 960 if (tpr->type == T_ERROR_ACK && 961 tpr->error_ack.ERROR_prim == request_prim) { 962 /* Error to the primitive we were looking for */ 963 if (tpr->error_ack.TLI_error == TSYSERR) { 964 error = tpr->error_ack.UNIX_error; 965 } else { 966 error = tlitosyserr(tpr->error_ack.TLI_error); 967 } 968 dprintso(so, 0, ("error_ack for %d: %d/%d ->%d\n", 969 tpr->error_ack.ERROR_prim, 970 tpr->error_ack.TLI_error, 971 tpr->error_ack.UNIX_error, 972 error)); 973 freemsg(mp); 974 return (error); 975 } 976 /* 977 * Wrong primitive or T_ERROR_ACK for the wrong primitive 978 */ 979 #ifdef DEBUG 980 if (tpr->type == T_ERROR_ACK) { 981 dprintso(so, 0, ("error_ack for %d: %d/%d\n", 982 tpr->error_ack.ERROR_prim, 983 tpr->error_ack.TLI_error, 984 tpr->error_ack.UNIX_error)); 985 } else if (tpr->type == T_OK_ACK) { 986 dprintso(so, 0, ("ok_ack for %d, expected %d for %d\n", 987 tpr->ok_ack.CORRECT_prim, 988 ack_prim, request_prim)); 989 } else { 990 dprintso(so, 0, 991 ("unexpected primitive %d, expected %d for %d\n", 992 tpr->type, ack_prim, request_prim)); 993 } 994 #endif /* DEBUG */ 995 996 freemsg(mp); 997 eprintsoline(so, EPROTO); 998 return (EPROTO); 999 } 1000 1001 /* 1002 * Wait for a T_OK_ACK for the specified primitive. 1003 */ 1004 int 1005 sowaitokack(struct sonode *so, t_scalar_t request_prim) 1006 { 1007 mblk_t *mp; 1008 int error; 1009 1010 error = sowaitprim(so, request_prim, T_OK_ACK, 1011 (t_uscalar_t)sizeof (struct T_ok_ack), &mp, 0); 1012 if (error) 1013 return (error); 1014 freemsg(mp); 1015 return (0); 1016 } 1017 1018 /* 1019 * Queue a received TPI ack message on so_ack_mp. 
1020 */ 1021 void 1022 soqueueack(struct sonode *so, mblk_t *mp) 1023 { 1024 if (DB_TYPE(mp) != M_PCPROTO) { 1025 cmn_err(CE_WARN, 1026 "sockfs: received unexpected M_PROTO TPI ack. Prim %d\n", 1027 *(t_scalar_t *)mp->b_rptr); 1028 freemsg(mp); 1029 return; 1030 } 1031 1032 mutex_enter(&so->so_lock); 1033 if (so->so_ack_mp != NULL) { 1034 dprintso(so, 1, ("so_ack_mp already set\n")); 1035 freemsg(so->so_ack_mp); 1036 so->so_ack_mp = NULL; 1037 } 1038 so->so_ack_mp = mp; 1039 cv_broadcast(&so->so_ack_cv); 1040 mutex_exit(&so->so_lock); 1041 } 1042 1043 /* 1044 * Wait for a TPI ack ignoring signals and errors. 1045 */ 1046 int 1047 sowaitack(struct sonode *so, mblk_t **mpp, clock_t wait) 1048 { 1049 ASSERT(MUTEX_HELD(&so->so_lock)); 1050 1051 while (so->so_ack_mp == NULL) { 1052 #ifdef SOCK_TEST 1053 if (wait == 0 && sock_test_timelimit != 0) 1054 wait = sock_test_timelimit; 1055 #endif 1056 if (wait != 0) { 1057 /* 1058 * Only wait for the time limit. 1059 */ 1060 clock_t now; 1061 1062 time_to_wait(&now, wait); 1063 if (cv_timedwait(&so->so_ack_cv, &so->so_lock, 1064 now) == -1) { 1065 eprintsoline(so, ETIME); 1066 return (ETIME); 1067 } 1068 } 1069 else 1070 cv_wait(&so->so_ack_cv, &so->so_lock); 1071 } 1072 *mpp = so->so_ack_mp; 1073 #ifdef DEBUG 1074 { 1075 union T_primitives *tpr; 1076 mblk_t *mp = *mpp; 1077 1078 tpr = (union T_primitives *)mp->b_rptr; 1079 ASSERT(DB_TYPE(mp) == M_PCPROTO); 1080 ASSERT(tpr->type == T_OK_ACK || 1081 tpr->type == T_ERROR_ACK || 1082 tpr->type == T_BIND_ACK || 1083 tpr->type == T_CAPABILITY_ACK || 1084 tpr->type == T_INFO_ACK || 1085 tpr->type == T_OPTMGMT_ACK); 1086 } 1087 #endif /* DEBUG */ 1088 so->so_ack_mp = NULL; 1089 return (0); 1090 } 1091 1092 /* 1093 * Queue a received T_CONN_IND message on so_conn_ind_head/tail. 1094 */ 1095 void 1096 soqueueconnind(struct sonode *so, mblk_t *mp) 1097 { 1098 if (DB_TYPE(mp) != M_PROTO) { 1099 cmn_err(CE_WARN, 1100 "sockfs: received unexpected M_PCPROTO T_CONN_IND\n"); 1101 freemsg(mp); 1102 return; 1103 } 1104 1105 mutex_enter(&so->so_lock); 1106 ASSERT(mp->b_next == NULL); 1107 if (so->so_conn_ind_head == NULL) { 1108 so->so_conn_ind_head = mp; 1109 so->so_state |= SS_HASCONNIND; 1110 } else { 1111 ASSERT(so->so_state & SS_HASCONNIND); 1112 ASSERT(so->so_conn_ind_tail->b_next == NULL); 1113 so->so_conn_ind_tail->b_next = mp; 1114 } 1115 so->so_conn_ind_tail = mp; 1116 /* Wakeup a single consumer of the T_CONN_IND */ 1117 cv_signal(&so->so_connind_cv); 1118 mutex_exit(&so->so_lock); 1119 } 1120 1121 /* 1122 * Wait for a T_CONN_IND. 1123 * Don't wait if nonblocking. 1124 * Accept signals and socket errors. 
1125 */ 1126 int 1127 sowaitconnind(struct sonode *so, int fmode, mblk_t **mpp) 1128 { 1129 mblk_t *mp; 1130 int error = 0; 1131 1132 ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 1133 mutex_enter(&so->so_lock); 1134 check_error: 1135 if (so->so_error) { 1136 error = sogeterr(so); 1137 if (error) { 1138 mutex_exit(&so->so_lock); 1139 return (error); 1140 } 1141 } 1142 1143 if (so->so_conn_ind_head == NULL) { 1144 if (fmode & (FNDELAY|FNONBLOCK)) { 1145 error = EWOULDBLOCK; 1146 goto done; 1147 } 1148 if (!cv_wait_sig_swap(&so->so_connind_cv, &so->so_lock)) { 1149 error = EINTR; 1150 goto done; 1151 } 1152 goto check_error; 1153 } 1154 mp = so->so_conn_ind_head; 1155 so->so_conn_ind_head = mp->b_next; 1156 mp->b_next = NULL; 1157 if (so->so_conn_ind_head == NULL) { 1158 ASSERT(so->so_conn_ind_tail == mp); 1159 so->so_conn_ind_tail = NULL; 1160 so->so_state &= ~SS_HASCONNIND; 1161 } 1162 *mpp = mp; 1163 done: 1164 mutex_exit(&so->so_lock); 1165 return (error); 1166 } 1167 1168 /* 1169 * Flush a T_CONN_IND matching the sequence number from the list. 1170 * Return zero if found; non-zero otherwise. 1171 * This is called very infrequently thus it is ok to do a linear search. 1172 */ 1173 int 1174 soflushconnind(struct sonode *so, t_scalar_t seqno) 1175 { 1176 mblk_t *prevmp, *mp; 1177 struct T_conn_ind *tci; 1178 1179 mutex_enter(&so->so_lock); 1180 for (prevmp = NULL, mp = so->so_conn_ind_head; mp != NULL; 1181 prevmp = mp, mp = mp->b_next) { 1182 tci = (struct T_conn_ind *)mp->b_rptr; 1183 if (tci->SEQ_number == seqno) { 1184 dprintso(so, 1, 1185 ("t_discon_ind: found T_CONN_IND %d\n", seqno)); 1186 /* Deleting last? */ 1187 if (so->so_conn_ind_tail == mp) { 1188 so->so_conn_ind_tail = prevmp; 1189 } 1190 if (prevmp == NULL) { 1191 /* Deleting first */ 1192 so->so_conn_ind_head = mp->b_next; 1193 } else { 1194 prevmp->b_next = mp->b_next; 1195 } 1196 mp->b_next = NULL; 1197 if (so->so_conn_ind_head == NULL) { 1198 ASSERT(so->so_conn_ind_tail == NULL); 1199 so->so_state &= ~SS_HASCONNIND; 1200 } else { 1201 ASSERT(so->so_conn_ind_tail != NULL); 1202 } 1203 so->so_error = ECONNABORTED; 1204 mutex_exit(&so->so_lock); 1205 freemsg(mp); 1206 return (0); 1207 } 1208 } 1209 mutex_exit(&so->so_lock); 1210 dprintso(so, 1, ("t_discon_ind: NOT found T_CONN_IND %d\n", seqno)); 1211 return (-1); 1212 } 1213 1214 /* 1215 * Wait until the socket is connected or there is an error. 1216 * fmode should contain any nonblocking flags. nosig should be 1217 * set if the caller does not want the wait to be interrupted by a signal. 1218 */ 1219 int 1220 sowaitconnected(struct sonode *so, int fmode, int nosig) 1221 { 1222 int error; 1223 1224 ASSERT(MUTEX_HELD(&so->so_lock)); 1225 1226 while ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 1227 SS_ISCONNECTING && so->so_error == 0) { 1228 1229 dprintso(so, 1, ("waiting for SS_ISCONNECTED on %p\n", so)); 1230 if (fmode & (FNDELAY|FNONBLOCK)) 1231 return (EINPROGRESS); 1232 1233 if (nosig) 1234 cv_wait(&so->so_state_cv, &so->so_lock); 1235 else if (!cv_wait_sig_swap(&so->so_state_cv, &so->so_lock)) { 1236 /* 1237 * Return EINTR and let the application use 1238 * nonblocking techniques for detecting when 1239 * the connection has been established. 
1240 */ 1241 return (EINTR); 1242 } 1243 dprintso(so, 1, ("awoken on %p\n", so)); 1244 } 1245 1246 if (so->so_error != 0) { 1247 error = sogeterr(so); 1248 ASSERT(error != 0); 1249 dprintso(so, 1, ("sowaitconnected: error %d\n", error)); 1250 return (error); 1251 } 1252 if (!(so->so_state & SS_ISCONNECTED)) { 1253 /* 1254 * Could have received a T_ORDREL_IND or a T_DISCON_IND with 1255 * zero errno. Or another thread could have consumed so_error 1256 * e.g. by calling read. 1257 */ 1258 error = ECONNREFUSED; 1259 dprintso(so, 1, ("sowaitconnected: error %d\n", error)); 1260 return (error); 1261 } 1262 return (0); 1263 } 1264 1265 1266 /* 1267 * Handle the signal generation aspect of urgent data. 1268 */ 1269 static void 1270 so_oob_sig(struct sonode *so, int extrasig, 1271 strsigset_t *signals, strpollset_t *pollwakeups) 1272 { 1273 ASSERT(MUTEX_HELD(&so->so_lock)); 1274 1275 ASSERT(so_verify_oobstate(so)); 1276 ASSERT(so->so_oobsigcnt >= so->so_oobcnt); 1277 if (so->so_oobsigcnt > so->so_oobcnt) { 1278 /* 1279 * Signal has already been generated once for this 1280 * urgent "event". However, since TCP can receive updated 1281 * urgent pointers we still generate a signal. 1282 */ 1283 ASSERT(so->so_state & SS_OOBPEND); 1284 if (extrasig) { 1285 *signals |= S_RDBAND; 1286 *pollwakeups |= POLLRDBAND; 1287 } 1288 return; 1289 } 1290 1291 so->so_oobsigcnt++; 1292 ASSERT(so->so_oobsigcnt > 0); /* Wraparound */ 1293 ASSERT(so->so_oobsigcnt > so->so_oobcnt); 1294 1295 /* 1296 * Record (for select/poll) that urgent data is pending. 1297 */ 1298 so->so_state |= SS_OOBPEND; 1299 /* 1300 * New urgent data on the way so forget about any old 1301 * urgent data. 1302 */ 1303 so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA); 1304 if (so->so_oobmsg != NULL) { 1305 dprintso(so, 1, ("sock: discarding old oob\n")); 1306 freemsg(so->so_oobmsg); 1307 so->so_oobmsg = NULL; 1308 } 1309 *signals |= S_RDBAND; 1310 *pollwakeups |= POLLRDBAND; 1311 ASSERT(so_verify_oobstate(so)); 1312 } 1313 1314 /* 1315 * Handle the processing of the T_EXDATA_IND with urgent data. 1316 * Returns the T_EXDATA_IND if it should be queued on the read queue. 1317 */ 1318 /* ARGSUSED2 */ 1319 static mblk_t * 1320 so_oob_exdata(struct sonode *so, mblk_t *mp, 1321 strsigset_t *signals, strpollset_t *pollwakeups) 1322 { 1323 ASSERT(MUTEX_HELD(&so->so_lock)); 1324 1325 ASSERT(so_verify_oobstate(so)); 1326 1327 ASSERT(so->so_oobsigcnt > so->so_oobcnt); 1328 1329 so->so_oobcnt++; 1330 ASSERT(so->so_oobcnt > 0); /* wraparound? */ 1331 ASSERT(so->so_oobsigcnt >= so->so_oobcnt); 1332 1333 /* 1334 * Set MSGMARK for SIOCATMARK. 1335 */ 1336 mp->b_flag |= MSGMARK; 1337 1338 ASSERT(so_verify_oobstate(so)); 1339 return (mp); 1340 } 1341 1342 /* 1343 * Handle the processing of the actual urgent data. 1344 * Returns the data mblk if it should be queued on the read queue. 1345 */ 1346 static mblk_t * 1347 so_oob_data(struct sonode *so, mblk_t *mp, 1348 strsigset_t *signals, strpollset_t *pollwakeups) 1349 { 1350 ASSERT(MUTEX_HELD(&so->so_lock)); 1351 1352 ASSERT(so_verify_oobstate(so)); 1353 1354 ASSERT(so->so_oobsigcnt >= so->so_oobcnt); 1355 ASSERT(mp != NULL); 1356 /* 1357 * For OOBINLINE we keep the data in the T_EXDATA_IND. 1358 * Otherwise we store it in so_oobmsg. 
1359 */ 1360 ASSERT(so->so_oobmsg == NULL); 1361 if (so->so_options & SO_OOBINLINE) { 1362 *pollwakeups |= POLLIN | POLLRDNORM | POLLRDBAND; 1363 *signals |= S_INPUT | S_RDNORM; 1364 } else { 1365 *pollwakeups |= POLLRDBAND; 1366 so->so_state |= SS_HAVEOOBDATA; 1367 so->so_oobmsg = mp; 1368 mp = NULL; 1369 } 1370 ASSERT(so_verify_oobstate(so)); 1371 return (mp); 1372 } 1373 1374 /* 1375 * Caller must hold the mutex. 1376 * For delayed processing, save the T_DISCON_IND received 1377 * from below on so_discon_ind_mp. 1378 * When the message is processed the framework will call: 1379 * (*func)(so, mp); 1380 */ 1381 static void 1382 so_save_discon_ind(struct sonode *so, 1383 mblk_t *mp, 1384 void (*func)(struct sonode *so, mblk_t *)) 1385 { 1386 ASSERT(MUTEX_HELD(&so->so_lock)); 1387 1388 /* 1389 * Discard new T_DISCON_IND if we have already received another. 1390 * Currently the earlier message can either be on so_discon_ind_mp 1391 * or being processed. 1392 */ 1393 if (so->so_discon_ind_mp != NULL || (so->so_flag & SOASYNC_UNBIND)) { 1394 cmn_err(CE_WARN, 1395 "sockfs: received unexpected additional T_DISCON_IND\n"); 1396 freemsg(mp); 1397 return; 1398 } 1399 mp->b_prev = (mblk_t *)func; 1400 mp->b_next = NULL; 1401 so->so_discon_ind_mp = mp; 1402 } 1403 1404 /* 1405 * Caller must hold the mutex and make sure that either SOLOCKED 1406 * or SOASYNC_UNBIND is set. Called from so_unlock_single(). 1407 * Perform delayed processing of T_DISCON_IND message on so_discon_ind_mp. 1408 * Need to ensure that strsock_proto() will not end up sleeping for 1409 * SOASYNC_UNBIND, while executing this function. 1410 */ 1411 void 1412 so_drain_discon_ind(struct sonode *so) 1413 { 1414 mblk_t *bp; 1415 void (*func)(struct sonode *so, mblk_t *); 1416 1417 ASSERT(MUTEX_HELD(&so->so_lock)); 1418 ASSERT(so->so_flag & (SOLOCKED|SOASYNC_UNBIND)); 1419 1420 /* Process T_DISCON_IND on so_discon_ind_mp */ 1421 if ((bp = so->so_discon_ind_mp) != NULL) { 1422 so->so_discon_ind_mp = NULL; 1423 func = (void (*)())bp->b_prev; 1424 bp->b_prev = NULL; 1425 1426 /* 1427 * This (*func) is supposed to generate a message downstream 1428 * and we need to have a flag set until the corresponding 1429 * upstream message reaches stream head. 1430 * When processing T_DISCON_IND in strsock_discon_ind 1431 * we hold SOASYN_UNBIND when sending T_UNBIND_REQ down and 1432 * drop the flag after we get the ACK in strsock_proto. 1433 */ 1434 (void) (*func)(so, bp); 1435 } 1436 } 1437 1438 /* 1439 * Caller must hold the mutex. 1440 * Remove the T_DISCON_IND on so_discon_ind_mp. 1441 */ 1442 void 1443 so_flush_discon_ind(struct sonode *so) 1444 { 1445 mblk_t *bp; 1446 1447 ASSERT(MUTEX_HELD(&so->so_lock)); 1448 1449 /* 1450 * Remove T_DISCON_IND mblk at so_discon_ind_mp. 1451 */ 1452 if ((bp = so->so_discon_ind_mp) != NULL) { 1453 so->so_discon_ind_mp = NULL; 1454 bp->b_prev = NULL; 1455 freemsg(bp); 1456 } 1457 } 1458 1459 /* 1460 * Caller must hold the mutex. 1461 * 1462 * This function is used to process the T_DISCON_IND message. It does 1463 * immediate processing when called from strsock_proto and delayed 1464 * processing of discon_ind saved on so_discon_ind_mp when called from 1465 * so_drain_discon_ind. When a T_DISCON_IND message is saved in 1466 * so_discon_ind_mp for delayed processing, this function is registered 1467 * as the callback function to process the message. 
1468 * 1469 * SOASYNC_UNBIND should be held in this function, during the non-blocking 1470 * unbind operation, and should be released only after we receive the ACK 1471 * in strsock_proto, for the T_UNBIND_REQ sent here. Since SOLOCKED is not set, 1472 * no TPI messages would be sent down at this time. This is to prevent M_FLUSH 1473 * sent from either this function or tcp_unbind(), flushing away any TPI 1474 * message that is being sent down and stays in a lower module's queue. 1475 * 1476 * This function drops so_lock and grabs it again. 1477 */ 1478 static void 1479 strsock_discon_ind(struct sonode *so, mblk_t *discon_mp) 1480 { 1481 struct vnode *vp; 1482 struct stdata *stp; 1483 union T_primitives *tpr; 1484 struct T_unbind_req *ubr; 1485 mblk_t *mp; 1486 int error; 1487 1488 ASSERT(MUTEX_HELD(&so->so_lock)); 1489 ASSERT(discon_mp); 1490 ASSERT(discon_mp->b_rptr); 1491 1492 tpr = (union T_primitives *)discon_mp->b_rptr; 1493 ASSERT(tpr->type == T_DISCON_IND); 1494 1495 vp = SOTOV(so); 1496 stp = vp->v_stream; 1497 ASSERT(stp); 1498 1499 /* 1500 * Not a listener 1501 */ 1502 ASSERT((so->so_state & SS_ACCEPTCONN) == 0); 1503 1504 /* 1505 * This assumes that the name space for DISCON_reason 1506 * is the errno name space. 1507 */ 1508 soisdisconnected(so, tpr->discon_ind.DISCON_reason); 1509 1510 /* 1511 * Unbind with the transport without blocking. 1512 * If we've already received a T_DISCON_IND do not unbind. 1513 * 1514 * If there is no preallocated unbind message, we have already 1515 * unbound with the transport 1516 * 1517 * If the socket is not bound, no need to unbind. 1518 */ 1519 mp = so->so_unbind_mp; 1520 if (mp == NULL) { 1521 ASSERT(!(so->so_state & SS_ISBOUND)); 1522 mutex_exit(&so->so_lock); 1523 } else if (!(so->so_state & SS_ISBOUND)) { 1524 mutex_exit(&so->so_lock); 1525 } else { 1526 so->so_unbind_mp = NULL; 1527 1528 /* 1529 * Is another T_DISCON_IND being processed. 1530 */ 1531 ASSERT((so->so_flag & SOASYNC_UNBIND) == 0); 1532 1533 /* 1534 * Make strsock_proto ignore T_OK_ACK and T_ERROR_ACK for 1535 * this unbind. Set SOASYNC_UNBIND. This should be cleared 1536 * only after we receive the ACK in strsock_proto. 1537 */ 1538 so->so_flag |= SOASYNC_UNBIND; 1539 ASSERT(!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING))); 1540 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN|SS_LADDR_VALID); 1541 mutex_exit(&so->so_lock); 1542 1543 /* 1544 * Send down T_UNBIND_REQ ignoring flow control. 1545 * XXX Assumes that MSG_IGNFLOW implies that this thread 1546 * does not run service procedures. 1547 */ 1548 ASSERT(DB_TYPE(mp) == M_PROTO); 1549 ubr = (struct T_unbind_req *)mp->b_rptr; 1550 mp->b_wptr += sizeof (*ubr); 1551 ubr->PRIM_type = T_UNBIND_REQ; 1552 1553 /* 1554 * Flush the read and write side (except stream head read queue) 1555 * and send down T_UNBIND_REQ. 1556 */ 1557 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW); 1558 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1559 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 1560 /* LINTED - warning: statement has no consequent: if */ 1561 if (error) { 1562 eprintsoline(so, error); 1563 } 1564 } 1565 1566 if (tpr->discon_ind.DISCON_reason != 0) 1567 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 1568 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 1569 strseteof(SOTOV(so), 1); 1570 /* 1571 * strseteof takes care of read side wakeups, 1572 * pollwakeups, and signals. 
1573 */ 1574 dprintso(so, 1, ("T_DISCON_IND: error %d\n", so->so_error)); 1575 freemsg(discon_mp); 1576 1577 1578 pollwakeup(&stp->sd_pollist, POLLOUT); 1579 mutex_enter(&stp->sd_lock); 1580 1581 /* 1582 * Wake sleeping write 1583 */ 1584 if (stp->sd_flag & WSLEEP) { 1585 stp->sd_flag &= ~WSLEEP; 1586 cv_broadcast(&stp->sd_wrq->q_wait); 1587 } 1588 1589 /* 1590 * strsendsig can handle multiple signals with a 1591 * single call. Send SIGPOLL for S_OUTPUT event. 1592 */ 1593 if (stp->sd_sigflags & S_OUTPUT) 1594 strsendsig(stp->sd_siglist, S_OUTPUT, 0, 0); 1595 1596 mutex_exit(&stp->sd_lock); 1597 mutex_enter(&so->so_lock); 1598 } 1599 1600 /* 1601 * This routine is registered with the stream head to receive M_PROTO 1602 * and M_PCPROTO messages. 1603 * 1604 * Returns NULL if the message was consumed. 1605 * Returns an mblk to make that mblk be processed (and queued) by the stream 1606 * head. 1607 * 1608 * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and 1609 * *pollwakeups) for the stream head to take action on. Note that since 1610 * sockets always deliver SIGIO for every new piece of data this routine 1611 * never sets *firstmsgsigs; any signals are returned in *allmsgsigs. 1612 * 1613 * This routine handles all data related TPI messages independent of 1614 * the type of the socket i.e. it doesn't care if T_UNITDATA_IND message 1615 * arrive on a SOCK_STREAM. 1616 */ 1617 static mblk_t * 1618 strsock_proto(vnode_t *vp, mblk_t *mp, 1619 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 1620 strsigset_t *allmsgsigs, strpollset_t *pollwakeups) 1621 { 1622 union T_primitives *tpr; 1623 struct sonode *so; 1624 1625 so = VTOSO(vp); 1626 1627 dprintso(so, 1, ("strsock_proto(%p, %p)\n", vp, mp)); 1628 1629 /* Set default return values */ 1630 *firstmsgsigs = *wakeups = *allmsgsigs = *pollwakeups = 0; 1631 1632 ASSERT(DB_TYPE(mp) == M_PROTO || 1633 DB_TYPE(mp) == M_PCPROTO); 1634 1635 if (MBLKL(mp) < sizeof (tpr->type)) { 1636 /* The message is too short to even contain the primitive */ 1637 cmn_err(CE_WARN, 1638 "sockfs: Too short TPI message received. Len = %ld\n", 1639 (ptrdiff_t)(MBLKL(mp))); 1640 freemsg(mp); 1641 return (NULL); 1642 } 1643 if (!__TPI_PRIM_ISALIGNED(mp->b_rptr)) { 1644 /* The read pointer is not aligned correctly for TPI */ 1645 cmn_err(CE_WARN, 1646 "sockfs: Unaligned TPI message received. rptr = %p\n", 1647 (void *)mp->b_rptr); 1648 freemsg(mp); 1649 return (NULL); 1650 } 1651 tpr = (union T_primitives *)mp->b_rptr; 1652 dprintso(so, 1, ("strsock_proto: primitive %d\n", tpr->type)); 1653 1654 switch (tpr->type) { 1655 1656 case T_DATA_IND: 1657 if (MBLKL(mp) < sizeof (struct T_data_ind)) { 1658 cmn_err(CE_WARN, 1659 "sockfs: Too short T_DATA_IND. Len = %ld\n", 1660 (ptrdiff_t)(MBLKL(mp))); 1661 freemsg(mp); 1662 return (NULL); 1663 } 1664 /* 1665 * Ignore zero-length T_DATA_IND messages. These might be 1666 * generated by some transports. 1667 * This is needed to prevent read (which skips the M_PROTO 1668 * part) to unexpectedly return 0 (or return EWOULDBLOCK 1669 * on a non-blocking socket after select/poll has indicated 1670 * that data is available). 
1671 */ 1672 if (msgdsize(mp->b_cont) == 0) { 1673 dprintso(so, 0, 1674 ("strsock_proto: zero length T_DATA_IND\n")); 1675 freemsg(mp); 1676 return (NULL); 1677 } 1678 *allmsgsigs = S_INPUT | S_RDNORM; 1679 *pollwakeups = POLLIN | POLLRDNORM; 1680 *wakeups = RSLEEP; 1681 return (mp); 1682 1683 case T_UNITDATA_IND: { 1684 struct T_unitdata_ind *tudi = &tpr->unitdata_ind; 1685 void *addr; 1686 t_uscalar_t addrlen; 1687 1688 if (MBLKL(mp) < sizeof (struct T_unitdata_ind)) { 1689 cmn_err(CE_WARN, 1690 "sockfs: Too short T_UNITDATA_IND. Len = %ld\n", 1691 (ptrdiff_t)(MBLKL(mp))); 1692 freemsg(mp); 1693 return (NULL); 1694 } 1695 1696 /* Is this is not a connected datagram socket? */ 1697 if ((so->so_mode & SM_CONNREQUIRED) || 1698 !(so->so_state & SS_ISCONNECTED)) { 1699 /* 1700 * Not a connected datagram socket. Look for 1701 * the SO_UNIX_CLOSE option. If such an option is found 1702 * discard the message (since it has no meaning 1703 * unless connected). 1704 */ 1705 if (so->so_family == AF_UNIX && msgdsize(mp) == 0 && 1706 tudi->OPT_length != 0) { 1707 void *opt; 1708 t_uscalar_t optlen = tudi->OPT_length; 1709 1710 opt = sogetoff(mp, tudi->OPT_offset, 1711 optlen, __TPI_ALIGN_SIZE); 1712 if (opt == NULL) { 1713 /* The len/off falls outside mp */ 1714 freemsg(mp); 1715 mutex_enter(&so->so_lock); 1716 soseterror(so, EPROTO); 1717 mutex_exit(&so->so_lock); 1718 cmn_err(CE_WARN, 1719 "sockfs: T_unidata_ind with " 1720 "invalid optlen/offset %u/%d\n", 1721 optlen, tudi->OPT_offset); 1722 return (NULL); 1723 } 1724 if (so_getopt_unix_close(opt, optlen)) { 1725 freemsg(mp); 1726 return (NULL); 1727 } 1728 } 1729 *allmsgsigs = S_INPUT | S_RDNORM; 1730 *pollwakeups = POLLIN | POLLRDNORM; 1731 *wakeups = RSLEEP; 1732 #ifdef C2_AUDIT 1733 if (audit_active) 1734 audit_sock(T_UNITDATA_IND, strvp2wq(vp), 1735 mp, 0); 1736 #endif /* C2_AUDIT */ 1737 return (mp); 1738 } 1739 1740 /* 1741 * A connect datagram socket. For AF_INET{,6} we verify that 1742 * the source address matches the "connected to" address. 1743 * The semantics of AF_UNIX sockets is to not verify 1744 * the source address. 1745 * Note that this source address verification is transport 1746 * specific. Thus the real fix would be to extent TPI 1747 * to allow T_CONN_REQ messages to be send to connectionless 1748 * transport providers and always let the transport provider 1749 * do whatever filtering is needed. 1750 * 1751 * The verification/filtering semantics for transports 1752 * other than AF_INET and AF_UNIX are unknown. The choice 1753 * would be to either filter using bcmp or let all messages 1754 * get through. This code does not filter other address 1755 * families since this at least allows the application to 1756 * work around any missing filtering. 1757 * 1758 * XXX Should we move filtering to UDP/ICMP??? 1759 * That would require passing e.g. a T_DISCON_REQ to UDP 1760 * when the socket becomes unconnected. 1761 */ 1762 addrlen = tudi->SRC_length; 1763 /* 1764 * The alignment restriction is really to strict but 1765 * we want enough alignment to inspect the fields of 1766 * a sockaddr_in. 
1767 */ 1768 addr = sogetoff(mp, tudi->SRC_offset, addrlen, 1769 __TPI_ALIGN_SIZE); 1770 if (addr == NULL) { 1771 freemsg(mp); 1772 mutex_enter(&so->so_lock); 1773 soseterror(so, EPROTO); 1774 mutex_exit(&so->so_lock); 1775 cmn_err(CE_WARN, 1776 "sockfs: T_unidata_ind with invalid " 1777 "addrlen/offset %u/%d\n", 1778 addrlen, tudi->SRC_offset); 1779 return (NULL); 1780 } 1781 1782 if (so->so_family == AF_INET) { 1783 /* 1784 * For AF_INET we allow wildcarding both sin_addr 1785 * and sin_port. 1786 */ 1787 struct sockaddr_in *faddr, *sin; 1788 1789 /* Prevent so_faddr_sa from changing while accessed */ 1790 mutex_enter(&so->so_lock); 1791 ASSERT(so->so_faddr_len == 1792 (socklen_t)sizeof (struct sockaddr_in)); 1793 faddr = (struct sockaddr_in *)so->so_faddr_sa; 1794 sin = (struct sockaddr_in *)addr; 1795 if (addrlen != 1796 (t_uscalar_t)sizeof (struct sockaddr_in) || 1797 (sin->sin_addr.s_addr != faddr->sin_addr.s_addr && 1798 faddr->sin_addr.s_addr != INADDR_ANY) || 1799 (so->so_type != SOCK_RAW && 1800 sin->sin_port != faddr->sin_port && 1801 faddr->sin_port != 0)) { 1802 #ifdef DEBUG 1803 dprintso(so, 0, 1804 ("sockfs: T_UNITDATA_IND mismatch: %s", 1805 pr_addr(so->so_family, 1806 (struct sockaddr *)addr, 1807 addrlen))); 1808 dprintso(so, 0, (" - %s\n", 1809 pr_addr(so->so_family, so->so_faddr_sa, 1810 (t_uscalar_t)so->so_faddr_len))); 1811 #endif /* DEBUG */ 1812 mutex_exit(&so->so_lock); 1813 freemsg(mp); 1814 return (NULL); 1815 } 1816 mutex_exit(&so->so_lock); 1817 } else if (so->so_family == AF_INET6) { 1818 /* 1819 * For AF_INET6 we allow wildcarding both sin6_addr 1820 * and sin6_port. 1821 */ 1822 struct sockaddr_in6 *faddr6, *sin6; 1823 static struct in6_addr zeroes; /* inits to all zeros */ 1824 1825 /* Prevent so_faddr_sa from changing while accessed */ 1826 mutex_enter(&so->so_lock); 1827 ASSERT(so->so_faddr_len == 1828 (socklen_t)sizeof (struct sockaddr_in6)); 1829 faddr6 = (struct sockaddr_in6 *)so->so_faddr_sa; 1830 sin6 = (struct sockaddr_in6 *)addr; 1831 /* XXX could we get a mapped address ::ffff:0.0.0.0 ? */ 1832 if (addrlen != 1833 (t_uscalar_t)sizeof (struct sockaddr_in6) || 1834 (!IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, 1835 &faddr6->sin6_addr) && 1836 !IN6_ARE_ADDR_EQUAL(&faddr6->sin6_addr, &zeroes)) || 1837 (so->so_type != SOCK_RAW && 1838 sin6->sin6_port != faddr6->sin6_port && 1839 faddr6->sin6_port != 0)) { 1840 #ifdef DEBUG 1841 dprintso(so, 0, 1842 ("sockfs: T_UNITDATA_IND mismatch: %s", 1843 pr_addr(so->so_family, 1844 (struct sockaddr *)addr, 1845 addrlen))); 1846 dprintso(so, 0, (" - %s\n", 1847 pr_addr(so->so_family, so->so_faddr_sa, 1848 (t_uscalar_t)so->so_faddr_len))); 1849 #endif /* DEBUG */ 1850 mutex_exit(&so->so_lock); 1851 freemsg(mp); 1852 return (NULL); 1853 } 1854 mutex_exit(&so->so_lock); 1855 } else if (so->so_family == AF_UNIX && 1856 msgdsize(mp->b_cont) == 0 && 1857 tudi->OPT_length != 0) { 1858 /* 1859 * Attempt to extract AF_UNIX 1860 * SO_UNIX_CLOSE indication from options. 
1861 */ 1862 void *opt; 1863 t_uscalar_t optlen = tudi->OPT_length; 1864 1865 opt = sogetoff(mp, tudi->OPT_offset, 1866 optlen, __TPI_ALIGN_SIZE); 1867 if (opt == NULL) { 1868 /* The len/off falls outside mp */ 1869 freemsg(mp); 1870 mutex_enter(&so->so_lock); 1871 soseterror(so, EPROTO); 1872 mutex_exit(&so->so_lock); 1873 cmn_err(CE_WARN, 1874 "sockfs: T_unidata_ind with invalid " 1875 "optlen/offset %u/%d\n", 1876 optlen, tudi->OPT_offset); 1877 return (NULL); 1878 } 1879 /* 1880 * If we received a unix close indication mark the 1881 * socket and discard this message. 1882 */ 1883 if (so_getopt_unix_close(opt, optlen)) { 1884 mutex_enter(&so->so_lock); 1885 sobreakconn(so, ECONNRESET); 1886 mutex_exit(&so->so_lock); 1887 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 1888 freemsg(mp); 1889 *pollwakeups = POLLIN | POLLRDNORM; 1890 *allmsgsigs = S_INPUT | S_RDNORM; 1891 *wakeups = RSLEEP; 1892 return (NULL); 1893 } 1894 } 1895 *allmsgsigs = S_INPUT | S_RDNORM; 1896 *pollwakeups = POLLIN | POLLRDNORM; 1897 *wakeups = RSLEEP; 1898 return (mp); 1899 } 1900 1901 case T_OPTDATA_IND: { 1902 struct T_optdata_ind *tdi = &tpr->optdata_ind; 1903 1904 if (MBLKL(mp) < sizeof (struct T_optdata_ind)) { 1905 cmn_err(CE_WARN, 1906 "sockfs: Too short T_OPTDATA_IND. Len = %ld\n", 1907 (ptrdiff_t)(MBLKL(mp))); 1908 freemsg(mp); 1909 return (NULL); 1910 } 1911 /* 1912 * Allow zero-length messages carrying options. 1913 * This is used when carrying the SO_UNIX_CLOSE option. 1914 */ 1915 if (so->so_family == AF_UNIX && msgdsize(mp->b_cont) == 0 && 1916 tdi->OPT_length != 0) { 1917 /* 1918 * Attempt to extract AF_UNIX close indication 1919 * from the options. Ignore any other options - 1920 * those are handled once the message is removed 1921 * from the queue. 1922 * The close indication message should not carry data. 1923 */ 1924 void *opt; 1925 t_uscalar_t optlen = tdi->OPT_length; 1926 1927 opt = sogetoff(mp, tdi->OPT_offset, 1928 optlen, __TPI_ALIGN_SIZE); 1929 if (opt == NULL) { 1930 /* The len/off falls outside mp */ 1931 freemsg(mp); 1932 mutex_enter(&so->so_lock); 1933 soseterror(so, EPROTO); 1934 mutex_exit(&so->so_lock); 1935 cmn_err(CE_WARN, 1936 "sockfs: T_optdata_ind with invalid " 1937 "optlen/offset %u/%d\n", 1938 optlen, tdi->OPT_offset); 1939 return (NULL); 1940 } 1941 /* 1942 * If we received a close indication mark the 1943 * socket and discard this message. 1944 */ 1945 if (so_getopt_unix_close(opt, optlen)) { 1946 mutex_enter(&so->so_lock); 1947 socantsendmore(so); 1948 mutex_exit(&so->so_lock); 1949 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 1950 freemsg(mp); 1951 return (NULL); 1952 } 1953 } 1954 *allmsgsigs = S_INPUT | S_RDNORM; 1955 *pollwakeups = POLLIN | POLLRDNORM; 1956 *wakeups = RSLEEP; 1957 return (mp); 1958 } 1959 1960 case T_EXDATA_IND: { 1961 mblk_t *mctl, *mdata; 1962 1963 if (MBLKL(mp) < sizeof (struct T_exdata_ind)) { 1964 cmn_err(CE_WARN, 1965 "sockfs: Too short T_EXDATA_IND. Len = %ld\n", 1966 (ptrdiff_t)(MBLKL(mp))); 1967 freemsg(mp); 1968 return (NULL); 1969 } 1970 /* 1971 * Ignore zero-length T_EXDATA_IND messages. These might be 1972 * generated by some transports. 1973 * 1974 * This is needed to prevent read (which skips the M_PROTO 1975 * part) to unexpectedly return 0 (or return EWOULDBLOCK 1976 * on a non-blocking socket after select/poll has indicated 1977 * that data is available). 
         */
        dprintso(so, 1,
            ("T_EXDATA_IND(%p): counts %d/%d state %s\n",
            vp, so->so_oobsigcnt, so->so_oobcnt,
            pr_state(so->so_state, so->so_mode)));

        if (msgdsize(mp->b_cont) == 0) {
            dprintso(so, 0,
                ("strsock_proto: zero length T_EXDATA_IND\n"));
            freemsg(mp);
            return (NULL);
        }

        /*
         * Split into the T_EXDATA_IND and the M_DATA part.
         * We process the following separately:
         *	signal generation
         *	handling the T_EXDATA_IND
         *	handling the M_DATA component
         */
        mctl = mp;
        mdata = mctl->b_cont;
        mctl->b_cont = NULL;
        mutex_enter(&so->so_lock);
        so_oob_sig(so, 0, allmsgsigs, pollwakeups);
        mctl = so_oob_exdata(so, mctl, allmsgsigs, pollwakeups);
        mdata = so_oob_data(so, mdata, allmsgsigs, pollwakeups);

        /*
         * Pass the T_EXDATA_IND and the M_DATA back separately
         * by using b_next linkage. (The stream head will queue any
         * b_next linked messages separately.) This is needed
         * since MSGMARK applies to the last byte of the message;
         * hence we cannot have any M_DATA component attached
         * to the marked T_EXDATA_IND. Note that the stream head
         * will not consolidate M_DATA messages onto an MSGMARK'ed
         * message in order to preserve the constraint that
         * the T_EXDATA_IND is always a separate message.
         */
        ASSERT(mctl != NULL);
        mctl->b_next = mdata;
        mp = mctl;
#ifdef DEBUG
        if (mdata == NULL) {
            dprintso(so, 1,
                ("after outofline T_EXDATA_IND(%p): "
                "counts %d/%d poll 0x%x sig 0x%x state %s\n",
                vp, so->so_oobsigcnt,
                so->so_oobcnt, *pollwakeups, *allmsgsigs,
                pr_state(so->so_state, so->so_mode)));
        } else {
            dprintso(so, 1,
                ("after inline T_EXDATA_IND(%p): "
                "counts %d/%d poll 0x%x sig 0x%x state %s\n",
                vp, so->so_oobsigcnt,
                so->so_oobcnt, *pollwakeups, *allmsgsigs,
                pr_state(so->so_state, so->so_mode)));
        }
#endif /* DEBUG */
        mutex_exit(&so->so_lock);
        *wakeups = RSLEEP;
        return (mp);
    }

    case T_CONN_CON: {
        struct T_conn_con *conn_con;
        void *addr;
        t_uscalar_t addrlen;

        /*
         * Verify the state, update the state to ISCONNECTED,
         * record the potentially new address carried in the message,
         * and drop the message.
         */
        if (MBLKL(mp) < sizeof (struct T_conn_con)) {
            cmn_err(CE_WARN,
                "sockfs: Too short T_CONN_CON. Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }

        mutex_enter(&so->so_lock);
        if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) !=
            SS_ISCONNECTING) {
            mutex_exit(&so->so_lock);
            dprintso(so, 1,
                ("T_CONN_CON: state %x\n", so->so_state));
            freemsg(mp);
            return (NULL);
        }

        conn_con = &tpr->conn_con;
        addrlen = conn_con->RES_length;
        /*
         * Allow the address to be of different size than sent down
         * in the T_CONN_REQ as long as it doesn't exceed the maxlen.
         * For AF_UNIX require the identical length.
         */
        if (so->so_family == AF_UNIX ?
            addrlen != (t_uscalar_t)sizeof (so->so_ux_laddr) :
            addrlen > (t_uscalar_t)so->so_faddr_maxlen) {
            cmn_err(CE_WARN,
                "sockfs: T_conn_con with unexpected address "
                "length %u\n", addrlen);
            soisdisconnected(so, EPROTO);
            mutex_exit(&so->so_lock);
            strsetrerror(SOTOV(so), 0, 0, sogetrderr);
            strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
            strseteof(SOTOV(so), 1);
            freemsg(mp);
            /*
             * strseteof takes care of read side wakeups,
             * pollwakeups, and signals.
             */
            *wakeups = WSLEEP;
            *allmsgsigs = S_OUTPUT;
            *pollwakeups = POLLOUT;
            return (NULL);
        }
        addr = sogetoff(mp, conn_con->RES_offset, addrlen, 1);
        if (addr == NULL) {
            cmn_err(CE_WARN,
                "sockfs: T_conn_con with invalid "
                "addrlen/offset %u/%d\n",
                addrlen, conn_con->RES_offset);
            mutex_exit(&so->so_lock);
            strsetrerror(SOTOV(so), 0, 0, sogetrderr);
            strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
            strseteof(SOTOV(so), 1);
            freemsg(mp);
            /*
             * strseteof takes care of read side wakeups,
             * pollwakeups, and signals.
             */
            *wakeups = WSLEEP;
            *allmsgsigs = S_OUTPUT;
            *pollwakeups = POLLOUT;
            return (NULL);
        }

        /*
         * Save for getpeername.
         */
        if (so->so_family != AF_UNIX) {
            so->so_faddr_len = (socklen_t)addrlen;
            ASSERT(so->so_faddr_len <= so->so_faddr_maxlen);
            bcopy(addr, so->so_faddr_sa, addrlen);
            so->so_state |= SS_FADDR_VALID;
        }

        if (so->so_peercred != NULL)
            crfree(so->so_peercred);
        so->so_peercred = DB_CRED(mp);
        so->so_cpid = DB_CPID(mp);
        if (so->so_peercred != NULL)
            crhold(so->so_peercred);

        /* Wakeup anybody sleeping in sowaitconnected */
        soisconnected(so);
        mutex_exit(&so->so_lock);

        /*
         * The socket is now available for sending data.
         */
        *wakeups = WSLEEP;
        *allmsgsigs = S_OUTPUT;
        *pollwakeups = POLLOUT;
        freemsg(mp);
        return (NULL);
    }

    case T_CONN_IND:
        /*
         * Verify the min size and queue the message on
         * the so_conn_ind_head/tail list.
         */
        if (MBLKL(mp) < sizeof (struct T_conn_ind)) {
            cmn_err(CE_WARN,
                "sockfs: Too short T_CONN_IND. Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }

#ifdef C2_AUDIT
        if (audit_active)
            audit_sock(T_CONN_IND, strvp2wq(vp), mp, 0);
#endif /* C2_AUDIT */
        if (!(so->so_state & SS_ACCEPTCONN)) {
            cmn_err(CE_WARN,
                "sockfs: T_conn_ind on non-listening socket\n");
            freemsg(mp);
            return (NULL);
        }
        soqueueconnind(so, mp);
        *allmsgsigs = S_INPUT | S_RDNORM;
        *pollwakeups = POLLIN | POLLRDNORM;
        *wakeups = RSLEEP;
        return (NULL);

    case T_ORDREL_IND:
        if (MBLKL(mp) < sizeof (struct T_ordrel_ind)) {
            cmn_err(CE_WARN,
                "sockfs: Too short T_ORDREL_IND. Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }

        /*
         * Some providers send this when not fully connected.
         * SunLink X.25 needs to retrieve the disconnect reason after
         * a disconnect for compatibility. It uses T_ORDREL_IND
         * instead of T_DISCON_IND so that it may use the
         * endpoint after a connect failure to retrieve the
         * reason using an ioctl. Thus we explicitly clear
         * SS_ISCONNECTING here for SunLink X.25.
         * This is a needed TPI violation.
         */
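        /*
         * Editorial note (not in the original comments): socantrcvmore()
         * plus strseteof() below implement the read-side half of an
         * orderly release - subsequent reads drain any queued data and
         * then return 0 (EOF). The write side is presumably left alone
         * here so that the local end can keep sending until it performs
         * its own release or a T_DISCON_IND arrives.
         */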
        mutex_enter(&so->so_lock);
        so->so_state &= ~SS_ISCONNECTING;
        socantrcvmore(so);
        mutex_exit(&so->so_lock);
        strseteof(SOTOV(so), 1);
        /*
         * strseteof takes care of read side wakeups,
         * pollwakeups, and signals.
         */
        freemsg(mp);
        return (NULL);

    case T_DISCON_IND:
        if (MBLKL(mp) < sizeof (struct T_discon_ind)) {
            cmn_err(CE_WARN,
                "sockfs: Too short T_DISCON_IND. Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }
        if (so->so_state & SS_ACCEPTCONN) {
            /*
             * This is a listener. Look for a queued T_CONN_IND
             * with a matching sequence number and remove it
             * from the list.
             * It is normal not to find the sequence number since
             * soaccept might have already dequeued it
             * (in which case the T_CONN_RES will fail with
             * TBADSEQ).
             */
            (void) soflushconnind(so, tpr->discon_ind.SEQ_number);
            freemsg(mp);
            return (NULL);
        }

        /*
         * Not a listener.
         *
         * If SS_CANTRCVMORE is set for AF_UNIX ignore the discon_reason.
         * Such a discon_ind appears when the peer has first done
         * a shutdown() followed by a close(), in which case we just
         * want to record socantsendmore.
         * In this case sockfs first receives a T_ORDREL_IND followed
         * by a T_DISCON_IND.
         * Note that for other transports (e.g. TCP) we need to handle
         * the discon_ind in this case since it signals an error.
         */
        mutex_enter(&so->so_lock);
        if ((so->so_state & SS_CANTRCVMORE) &&
            (so->so_family == AF_UNIX)) {
            socantsendmore(so);
            mutex_exit(&so->so_lock);
            strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
            dprintso(so, 1,
                ("T_DISCON_IND: error %d\n", so->so_error));
            freemsg(mp);
            /*
             * Set these variables for the caller to process.
             * In the branches below, where the T_DISCON_IND is
             * actually processed, this is done by the function
             * being called (strsock_discon_ind()).
             */
            *wakeups = WSLEEP;
            *allmsgsigs = S_OUTPUT;
            *pollwakeups = POLLOUT;
        } else if (so->so_flag & (SOASYNC_UNBIND | SOLOCKED)) {
            /*
             * Deferred processing of T_DISCON_IND
             */
            so_save_discon_ind(so, mp, strsock_discon_ind);
            mutex_exit(&so->so_lock);
        } else {
            /*
             * Process T_DISCON_IND now
             */
            (void) strsock_discon_ind(so, mp);
            mutex_exit(&so->so_lock);
        }
        return (NULL);

    case T_UDERROR_IND: {
        struct T_uderror_ind *tudi = &tpr->uderror_ind;
        void *addr;
        t_uscalar_t addrlen;
        int error;

        dprintso(so, 0,
            ("T_UDERROR_IND: error %d\n", tudi->ERROR_type));

        if (MBLKL(mp) < sizeof (struct T_uderror_ind)) {
            cmn_err(CE_WARN,
                "sockfs: Too short T_UDERROR_IND. Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }
        /* Ignore on connection-oriented transports */
        if (so->so_mode & SM_CONNREQUIRED) {
            freemsg(mp);
            eprintsoline(so, 0);
            cmn_err(CE_WARN,
                "sockfs: T_uderror_ind on connection-oriented "
                "transport\n");
            return (NULL);
        }
        addrlen = tudi->DEST_length;
        addr = sogetoff(mp, tudi->DEST_offset, addrlen, 1);
        if (addr == NULL) {
            cmn_err(CE_WARN,
                "sockfs: T_uderror_ind with invalid "
                "addrlen/offset %u/%d\n",
                addrlen, tudi->DEST_offset);
            freemsg(mp);
            return (NULL);
        }

        /* Verify the source address for a connected socket. */
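        /*
         * Editorial note: the per-family comparisons below match on
         * address and port only; ancillary fields (e.g. sin6_flowinfo
         * for AF_INET6) are deliberately ignored. A mismatch means the
         * error refers to some destination other than the connected
         * peer, so the indication is simply dropped.
         */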
        mutex_enter(&so->so_lock);
        if (so->so_state & SS_ISCONNECTED) {
            void *faddr;
            t_uscalar_t faddr_len;
            boolean_t match = B_FALSE;

            switch (so->so_family) {
            case AF_INET: {
                /* Compare just IP address and port */
                struct sockaddr_in *sin1, *sin2;

                sin1 = (struct sockaddr_in *)so->so_faddr_sa;
                sin2 = (struct sockaddr_in *)addr;
                if (addrlen == sizeof (struct sockaddr_in) &&
                    sin1->sin_port == sin2->sin_port &&
                    sin1->sin_addr.s_addr ==
                    sin2->sin_addr.s_addr)
                    match = B_TRUE;
                break;
            }
            case AF_INET6: {
                /* Compare just IP address and port. Not flow */
                struct sockaddr_in6 *sin1, *sin2;

                sin1 = (struct sockaddr_in6 *)so->so_faddr_sa;
                sin2 = (struct sockaddr_in6 *)addr;
                if (addrlen == sizeof (struct sockaddr_in6) &&
                    sin1->sin6_port == sin2->sin6_port &&
                    IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
                    &sin2->sin6_addr))
                    match = B_TRUE;
                break;
            }
            case AF_UNIX:
                faddr = &so->so_ux_faddr;
                faddr_len =
                    (t_uscalar_t)sizeof (so->so_ux_faddr);
                if (faddr_len == addrlen &&
                    bcmp(addr, faddr, addrlen) == 0)
                    match = B_TRUE;
                break;
            default:
                faddr = so->so_faddr_sa;
                faddr_len = (t_uscalar_t)so->so_faddr_len;
                if (faddr_len == addrlen &&
                    bcmp(addr, faddr, addrlen) == 0)
                    match = B_TRUE;
                break;
            }

            if (!match) {
#ifdef DEBUG
                dprintso(so, 0,
                    ("sockfs: T_UDERROR_IND mismatch: %s - ",
                    pr_addr(so->so_family,
                    (struct sockaddr *)addr,
                    addrlen)));
                dprintso(so, 0, ("%s\n",
                    pr_addr(so->so_family, so->so_faddr_sa,
                    so->so_faddr_len)));
#endif /* DEBUG */
                mutex_exit(&so->so_lock);
                freemsg(mp);
                return (NULL);
            }
            /*
             * Make the write error nonpersistent. If the error
             * is zero we use ECONNRESET.
             * This assumes that the name space for ERROR_type
             * is the errno name space.
             */
            if (tudi->ERROR_type != 0)
                error = tudi->ERROR_type;
            else
                error = ECONNRESET;

            soseterror(so, error);
            mutex_exit(&so->so_lock);
            strsetrerror(SOTOV(so), 0, 0, sogetrderr);
            strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
            *wakeups = RSLEEP | WSLEEP;
            *allmsgsigs = S_INPUT | S_RDNORM | S_OUTPUT;
            *pollwakeups = POLLIN | POLLRDNORM | POLLOUT;
            freemsg(mp);
            return (NULL);
        }
        /*
         * If the application asked for delayed errors,
         * record the T_UDERROR_IND in so_eaddr_mp and the reason in
         * so_delayed_error for delayed error posting. If the reason
         * is zero use ECONNRESET.
         * Note that delayed error indications do not make sense for
         * AF_UNIX sockets since sendto checks that the destination
         * address is valid at the time of the sendto.
         */
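        /*
         * Illustrative note (not from the original source): an
         * application would typically opt into this delayed-error
         * behavior with something along the lines of
         *
         *	setsockopt(fd, SOL_SOCKET, SO_DGRAM_ERRIND, &on, sizeof (on));
         *
         * after which the error recorded below is presumably reported by
         * a later send to the failing destination (that send path is not
         * shown here).
         */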
        if (!(so->so_options & SO_DGRAM_ERRIND)) {
            mutex_exit(&so->so_lock);
            freemsg(mp);
            return (NULL);
        }
        if (so->so_eaddr_mp != NULL)
            freemsg(so->so_eaddr_mp);

        so->so_eaddr_mp = mp;
        if (tudi->ERROR_type != 0)
            error = tudi->ERROR_type;
        else
            error = ECONNRESET;
        so->so_delayed_error = (ushort_t)error;
        mutex_exit(&so->so_lock);
        return (NULL);
    }

    case T_ERROR_ACK:
        dprintso(so, 0,
            ("strsock_proto: T_ERROR_ACK for %d, error %d/%d\n",
            tpr->error_ack.ERROR_prim,
            tpr->error_ack.TLI_error,
            tpr->error_ack.UNIX_error));

        if (MBLKL(mp) < sizeof (struct T_error_ack)) {
            cmn_err(CE_WARN,
                "sockfs: Too short T_ERROR_ACK. Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }
        /*
         * Check if we were waiting for the async message
         */
        mutex_enter(&so->so_lock);
        if ((so->so_flag & SOASYNC_UNBIND) &&
            tpr->error_ack.ERROR_prim == T_UNBIND_REQ) {
            so_unlock_single(so, SOASYNC_UNBIND);
            mutex_exit(&so->so_lock);
            freemsg(mp);
            return (NULL);
        }
        mutex_exit(&so->so_lock);
        soqueueack(so, mp);
        return (NULL);

    case T_OK_ACK:
        if (MBLKL(mp) < sizeof (struct T_ok_ack)) {
            cmn_err(CE_WARN,
                "sockfs: Too short T_OK_ACK. Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }
        /*
         * Check if we were waiting for the async message
         */
        mutex_enter(&so->so_lock);
        if ((so->so_flag & SOASYNC_UNBIND) &&
            tpr->ok_ack.CORRECT_prim == T_UNBIND_REQ) {
            dprintso(so, 1,
                ("strsock_proto: T_OK_ACK async unbind\n"));
            so_unlock_single(so, SOASYNC_UNBIND);
            mutex_exit(&so->so_lock);
            freemsg(mp);
            return (NULL);
        }
        mutex_exit(&so->so_lock);
        soqueueack(so, mp);
        return (NULL);

    case T_INFO_ACK:
        if (MBLKL(mp) < sizeof (struct T_info_ack)) {
            cmn_err(CE_WARN,
                "sockfs: Too short T_INFO_ACK. Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }
        soqueueack(so, mp);
        return (NULL);

    case T_CAPABILITY_ACK:
        /*
         * A T_capability_ack need only be large enough to hold
         * the PRIM_type and CAP_bits1 fields; checking for anything
         * larger might reject a correct response from an older
         * provider.
         */
        if (MBLKL(mp) < 2 * sizeof (t_uscalar_t)) {
            cmn_err(CE_WARN,
                "sockfs: Too short T_CAPABILITY_ACK. Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }
        soqueueack(so, mp);
        return (NULL);

    case T_BIND_ACK:
        if (MBLKL(mp) < sizeof (struct T_bind_ack)) {
            cmn_err(CE_WARN,
                "sockfs: Too short T_BIND_ACK. Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }
        soqueueack(so, mp);
        return (NULL);

    case T_OPTMGMT_ACK:
        if (MBLKL(mp) < sizeof (struct T_optmgmt_ack)) {
            cmn_err(CE_WARN,
                "sockfs: Too short T_OPTMGMT_ACK. Len = %ld\n",
                (ptrdiff_t)(MBLKL(mp)));
            freemsg(mp);
            return (NULL);
        }
        soqueueack(so, mp);
        return (NULL);

    default:
#ifdef DEBUG
        cmn_err(CE_WARN,
            "sockfs: unknown TPI primitive %d received\n",
            tpr->type);
#endif /* DEBUG */
        freemsg(mp);
        return (NULL);
    }
}
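/*
 * Editorial note: strsock_proto() above handles the M_PROTO/M_PCPROTO (TPI)
 * messages arriving at the stream head, while strsock_misc() below handles
 * the remaining message types. Both follow the same convention: a NULL
 * return means the message was consumed here, a non-NULL return hands the
 * mblk back to the stream head for normal processing, and the *wakeups,
 * *firstmsgsigs, *allmsgsigs and *pollwakeups output parameters tell the
 * stream head which sleepers, signals and pollers to notify.
 */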
/*
 * This routine is registered with the stream head to receive other
 * (non-data and non-proto) messages.
 *
 * Returns NULL if the message was consumed.
 * Returns an mblk when that mblk should be processed by the stream head.
 *
 * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and
 * *pollwakeups) for the stream head to take action on.
 */
static mblk_t *
strsock_misc(vnode_t *vp, mblk_t *mp,
    strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
    strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
{
    struct sonode *so;

    so = VTOSO(vp);

    dprintso(so, 1, ("strsock_misc(%p, %p, 0x%x)\n",
        vp, mp, DB_TYPE(mp)));

    /* Set default return values */
    *wakeups = *allmsgsigs = *firstmsgsigs = *pollwakeups = 0;

    switch (DB_TYPE(mp)) {
    case M_PCSIG:
        /*
         * This assumes that an M_PCSIG for the urgent data arrives
         * before the corresponding T_EXDATA_IND.
         *
         * Note: Just like in SunOS 4.X and 4.4BSD a poll will be
         * awoken before the urgent data shows up.
         * For OOBINLINE this can result in select returning
         * only exceptions as opposed to except|read.
         */
        if (*mp->b_rptr == SIGURG) {
            mutex_enter(&so->so_lock);
            dprintso(so, 1,
                ("SIGURG(%p): counts %d/%d state %s\n",
                vp, so->so_oobsigcnt,
                so->so_oobcnt,
                pr_state(so->so_state, so->so_mode)));
            so_oob_sig(so, 1, allmsgsigs, pollwakeups);
            dprintso(so, 1,
                ("after SIGURG(%p): counts %d/%d "
                " poll 0x%x sig 0x%x state %s\n",
                vp, so->so_oobsigcnt,
                so->so_oobcnt, *pollwakeups, *allmsgsigs,
                pr_state(so->so_state, so->so_mode)));
            mutex_exit(&so->so_lock);
        }
        freemsg(mp);
        return (NULL);

    case M_SIG:
    case M_HANGUP:
    case M_UNHANGUP:
    case M_ERROR:
        /* M_ERRORs etc are ignored */
        freemsg(mp);
        return (NULL);

    case M_FLUSH:
        /*
         * Do not flush the read queue. If the M_FLUSH
         * arrives because of an impending T_discon_ind
         * we still have to keep any queued data - this is part of
         * socket semantics.
         */
        if (*mp->b_rptr & FLUSHW) {
            *mp->b_rptr &= ~FLUSHR;
            return (mp);
        }
        freemsg(mp);
        return (NULL);

    default:
        return (mp);
    }
}


/* Register to receive signals for certain events */
int
so_set_asyncsigs(vnode_t *vp, pid_t pgrp, int events, int mode, cred_t *cr)
{
    struct strsigset ss;
    int32_t rval;

    /*
     * Note that SOLOCKED will be set except for the call from soaccept().
     */
    ASSERT(!mutex_owned(&VTOSO(vp)->so_lock));
    ss.ss_pid = pgrp;
    ss.ss_events = events;
    return (strioctl(vp, I_ESETSIG, (intptr_t)&ss, mode, K_TO_K, cr,
        &rval));
}


/* Register for events matching the SS_ASYNC flag */
int
so_set_events(struct sonode *so, vnode_t *vp, cred_t *cr)
{
    int events = so->so_state & SS_ASYNC ?
        S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT :
        S_RDBAND | S_BANDURG;

    return (so_set_asyncsigs(vp, so->so_pgrp, events, 0, cr));
}
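/*
 * Editorial note on the event masks used above and below: S_RDBAND and
 * S_BANDURG are always registered, while S_RDNORM and S_OUTPUT are added
 * only when SS_ASYNC is set, i.e. when the application has asked for
 * SIGPOLL/SIGIO on ordinary read/write availability. The always-on band
 * events are presumably what keeps SIGURG delivery for urgent data working
 * even when FIOASYNC/SS_ASYNC is off (that linkage is inferred from the
 * surrounding sockfs code, not stated here).
 */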
/* Change the SS_ASYNC flag, and update signal delivery if needed */
int
so_flip_async(struct sonode *so, vnode_t *vp, int mode, cred_t *cr)
{
    ASSERT(mutex_owned(&so->so_lock));
    if (so->so_pgrp != 0) {
        int error;
        /*
         * so_state still holds the old SS_ASYNC setting; pick the
         * signal set that corresponds to the flipped (new) setting.
         */
        int events = so->so_state & SS_ASYNC ?
            S_RDBAND | S_BANDURG :
            S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT;

        so_lock_single(so);
        mutex_exit(&so->so_lock);

        error = so_set_asyncsigs(vp, so->so_pgrp, events, mode, cr);

        mutex_enter(&so->so_lock);
        so_unlock_single(so, SOLOCKED);
        if (error)
            return (error);
    }
    so->so_state ^= SS_ASYNC;
    return (0);
}

/*
 * Set new pid/pgrp for SIGPOLL (or SIGIO for FIOASYNC mode), replacing
 * any existing one. If passed zero, just clear the existing one.
 */
int
so_set_siggrp(struct sonode *so, vnode_t *vp, pid_t pgrp, int mode, cred_t *cr)
{
    int events = so->so_state & SS_ASYNC ?
        S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT :
        S_RDBAND | S_BANDURG;
    int error;

    ASSERT(mutex_owned(&so->so_lock));

    /*
     * Change socket process (group).
     *
     * strioctl (via so_set_asyncsigs) will perform the permission check
     * and also keep a PID_HOLD to prevent the pid from being reused.
     */
    so_lock_single(so);
    mutex_exit(&so->so_lock);

    if (pgrp != 0) {
        dprintso(so, 1, ("setown: adding pgrp %d ev 0x%x\n",
            pgrp, events));
        error = so_set_asyncsigs(vp, pgrp, events, mode, cr);
        if (error != 0) {
            eprintsoline(so, error);
            goto bad;
        }
    }
    /* Remove the previously registered process/group */
    if (so->so_pgrp != 0) {
        dprintso(so, 1, ("setown: removing pgrp %d\n", so->so_pgrp));
        error = so_set_asyncsigs(vp, so->so_pgrp, 0, mode, cr);
        if (error != 0) {
            eprintsoline(so, error);
            error = 0;
        }
    }
    mutex_enter(&so->so_lock);
    so_unlock_single(so, SOLOCKED);
    so->so_pgrp = pgrp;
    return (0);
bad:
    mutex_enter(&so->so_lock);
    so_unlock_single(so, SOLOCKED);
    return (error);
}


/*
 * Translate a TLI(/XTI) error into a system error as best we can.
 */
static const int tli_errs[] = {
    0,                  /* no error */
    EADDRNOTAVAIL,      /* TBADADDR */
    ENOPROTOOPT,        /* TBADOPT */
    EACCES,             /* TACCES */
    EBADF,              /* TBADF */
    EADDRNOTAVAIL,      /* TNOADDR */
    EPROTO,             /* TOUTSTATE */
    ECONNABORTED,       /* TBADSEQ */
    0,                  /* TSYSERR - never looked up; see ASSERT below */
    EPROTO,             /* TLOOK - should never be sent by transport */
    EMSGSIZE,           /* TBADDATA */
    EMSGSIZE,           /* TBUFOVFLW */
    EPROTO,             /* TFLOW */
    EWOULDBLOCK,        /* TNODATA */
    EPROTO,             /* TNODIS */
    EPROTO,             /* TNOUDERR */
    EINVAL,             /* TBADFLAG */
    EPROTO,             /* TNOREL */
    EOPNOTSUPP,         /* TNOTSUPPORT */
    EPROTO,             /* TSTATECHNG */
    /* The following represent error namespace expansion with XTI */
    EPROTO,             /* TNOSTRUCTYPE - never sent by transport */
    EPROTO,             /* TBADNAME - never sent by transport */
    EPROTO,             /* TBADQLEN - never sent by transport */
    EADDRINUSE,         /* TADDRBUSY */
    EBADF,              /* TINDOUT */
    EBADF,              /* TPROVMISMATCH */
    EBADF,              /* TRESQLEN */
    EBADF,              /* TRESADDR */
    EPROTO,             /* TQFULL - never sent by transport */
    EPROTO,             /* TPROTO */
};

static int
tlitosyserr(int terr)
{
    ASSERT(terr != TSYSERR);
    if (terr >= (sizeof (tli_errs) / sizeof (tli_errs[0])))
        return (EPROTO);
    else
        return (tli_errs[terr]);
}
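/*
 * Illustrative example (not part of the original code): a caller that has
 * dequeued a T_ERROR_ACK would typically convert the provider's error along
 * these lines, using UNIX_error directly when the provider reports TSYSERR
 * (which is why tlitosyserr() asserts that it never sees TSYSERR):
 *
 *	struct T_error_ack *tea;	(hypothetical pointer into the ack)
 *
 *	if (tea->TLI_error == TSYSERR)
 *		error = tea->UNIX_error;
 *	else
 *		error = tlitosyserr(tea->TLI_error);
 */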