1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/inttypes.h> 31 #include <sys/t_lock.h> 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/buf.h> 35 #include <sys/conf.h> 36 #include <sys/cred.h> 37 #include <sys/kmem.h> 38 #include <sys/sysmacros.h> 39 #include <sys/vfs.h> 40 #include <sys/vnode.h> 41 #include <sys/debug.h> 42 #include <sys/errno.h> 43 #include <sys/time.h> 44 #include <sys/file.h> 45 #include <sys/user.h> 46 #include <sys/stream.h> 47 #include <sys/strsubr.h> 48 #include <sys/esunddi.h> 49 #include <sys/flock.h> 50 #include <sys/modctl.h> 51 #include <sys/vtrace.h> 52 #include <sys/strsun.h> 53 #include <sys/cmn_err.h> 54 #include <sys/proc.h> 55 #include <sys/ddi.h> 56 #include <sys/kmem_impl.h> 57 58 #include <sys/suntpi.h> 59 #include <sys/socket.h> 60 #include <sys/sockio.h> 61 #include <sys/socketvar.h> 62 #include <netinet/in.h> 63 64 #include <sys/tiuser.h> 65 #define _SUN_TPI_VERSION 2 66 #include <sys/tihdr.h> 67 
68 #include <c2/audit.h> 69 70 int so_default_version = SOV_SOCKSTREAM; 71 72 #ifdef DEBUG 73 /* Set sockdebug to print debug messages when SO_DEBUG is set */ 74 int sockdebug = 0; 75 76 /* Set sockprinterr to print error messages when SO_DEBUG is set */ 77 int sockprinterr = 0; 78 79 /* 80 * Set so_default_options to SO_DEBUG is all sockets should be created 81 * with SO_DEBUG set. This is needed to get debug printouts from the 82 * socket() call itself. 83 */ 84 int so_default_options = 0; 85 #endif /* DEBUG */ 86 87 #ifdef SOCK_TEST 88 /* 89 * Set to number of ticks to limit cv_waits for code coverage testing. 90 * Set to 1000 when SO_DEBUG is set to 2. 91 */ 92 clock_t sock_test_timelimit = 0; 93 #endif /* SOCK_TEST */ 94 95 /* 96 * For concurrency testing of e.g. opening /dev/ip which does not 97 * handle T_INFO_REQ messages. 98 */ 99 int so_no_tinfo = 0; 100 101 /* 102 * Timeout for getting a T_CAPABILITY_ACK - it is possible for a provider 103 * to simply ignore the T_CAPABILITY_REQ. 104 */ 105 clock_t sock_capability_timeout = 2; /* seconds */ 106 107 static int do_tcapability(struct sonode *so, t_uscalar_t cap_bits1); 108 static void so_removehooks(struct sonode *so); 109 110 static mblk_t *strsock_proto(vnode_t *vp, mblk_t *mp, 111 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 112 strsigset_t *allmsgsigs, strpollset_t *pollwakeups); 113 static mblk_t *strsock_misc(vnode_t *vp, mblk_t *mp, 114 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 115 strsigset_t *allmsgsigs, strpollset_t *pollwakeups); 116 117 static int tlitosyserr(int terr); 118 119 /* 120 * Convert a socket to a stream. Invoked when the illusory sockmod 121 * is popped from the stream. 122 * Change the stream head back to default operation without losing 123 * any messages (T_conn_ind's are moved to the stream head queue). 
124 */ 125 int 126 so_sock2stream(struct sonode *so) 127 { 128 struct vnode *vp = SOTOV(so); 129 queue_t *rq; 130 mblk_t *mp; 131 int error = 0; 132 133 ASSERT(MUTEX_HELD(&so->so_plumb_lock)); 134 135 mutex_enter(&so->so_lock); 136 so_lock_single(so); 137 138 ASSERT(so->so_version != SOV_STREAM); 139 140 /* tell the transport below that sockmod is being popped */ 141 if ((so->so_state & SS_TCP_FAST_ACCEPT) != 0) { 142 int rval; 143 mblk_t **mpp; 144 145 mutex_exit(&so->so_lock); 146 error = strioctl(vp, SIOCPOPSOCKFS, NULL, 0, K_TO_K, CRED(), 147 &rval); 148 mutex_enter(&so->so_lock); 149 if (error != 0) { 150 dprintso(so, 0, 151 ("so_sock2stream(%p): SIOCPOPSOCKFS failed\n", so)); 152 goto exit; 153 } 154 so->so_state &= ~SS_TCP_FAST_ACCEPT; 155 156 for (mpp = &so->so_conn_ind_head; (mp = *mpp) != NULL; 157 mpp = &mp->b_next) { 158 struct T_conn_ind *conn_ind; 159 160 /* 161 * strsock_proto() has already verified the length of 162 * this message block. 163 */ 164 ASSERT(MBLKL(mp) >= sizeof (struct T_conn_ind)); 165 166 conn_ind = (struct T_conn_ind *)mp->b_rptr; 167 if (conn_ind->OPT_length == 0 && 168 conn_ind->OPT_offset == 0) 169 continue; 170 171 if (DB_REF(mp) > 1) { 172 mblk_t *newmp; 173 size_t length; 174 cred_t *cr; 175 176 /* 177 * Copy the message block because it is used 178 * elsewhere, too. 179 */ 180 length = MBLKL(mp); 181 newmp = soallocproto(length, _ALLOC_INTR); 182 if (newmp == NULL) { 183 error = EINTR; 184 goto exit; 185 } 186 bcopy(mp->b_rptr, newmp->b_wptr, length); 187 newmp->b_wptr += length; 188 newmp->b_next = mp->b_next; 189 cr = DB_CRED(mp); 190 if (cr != NULL) 191 mblk_setcred(newmp, cr); 192 DB_CPID(newmp) = DB_CPID(mp); 193 194 /* 195 * Link the new message block into the queue 196 * and free the old one. 197 */ 198 *mpp = newmp; 199 mp->b_next = NULL; 200 freemsg(mp); 201 202 mp = newmp; 203 conn_ind = (struct T_conn_ind *)mp->b_rptr; 204 } 205 206 /* 207 * Remove options added by TCP for accept fast-path. 
208 */ 209 conn_ind->OPT_length = 0; 210 conn_ind->OPT_offset = 0; 211 } 212 } 213 214 so->so_version = SOV_STREAM; 215 so->so_priv = NULL; 216 217 /* 218 * Remove the hooks in the stream head to avoid queuing more 219 * packets in sockfs. 220 */ 221 mutex_exit(&so->so_lock); 222 so_removehooks(so); 223 mutex_enter(&so->so_lock); 224 225 /* 226 * Clear any state related to urgent data. Leave any T_EXDATA_IND 227 * on the queue - the behavior of urgent data after a switch is 228 * left undefined. 229 */ 230 so->so_error = so->so_delayed_error = 0; 231 freemsg(so->so_oobmsg); 232 so->so_oobmsg = NULL; 233 so->so_oobsigcnt = so->so_oobcnt = 0; 234 235 so->so_state &= ~(SS_RCVATMARK|SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA| 236 SS_HASCONNIND|SS_SAVEDEOR); 237 ASSERT(so_verify_oobstate(so)); 238 239 freemsg(so->so_ack_mp); 240 so->so_ack_mp = NULL; 241 242 /* 243 * Flush the T_DISCON_IND on so_discon_ind_mp. 244 */ 245 so_flush_discon_ind(so); 246 247 /* 248 * Move any queued T_CONN_IND messages to stream head queue. 249 */ 250 rq = RD(strvp2wq(vp)); 251 while ((mp = so->so_conn_ind_head) != NULL) { 252 so->so_conn_ind_head = mp->b_next; 253 mp->b_next = NULL; 254 if (so->so_conn_ind_head == NULL) { 255 ASSERT(so->so_conn_ind_tail == mp); 256 so->so_conn_ind_tail = NULL; 257 } 258 dprintso(so, 0, 259 ("so_sock2stream(%p): moving T_CONN_IND\n", 260 so)); 261 262 /* Drop lock across put() */ 263 mutex_exit(&so->so_lock); 264 put(rq, mp); 265 mutex_enter(&so->so_lock); 266 } 267 268 exit: 269 ASSERT(MUTEX_HELD(&so->so_lock)); 270 so_unlock_single(so, SOLOCKED); 271 mutex_exit(&so->so_lock); 272 return (error); 273 } 274 275 /* 276 * Covert a stream back to a socket. This is invoked when the illusory 277 * sockmod is pushed on a stream (where the stream was "created" by 278 * popping the illusory sockmod). 
279 * This routine can not recreate the socket state (certain aspects of 280 * it like urgent data state and the bound/connected addresses for AF_UNIX 281 * sockets can not be recreated by asking the transport for information). 282 * Thus this routine implicitly assumes that the socket is in an initial 283 * state (as if it was just created). It flushes any messages queued on the 284 * read queue to avoid dealing with e.g. TPI acks or T_exdata_ind messages. 285 */ 286 void 287 so_stream2sock(struct sonode *so) 288 { 289 struct vnode *vp = SOTOV(so); 290 291 ASSERT(MUTEX_HELD(&so->so_plumb_lock)); 292 293 mutex_enter(&so->so_lock); 294 so_lock_single(so); 295 ASSERT(so->so_version == SOV_STREAM); 296 so->so_version = SOV_SOCKSTREAM; 297 so->so_pushcnt = 0; 298 mutex_exit(&so->so_lock); 299 300 /* 301 * Set a permenent error to force any thread in sorecvmsg to 302 * return (and drop SOREADLOCKED). Clear the error once 303 * we have SOREADLOCKED. 304 * This makes a read sleeping during the I_PUSH of sockmod return 305 * EIO. 306 */ 307 strsetrerror(SOTOV(so), EIO, 1, NULL); 308 309 /* 310 * Get the read lock before flushing data to avoid 311 * problems with the T_EXDATA_IND MSG_PEEK code in sorecvmsg. 312 */ 313 mutex_enter(&so->so_lock); 314 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 315 mutex_exit(&so->so_lock); 316 317 strsetrerror(SOTOV(so), 0, 0, NULL); 318 so_installhooks(so); 319 320 /* 321 * Flush everything on the read queue. 322 * This ensures that no T_CONN_IND remain and that no T_EXDATA_IND 323 * remain; those types of messages would confuse sockfs. 324 */ 325 strflushrq(vp, FLUSHALL); 326 mutex_enter(&so->so_lock); 327 328 /* 329 * Flush the T_DISCON_IND on so_discon_ind_mp. 330 */ 331 so_flush_discon_ind(so); 332 so_unlock_read(so); /* Clear SOREADLOCKED */ 333 334 so_unlock_single(so, SOLOCKED); 335 mutex_exit(&so->so_lock); 336 } 337 338 /* 339 * Install the hooks in the stream head. 
340 */ 341 void 342 so_installhooks(struct sonode *so) 343 { 344 struct vnode *vp = SOTOV(so); 345 346 strsetrputhooks(vp, SH_SIGALLDATA | SH_IGN_ZEROLEN | SH_CONSOL_DATA, 347 strsock_proto, strsock_misc); 348 strsetwputhooks(vp, SH_SIGPIPE | SH_RECHECK_ERR, 0); 349 } 350 351 /* 352 * Remove the hooks in the stream head. 353 */ 354 static void 355 so_removehooks(struct sonode *so) 356 { 357 struct vnode *vp = SOTOV(so); 358 359 strsetrputhooks(vp, 0, NULL, NULL); 360 strsetwputhooks(vp, 0, STRTIMOUT); 361 /* 362 * Leave read behavior as it would have been for a normal 363 * stream i.e. a read of an M_PROTO will fail. 364 */ 365 } 366 367 /* 368 * Initialize the streams side of a socket including 369 * T_info_req/ack processing. If tso is not NULL its values are used thereby 370 * avoiding the T_INFO_REQ. 371 */ 372 int 373 so_strinit(struct sonode *so, struct sonode *tso) 374 { 375 struct vnode *vp = SOTOV(so); 376 struct stdata *stp; 377 mblk_t *mp; 378 int error; 379 380 dprintso(so, 1, ("so_strinit(%p)\n", so)); 381 382 /* Preallocate an unbind_req message */ 383 mp = soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP); 384 mutex_enter(&so->so_lock); 385 so->so_unbind_mp = mp; 386 #ifdef DEBUG 387 so->so_options = so_default_options; 388 #endif /* DEBUG */ 389 mutex_exit(&so->so_lock); 390 391 so_installhooks(so); 392 393 /* 394 * The T_CAPABILITY_REQ should be the first message sent down because 395 * at least TCP has a fast-path for this which avoids timeouts while 396 * waiting for the T_CAPABILITY_ACK under high system load. 
397 */ 398 if (tso == NULL) { 399 error = do_tcapability(so, TC1_ACCEPTOR_ID | TC1_INFO); 400 if (error) 401 return (error); 402 } else { 403 mutex_enter(&so->so_lock); 404 so->so_tsdu_size = tso->so_tsdu_size; 405 so->so_etsdu_size = tso->so_etsdu_size; 406 so->so_addr_size = tso->so_addr_size; 407 so->so_opt_size = tso->so_opt_size; 408 so->so_tidu_size = tso->so_tidu_size; 409 so->so_serv_type = tso->so_serv_type; 410 so->so_mode = tso->so_mode & ~SM_ACCEPTOR_ID; 411 mutex_exit(&so->so_lock); 412 413 /* the following do_tcapability may update so->so_mode */ 414 if ((tso->so_serv_type != T_CLTS) && 415 ((so->so_state & SS_TCP_FAST_ACCEPT) == 0)) { 416 error = do_tcapability(so, TC1_ACCEPTOR_ID); 417 if (error) 418 return (error); 419 } 420 } 421 /* 422 * If the addr_size is 0 we treat it as already bound 423 * and connected. This is used by the routing socket. 424 * We set the addr_size to something to allocate a the address 425 * structures. 426 */ 427 if (so->so_addr_size == 0) { 428 so->so_state |= SS_ISBOUND | SS_ISCONNECTED; 429 /* Address size can vary with address families. */ 430 if (so->so_family == AF_INET6) 431 so->so_addr_size = 432 (t_scalar_t)sizeof (struct sockaddr_in6); 433 else 434 so->so_addr_size = 435 (t_scalar_t)sizeof (struct sockaddr_in); 436 ASSERT(so->so_unbind_mp); 437 } 438 /* 439 * Allocate the addresses. 440 */ 441 ASSERT(so->so_laddr_sa == NULL && so->so_faddr_sa == NULL); 442 ASSERT(so->so_laddr_len == 0 && so->so_faddr_len == 0); 443 so->so_laddr_maxlen = so->so_faddr_maxlen = 444 P2ROUNDUP(so->so_addr_size, KMEM_ALIGN); 445 so->so_laddr_sa = kmem_alloc(so->so_laddr_maxlen * 2, KM_SLEEP); 446 so->so_faddr_sa = (struct sockaddr *)((caddr_t)so->so_laddr_sa 447 + so->so_laddr_maxlen); 448 449 if (so->so_family == AF_UNIX) { 450 /* 451 * Initialize AF_UNIX related fields. 
452 */ 453 bzero(&so->so_ux_laddr, sizeof (so->so_ux_laddr)); 454 bzero(&so->so_ux_faddr, sizeof (so->so_ux_faddr)); 455 } 456 457 stp = vp->v_stream; 458 /* 459 * Have to keep minpsz at zero in order to allow write/send of zero 460 * bytes. 461 */ 462 mutex_enter(&stp->sd_lock); 463 if (stp->sd_qn_minpsz == 1) 464 stp->sd_qn_minpsz = 0; 465 mutex_exit(&stp->sd_lock); 466 467 return (0); 468 } 469 470 static void 471 copy_tinfo(struct sonode *so, struct T_info_ack *tia) 472 { 473 so->so_tsdu_size = tia->TSDU_size; 474 so->so_etsdu_size = tia->ETSDU_size; 475 so->so_addr_size = tia->ADDR_size; 476 so->so_opt_size = tia->OPT_size; 477 so->so_tidu_size = tia->TIDU_size; 478 so->so_serv_type = tia->SERV_type; 479 switch (tia->CURRENT_state) { 480 case TS_UNBND: 481 break; 482 case TS_IDLE: 483 so->so_state |= SS_ISBOUND; 484 so->so_laddr_len = 0; 485 so->so_state &= ~SS_LADDR_VALID; 486 break; 487 case TS_DATA_XFER: 488 so->so_state |= SS_ISBOUND|SS_ISCONNECTED; 489 so->so_laddr_len = 0; 490 so->so_faddr_len = 0; 491 so->so_state &= ~(SS_LADDR_VALID | SS_FADDR_VALID); 492 break; 493 } 494 495 /* 496 * Heuristics for determining the socket mode flags 497 * (SM_ATOMIC, SM_CONNREQUIRED, SM_ADDR, SM_FDPASSING, 498 * and SM_EXDATA, SM_OPTDATA, and SM_BYTESTREAM) 499 * from the info ack. 
500 */ 501 if (so->so_serv_type == T_CLTS) { 502 so->so_mode |= SM_ATOMIC | SM_ADDR; 503 } else { 504 so->so_mode |= SM_CONNREQUIRED; 505 if (so->so_etsdu_size != 0 && so->so_etsdu_size != -2) 506 so->so_mode |= SM_EXDATA; 507 } 508 if (so->so_type == SOCK_SEQPACKET || so->so_type == SOCK_RAW) { 509 /* Semantics are to discard tail end of messages */ 510 so->so_mode |= SM_ATOMIC; 511 } 512 if (so->so_family == AF_UNIX) { 513 so->so_mode |= SM_FDPASSING | SM_OPTDATA; 514 if (so->so_addr_size == -1) { 515 /* MAXPATHLEN + soun_family + nul termination */ 516 so->so_addr_size = (t_scalar_t)(MAXPATHLEN + 517 sizeof (short) + 1); 518 } 519 if (so->so_type == SOCK_STREAM) { 520 /* 521 * Make it into a byte-stream transport. 522 * SOCK_SEQPACKET sockets are unchanged. 523 */ 524 so->so_tsdu_size = 0; 525 } 526 } else if (so->so_addr_size == -1) { 527 /* 528 * Logic extracted from sockmod - have to pick some max address 529 * length in order to preallocate the addresses. 530 */ 531 so->so_addr_size = SOA_DEFSIZE; 532 } 533 if (so->so_tsdu_size == 0) 534 so->so_mode |= SM_BYTESTREAM; 535 } 536 537 static int 538 check_tinfo(struct sonode *so) 539 { 540 /* Consistency checks */ 541 if (so->so_type == SOCK_DGRAM && so->so_serv_type != T_CLTS) { 542 eprintso(so, ("service type and socket type mismatch\n")); 543 eprintsoline(so, EPROTO); 544 return (EPROTO); 545 } 546 if (so->so_type == SOCK_STREAM && so->so_serv_type == T_CLTS) { 547 eprintso(so, ("service type and socket type mismatch\n")); 548 eprintsoline(so, EPROTO); 549 return (EPROTO); 550 } 551 if (so->so_type == SOCK_SEQPACKET && so->so_serv_type == T_CLTS) { 552 eprintso(so, ("service type and socket type mismatch\n")); 553 eprintsoline(so, EPROTO); 554 return (EPROTO); 555 } 556 if (so->so_family == AF_INET && 557 so->so_addr_size != (t_scalar_t)sizeof (struct sockaddr_in)) { 558 eprintso(so, 559 ("AF_INET must have sockaddr_in address length. 
Got %d\n", 560 so->so_addr_size)); 561 eprintsoline(so, EMSGSIZE); 562 return (EMSGSIZE); 563 } 564 if (so->so_family == AF_INET6 && 565 so->so_addr_size != (t_scalar_t)sizeof (struct sockaddr_in6)) { 566 eprintso(so, 567 ("AF_INET6 must have sockaddr_in6 address length. Got %d\n", 568 so->so_addr_size)); 569 eprintsoline(so, EMSGSIZE); 570 return (EMSGSIZE); 571 } 572 573 dprintso(so, 1, ( 574 "tinfo: serv %d tsdu %d, etsdu %d, addr %d, opt %d, tidu %d\n", 575 so->so_serv_type, so->so_tsdu_size, so->so_etsdu_size, 576 so->so_addr_size, so->so_opt_size, 577 so->so_tidu_size)); 578 dprintso(so, 1, ("tinfo: so_state %s\n", 579 pr_state(so->so_state, so->so_mode))); 580 return (0); 581 } 582 583 /* 584 * Send down T_info_req and wait for the ack. 585 * Record interesting T_info_ack values in the sonode. 586 */ 587 static int 588 do_tinfo(struct sonode *so) 589 { 590 struct T_info_req tir; 591 mblk_t *mp; 592 int error; 593 594 ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 595 596 if (so_no_tinfo) { 597 so->so_addr_size = 0; 598 return (0); 599 } 600 601 dprintso(so, 1, ("do_tinfo(%p)\n", so)); 602 603 /* Send T_INFO_REQ */ 604 tir.PRIM_type = T_INFO_REQ; 605 mp = soallocproto1(&tir, sizeof (tir), 606 sizeof (struct T_info_req) + sizeof (struct T_info_ack), 607 _ALLOC_INTR); 608 if (mp == NULL) { 609 eprintsoline(so, ENOBUFS); 610 return (ENOBUFS); 611 } 612 /* T_INFO_REQ has to be M_PCPROTO */ 613 DB_TYPE(mp) = M_PCPROTO; 614 615 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 616 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 617 if (error) { 618 eprintsoline(so, error); 619 return (error); 620 } 621 mutex_enter(&so->so_lock); 622 /* Wait for T_INFO_ACK */ 623 if ((error = sowaitprim(so, T_INFO_REQ, T_INFO_ACK, 624 (t_uscalar_t)sizeof (struct T_info_ack), &mp, 0))) { 625 mutex_exit(&so->so_lock); 626 eprintsoline(so, error); 627 return (error); 628 } 629 630 ASSERT(mp); 631 copy_tinfo(so, (struct T_info_ack *)mp->b_rptr); 632 mutex_exit(&so->so_lock); 633 freemsg(mp); 634 return 
(check_tinfo(so)); 635 } 636 637 /* 638 * Send down T_capability_req and wait for the ack. 639 * Record interesting T_capability_ack values in the sonode. 640 */ 641 static int 642 do_tcapability(struct sonode *so, t_uscalar_t cap_bits1) 643 { 644 struct T_capability_req tcr; 645 struct T_capability_ack *tca; 646 mblk_t *mp; 647 int error; 648 649 ASSERT(cap_bits1 != 0); 650 ASSERT((cap_bits1 & ~(TC1_ACCEPTOR_ID | TC1_INFO)) == 0); 651 ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 652 653 if (so->so_provinfo->tpi_capability == PI_NO) 654 return (do_tinfo(so)); 655 656 if (so_no_tinfo) { 657 so->so_addr_size = 0; 658 if ((cap_bits1 &= ~TC1_INFO) == 0) 659 return (0); 660 } 661 662 dprintso(so, 1, ("do_tcapability(%p)\n", so)); 663 664 /* Send T_CAPABILITY_REQ */ 665 tcr.PRIM_type = T_CAPABILITY_REQ; 666 tcr.CAP_bits1 = cap_bits1; 667 mp = soallocproto1(&tcr, sizeof (tcr), 668 sizeof (struct T_capability_req) + sizeof (struct T_capability_ack), 669 _ALLOC_INTR); 670 if (mp == NULL) { 671 eprintsoline(so, ENOBUFS); 672 return (ENOBUFS); 673 } 674 /* T_CAPABILITY_REQ should be M_PCPROTO here */ 675 DB_TYPE(mp) = M_PCPROTO; 676 677 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 678 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 679 if (error) { 680 eprintsoline(so, error); 681 return (error); 682 } 683 mutex_enter(&so->so_lock); 684 /* Wait for T_CAPABILITY_ACK */ 685 if ((error = sowaitprim(so, T_CAPABILITY_REQ, T_CAPABILITY_ACK, 686 (t_uscalar_t)sizeof (*tca), &mp, sock_capability_timeout * hz))) { 687 mutex_exit(&so->so_lock); 688 PI_PROVLOCK(so->so_provinfo); 689 if (so->so_provinfo->tpi_capability == PI_DONTKNOW) 690 so->so_provinfo->tpi_capability = PI_NO; 691 PI_PROVUNLOCK(so->so_provinfo); 692 ASSERT((so->so_mode & SM_ACCEPTOR_ID) == 0); 693 if (cap_bits1 & TC1_INFO) { 694 /* 695 * If the T_CAPABILITY_REQ timed out and then a 696 * T_INFO_REQ gets a protocol error, most likely 697 * the capability was slow (vs. unsupported). Return 698 * ENOSR for this case as a best guess. 
699 */ 700 if (error == ETIME) { 701 return ((error = do_tinfo(so)) == EPROTO ? 702 ENOSR : error); 703 } 704 return (do_tinfo(so)); 705 } 706 return (0); 707 } 708 709 if (so->so_provinfo->tpi_capability == PI_DONTKNOW) { 710 PI_PROVLOCK(so->so_provinfo); 711 so->so_provinfo->tpi_capability = PI_YES; 712 PI_PROVUNLOCK(so->so_provinfo); 713 } 714 715 ASSERT(mp); 716 tca = (struct T_capability_ack *)mp->b_rptr; 717 718 ASSERT((cap_bits1 & TC1_INFO) == (tca->CAP_bits1 & TC1_INFO)); 719 720 cap_bits1 = tca->CAP_bits1; 721 722 if (cap_bits1 & TC1_ACCEPTOR_ID) { 723 so->so_acceptor_id = tca->ACCEPTOR_id; 724 so->so_mode |= SM_ACCEPTOR_ID; 725 } 726 727 if (cap_bits1 & TC1_INFO) 728 copy_tinfo(so, &tca->INFO_ack); 729 730 mutex_exit(&so->so_lock); 731 freemsg(mp); 732 733 if (cap_bits1 & TC1_INFO) 734 return (check_tinfo(so)); 735 736 return (0); 737 } 738 739 /* 740 * Retrieve and clear the socket error. 741 */ 742 int 743 sogeterr(struct sonode *so) 744 { 745 int error; 746 747 ASSERT(MUTEX_HELD(&so->so_lock)); 748 749 error = so->so_error; 750 so->so_error = 0; 751 752 return (error); 753 } 754 755 /* 756 * This routine is registered with the stream head to retrieve read 757 * side errors. 758 * It does not clear the socket error for a peeking read side operation. 759 * It the error is to be cleared it sets *clearerr. 760 */ 761 int 762 sogetrderr(vnode_t *vp, int ispeek, int *clearerr) 763 { 764 struct sonode *so = VTOSO(vp); 765 int error; 766 767 mutex_enter(&so->so_lock); 768 if (ispeek) { 769 error = so->so_error; 770 *clearerr = 0; 771 } else { 772 error = so->so_error; 773 so->so_error = 0; 774 *clearerr = 1; 775 } 776 mutex_exit(&so->so_lock); 777 return (error); 778 } 779 780 /* 781 * This routine is registered with the stream head to retrieve write 782 * side errors. 783 * It does not clear the socket error for a peeking read side operation. 784 * It the error is to be cleared it sets *clearerr. 
785 */ 786 int 787 sogetwrerr(vnode_t *vp, int ispeek, int *clearerr) 788 { 789 struct sonode *so = VTOSO(vp); 790 int error; 791 792 mutex_enter(&so->so_lock); 793 if (so->so_state & SS_CANTSENDMORE) { 794 error = EPIPE; 795 *clearerr = 0; 796 } else { 797 error = so->so_error; 798 if (ispeek) { 799 *clearerr = 0; 800 } else { 801 so->so_error = 0; 802 *clearerr = 1; 803 } 804 } 805 mutex_exit(&so->so_lock); 806 return (error); 807 } 808 809 /* 810 * Set a nonpersistent read and write error on the socket. 811 * Used when there is a T_uderror_ind for a connected socket. 812 * The caller also needs to call strsetrerror and strsetwerror 813 * after dropping the lock. 814 */ 815 void 816 soseterror(struct sonode *so, int error) 817 { 818 ASSERT(error != 0); 819 820 ASSERT(MUTEX_HELD(&so->so_lock)); 821 so->so_error = (ushort_t)error; 822 } 823 824 void 825 soisconnecting(struct sonode *so) 826 { 827 ASSERT(MUTEX_HELD(&so->so_lock)); 828 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); 829 so->so_state |= SS_ISCONNECTING; 830 cv_broadcast(&so->so_state_cv); 831 } 832 833 void 834 soisconnected(struct sonode *so) 835 { 836 ASSERT(MUTEX_HELD(&so->so_lock)); 837 so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING); 838 so->so_state |= SS_ISCONNECTED; 839 cv_broadcast(&so->so_state_cv); 840 } 841 842 /* 843 * The caller also needs to call strsetrerror, strsetwerror and strseteof. 844 */ 845 void 846 soisdisconnected(struct sonode *so, int error) 847 { 848 ASSERT(MUTEX_HELD(&so->so_lock)); 849 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING| 850 SS_LADDR_VALID|SS_FADDR_VALID); 851 so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE); 852 so->so_error = (ushort_t)error; 853 if (so->so_peercred != NULL) { 854 crfree(so->so_peercred); 855 so->so_peercred = NULL; 856 } 857 cv_broadcast(&so->so_state_cv); 858 } 859 860 /* 861 * For connected AF_UNIX SOCK_DGRAM sockets when the peer closes. 862 * Does not affect write side. 
863 * The caller also has to call strsetrerror. 864 */ 865 static void 866 sobreakconn(struct sonode *so, int error) 867 { 868 ASSERT(MUTEX_HELD(&so->so_lock)); 869 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); 870 so->so_error = (ushort_t)error; 871 cv_broadcast(&so->so_state_cv); 872 } 873 874 /* 875 * Can no longer send. 876 * Caller must also call strsetwerror. 877 * 878 * We mark the peer address as no longer valid for getpeername, but 879 * leave it around for so_unix_close to notify the peer (that 880 * transport has no addressing held at that layer). 881 */ 882 void 883 socantsendmore(struct sonode *so) 884 { 885 ASSERT(MUTEX_HELD(&so->so_lock)); 886 so->so_state = so->so_state & ~SS_FADDR_VALID | SS_CANTSENDMORE; 887 cv_broadcast(&so->so_state_cv); 888 } 889 890 /* 891 * The caller must call strseteof(,1) as well as this routine 892 * to change the socket state. 893 */ 894 void 895 socantrcvmore(struct sonode *so) 896 { 897 ASSERT(MUTEX_HELD(&so->so_lock)); 898 so->so_state |= SS_CANTRCVMORE; 899 cv_broadcast(&so->so_state_cv); 900 } 901 902 /* 903 * The caller has sent down a "request_prim" primitive and wants to wait for 904 * an ack ("ack_prim") or an T_ERROR_ACK for it. 905 * The specified "ack_prim" can be a T_OK_ACK. 906 * 907 * Assumes that all the TPI acks are M_PCPROTO messages. 908 * 909 * Note that the socket is single-threaded (using so_lock_single) 910 * for all operations that generate TPI ack messages. Since 911 * only TPI ack messages are M_PCPROTO we should never receive 912 * anything except either the ack we are expecting or a T_ERROR_ACK 913 * for the same primitive. 
914 */ 915 int 916 sowaitprim(struct sonode *so, t_scalar_t request_prim, t_scalar_t ack_prim, 917 t_uscalar_t min_size, mblk_t **mpp, clock_t wait) 918 { 919 mblk_t *mp; 920 union T_primitives *tpr; 921 int error; 922 923 dprintso(so, 1, ("sowaitprim(%p, %d, %d, %d, %p, %lu)\n", 924 so, request_prim, ack_prim, min_size, mpp, wait)); 925 926 ASSERT(MUTEX_HELD(&so->so_lock)); 927 928 error = sowaitack(so, &mp, wait); 929 if (error) 930 return (error); 931 932 dprintso(so, 1, ("got msg %p\n", mp)); 933 if (DB_TYPE(mp) != M_PCPROTO || 934 MBLKL(mp) < sizeof (tpr->type)) { 935 freemsg(mp); 936 eprintsoline(so, EPROTO); 937 return (EPROTO); 938 } 939 tpr = (union T_primitives *)mp->b_rptr; 940 /* 941 * Did we get the primitive that we were asking for? 942 * For T_OK_ACK we also check that it matches the request primitive. 943 */ 944 if (tpr->type == ack_prim && 945 (ack_prim != T_OK_ACK || 946 tpr->ok_ack.CORRECT_prim == request_prim)) { 947 if (MBLKL(mp) >= (ssize_t)min_size) { 948 /* Found what we are looking for */ 949 *mpp = mp; 950 return (0); 951 } 952 /* Too short */ 953 freemsg(mp); 954 eprintsoline(so, EPROTO); 955 return (EPROTO); 956 } 957 958 if (tpr->type == T_ERROR_ACK && 959 tpr->error_ack.ERROR_prim == request_prim) { 960 /* Error to the primitive we were looking for */ 961 if (tpr->error_ack.TLI_error == TSYSERR) { 962 error = tpr->error_ack.UNIX_error; 963 } else { 964 error = tlitosyserr(tpr->error_ack.TLI_error); 965 } 966 dprintso(so, 0, ("error_ack for %d: %d/%d ->%d\n", 967 tpr->error_ack.ERROR_prim, 968 tpr->error_ack.TLI_error, 969 tpr->error_ack.UNIX_error, 970 error)); 971 freemsg(mp); 972 return (error); 973 } 974 /* 975 * Wrong primitive or T_ERROR_ACK for the wrong primitive 976 */ 977 #ifdef DEBUG 978 if (tpr->type == T_ERROR_ACK) { 979 dprintso(so, 0, ("error_ack for %d: %d/%d\n", 980 tpr->error_ack.ERROR_prim, 981 tpr->error_ack.TLI_error, 982 tpr->error_ack.UNIX_error)); 983 } else if (tpr->type == T_OK_ACK) { 984 dprintso(so, 0, 
("ok_ack for %d, expected %d for %d\n", 985 tpr->ok_ack.CORRECT_prim, 986 ack_prim, request_prim)); 987 } else { 988 dprintso(so, 0, 989 ("unexpected primitive %d, expected %d for %d\n", 990 tpr->type, ack_prim, request_prim)); 991 } 992 #endif /* DEBUG */ 993 994 freemsg(mp); 995 eprintsoline(so, EPROTO); 996 return (EPROTO); 997 } 998 999 /* 1000 * Wait for a T_OK_ACK for the specified primitive. 1001 */ 1002 int 1003 sowaitokack(struct sonode *so, t_scalar_t request_prim) 1004 { 1005 mblk_t *mp; 1006 int error; 1007 1008 error = sowaitprim(so, request_prim, T_OK_ACK, 1009 (t_uscalar_t)sizeof (struct T_ok_ack), &mp, 0); 1010 if (error) 1011 return (error); 1012 freemsg(mp); 1013 return (0); 1014 } 1015 1016 /* 1017 * Queue a received TPI ack message on so_ack_mp. 1018 */ 1019 void 1020 soqueueack(struct sonode *so, mblk_t *mp) 1021 { 1022 if (DB_TYPE(mp) != M_PCPROTO) { 1023 cmn_err(CE_WARN, 1024 "sockfs: received unexpected M_PROTO TPI ack. Prim %d\n", 1025 *(t_scalar_t *)mp->b_rptr); 1026 freemsg(mp); 1027 return; 1028 } 1029 1030 mutex_enter(&so->so_lock); 1031 if (so->so_ack_mp != NULL) { 1032 dprintso(so, 1, ("so_ack_mp already set\n")); 1033 freemsg(so->so_ack_mp); 1034 so->so_ack_mp = NULL; 1035 } 1036 so->so_ack_mp = mp; 1037 cv_broadcast(&so->so_ack_cv); 1038 mutex_exit(&so->so_lock); 1039 } 1040 1041 /* 1042 * Wait for a TPI ack ignoring signals and errors. 1043 */ 1044 int 1045 sowaitack(struct sonode *so, mblk_t **mpp, clock_t wait) 1046 { 1047 ASSERT(MUTEX_HELD(&so->so_lock)); 1048 1049 while (so->so_ack_mp == NULL) { 1050 #ifdef SOCK_TEST 1051 if (wait == 0 && sock_test_timelimit != 0) 1052 wait = sock_test_timelimit; 1053 #endif 1054 if (wait != 0) { 1055 /* 1056 * Only wait for the time limit. 
1057 */ 1058 clock_t now; 1059 1060 time_to_wait(&now, wait); 1061 if (cv_timedwait(&so->so_ack_cv, &so->so_lock, 1062 now) == -1) { 1063 eprintsoline(so, ETIME); 1064 return (ETIME); 1065 } 1066 } 1067 else 1068 cv_wait(&so->so_ack_cv, &so->so_lock); 1069 } 1070 *mpp = so->so_ack_mp; 1071 #ifdef DEBUG 1072 { 1073 union T_primitives *tpr; 1074 mblk_t *mp = *mpp; 1075 1076 tpr = (union T_primitives *)mp->b_rptr; 1077 ASSERT(DB_TYPE(mp) == M_PCPROTO); 1078 ASSERT(tpr->type == T_OK_ACK || 1079 tpr->type == T_ERROR_ACK || 1080 tpr->type == T_BIND_ACK || 1081 tpr->type == T_CAPABILITY_ACK || 1082 tpr->type == T_INFO_ACK || 1083 tpr->type == T_OPTMGMT_ACK); 1084 } 1085 #endif /* DEBUG */ 1086 so->so_ack_mp = NULL; 1087 return (0); 1088 } 1089 1090 /* 1091 * Queue a received T_CONN_IND message on so_conn_ind_head/tail. 1092 */ 1093 void 1094 soqueueconnind(struct sonode *so, mblk_t *mp) 1095 { 1096 if (DB_TYPE(mp) != M_PROTO) { 1097 cmn_err(CE_WARN, 1098 "sockfs: received unexpected M_PCPROTO T_CONN_IND\n"); 1099 freemsg(mp); 1100 return; 1101 } 1102 1103 mutex_enter(&so->so_lock); 1104 ASSERT(mp->b_next == NULL); 1105 if (so->so_conn_ind_head == NULL) { 1106 so->so_conn_ind_head = mp; 1107 so->so_state |= SS_HASCONNIND; 1108 } else { 1109 ASSERT(so->so_state & SS_HASCONNIND); 1110 ASSERT(so->so_conn_ind_tail->b_next == NULL); 1111 so->so_conn_ind_tail->b_next = mp; 1112 } 1113 so->so_conn_ind_tail = mp; 1114 /* Wakeup a single consumer of the T_CONN_IND */ 1115 cv_signal(&so->so_connind_cv); 1116 mutex_exit(&so->so_lock); 1117 } 1118 1119 /* 1120 * Wait for a T_CONN_IND. 1121 * Don't wait if nonblocking. 1122 * Accept signals and socket errors. 
*/
int
sowaitconnind(struct sonode *so, int fmode, mblk_t **mpp)
{
	mblk_t *first;
	int error = 0;

	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
	mutex_enter(&so->so_lock);

	for (;;) {
		/* A pending socket error takes precedence over everything. */
		if (so->so_error && (error = sogeterr(so)) != 0)
			break;

		first = so->so_conn_ind_head;
		if (first != NULL) {
			/* Dequeue the oldest indication and hand it back. */
			so->so_conn_ind_head = first->b_next;
			first->b_next = NULL;
			if (so->so_conn_ind_head == NULL) {
				ASSERT(so->so_conn_ind_tail == first);
				so->so_conn_ind_tail = NULL;
				so->so_state &= ~SS_HASCONNIND;
			}
			*mpp = first;
			break;
		}

		/* Nothing queued: fail fast for nonblocking callers. */
		if (fmode & (FNDELAY|FNONBLOCK)) {
			error = EWOULDBLOCK;
			break;
		}

		/* Sleep until signalled; a zero return means interrupted. */
		if (!cv_wait_sig_swap(&so->so_connind_cv, &so->so_lock)) {
			error = EINTR;
			break;
		}
		/* Woken up: re-check the error and the queue from the top. */
	}

	mutex_exit(&so->so_lock);
	return (error);
}

/*
 * Remove from the pending list the T_CONN_IND whose SEQ_number equals
 * seqno, free it, and set so_error to ECONNABORTED.
 *
 * Returns 0 when a matching indication was found and removed, -1 when
 * no queued indication carries that sequence number.
 *
 * The list is walked linearly under so_lock.
 */
int
soflushconnind(struct sonode *so, t_scalar_t seqno)
{
	mblk_t *prev = NULL;
	mblk_t *cur;

	mutex_enter(&so->so_lock);
	cur = so->so_conn_ind_head;
	while (cur != NULL) {
		struct T_conn_ind *ind = (struct T_conn_ind *)cur->b_rptr;

		if (ind->SEQ_number != seqno) {
			prev = cur;
			cur = cur->b_next;
			continue;
		}

		dprintso(so, 1,
		    ("t_discon_ind: found T_CONN_IND %d\n", seqno));

		/* Unlink from the predecessor, or from the head if first. */
		if (prev != NULL)
			prev->b_next = cur->b_next;
		else
			so->so_conn_ind_head = cur->b_next;

		/* If this was the last element the tail moves back. */
		if (so->so_conn_ind_tail == cur)
			so->so_conn_ind_tail = prev;
		cur->b_next = NULL;

		if (so->so_conn_ind_head == NULL) {
			ASSERT(so->so_conn_ind_tail == NULL);
			so->so_state &= ~SS_HASCONNIND;
		} else {
			ASSERT(so->so_conn_ind_tail != NULL);
		}

		so->so_error = ECONNABORTED;
		mutex_exit(&so->so_lock);
		/* The message is freed only after so_lock is dropped. */
		freemsg(cur);
		return (0);
	}
	mutex_exit(&so->so_lock);
	dprintso(so, 1, ("t_discon_ind: NOT found T_CONN_IND %d\n", seqno));
	return (-1);
}

/*
 * Block until the socket leaves the "connecting only" state or an error
 * is recorded in so_error.  Caller must hold so_lock.
 *
 * fmode: if FNDELAY or FNONBLOCK is set, EINPROGRESS is returned instead
 *	of sleeping.
 * nosig: when non-zero the wait uses cv_wait(); otherwise
 *	cv_wait_sig_swap() is used and EINTR is returned if it reports
 *	an interruption.
 *
 * Returns 0 when SS_ISCONNECTED is set, the value from sogeterr() when
 * so_error is non-zero, and ECONNREFUSED when neither is the case.
 */
int
sowaitconnected(struct sonode *so, int fmode, int nosig)
{
	int error;

	ASSERT(MUTEX_HELD(&so->so_lock));

	for (;;) {
		/* Stop once we are no longer purely "connecting" ... */
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) !=
		    SS_ISCONNECTING)
			break;
		/* ... or as soon as an error has been posted. */
		if (so->so_error != 0)
			break;

		dprintso(so, 1, ("waiting for SS_ISCONNECTED on %p\n", so));
		if (fmode & (FNDELAY|FNONBLOCK))
			return (EINPROGRESS);

		if (nosig) {
			cv_wait(&so->so_state_cv, &so->so_lock);
		} else if (!cv_wait_sig_swap(&so->so_state_cv,
		    &so->so_lock)) {
			/*
			 * Interrupted.  The application is expected to use
			 * nonblocking techniques to find out when the
			 * connection has been established.
			 */
			return (EINTR);
		}
		dprintso(so, 1, ("awoken on %p\n", so));
	}

	if (so->so_error != 0) {
		error = sogeterr(so);
		ASSERT(error != 0);
		dprintso(so, 1, ("sowaitconnected: error %d\n", error));
		return (error);
	}

	if (so->so_state & SS_ISCONNECTED)
		return (0);

	/*
	 * Not connected and no error left to report: a T_ORDREL_IND or a
	 * T_DISCON_IND with zero errno may have arrived, or another thread
	 * (e.g. one calling read) may have consumed so_error.
	 */
	error = ECONNREFUSED;
	dprintso(so, 1, ("sowaitconnected: error %d\n", error));
	return (error);
}


/*
 * Signal-generation half of urgent data handling.  Caller must hold
 * so_lock.
 *
 * When so_oobsigcnt is already ahead of so_oobcnt, nothing is changed
 * and S_RDBAND/POLLRDBAND are added only if extrasig is non-zero.
 * Otherwise so_oobsigcnt is incremented, SS_OOBPEND is set, any previously
 * saved urgent data is dropped, and S_RDBAND/POLLRDBAND are added to
 * *signals / *pollwakeups.
 */
static void
so_oob_sig(struct sonode *so, int extrasig,
    strsigset_t *signals, strpollset_t *pollwakeups)
{
	ASSERT(MUTEX_HELD(&so->so_lock));

	ASSERT(so_verify_oobstate(so));
	ASSERT(so->so_oobsigcnt >= so->so_oobcnt);

	if (so->so_oobsigcnt > so->so_oobcnt) {
		/*
		 * A signal was already generated for this urgent "event".
		 * TCP can receive updated urgent pointers, so the caller
		 * may still ask for one more via extrasig.
		 */
		ASSERT(so->so_state & SS_OOBPEND);
		if (!extrasig)
			return;
		*signals |= S_RDBAND;
		*pollwakeups |= POLLRDBAND;
		return;
	}

	so->so_oobsigcnt++;
	ASSERT(so->so_oobsigcnt > 0);	/* Wraparound */
	ASSERT(so->so_oobsigcnt > so->so_oobcnt);

	/*
	 * Mark urgent data as pending (for select/poll) and, since new
	 * urgent data is on the way, forget the state of any old one.
	 */
	so->so_state = (so->so_state | SS_OOBPEND) &
	    ~(SS_HAVEOOBDATA|SS_HADOOBDATA);

	if (so->so_oobmsg != NULL) {
		dprintso(so, 1, ("sock: discarding old oob\n"));
		freemsg(so->so_oobmsg);
		so->so_oobmsg = NULL;
	}

	*signals |= S_RDBAND;
	*pollwakeups |= POLLRDBAND;
	ASSERT(so_verify_oobstate(so));
}

/*
 * Handle the processing of the T_EXDATA_IND with urgent data.
 * Returns the T_EXDATA_IND if it should be queued on the read queue.
1315 */ 1316 /* ARGSUSED2 */ 1317 static mblk_t * 1318 so_oob_exdata(struct sonode *so, mblk_t *mp, 1319 strsigset_t *signals, strpollset_t *pollwakeups) 1320 { 1321 ASSERT(MUTEX_HELD(&so->so_lock)); 1322 1323 ASSERT(so_verify_oobstate(so)); 1324 1325 ASSERT(so->so_oobsigcnt > so->so_oobcnt); 1326 1327 so->so_oobcnt++; 1328 ASSERT(so->so_oobcnt > 0); /* wraparound? */ 1329 ASSERT(so->so_oobsigcnt >= so->so_oobcnt); 1330 1331 /* 1332 * Set MSGMARK for SIOCATMARK. 1333 */ 1334 mp->b_flag |= MSGMARK; 1335 1336 ASSERT(so_verify_oobstate(so)); 1337 return (mp); 1338 } 1339 1340 /* 1341 * Handle the processing of the actual urgent data. 1342 * Returns the data mblk if it should be queued on the read queue. 1343 */ 1344 static mblk_t * 1345 so_oob_data(struct sonode *so, mblk_t *mp, 1346 strsigset_t *signals, strpollset_t *pollwakeups) 1347 { 1348 ASSERT(MUTEX_HELD(&so->so_lock)); 1349 1350 ASSERT(so_verify_oobstate(so)); 1351 1352 ASSERT(so->so_oobsigcnt >= so->so_oobcnt); 1353 ASSERT(mp != NULL); 1354 /* 1355 * For OOBINLINE we keep the data in the T_EXDATA_IND. 1356 * Otherwise we store it in so_oobmsg. 1357 */ 1358 ASSERT(so->so_oobmsg == NULL); 1359 if (so->so_options & SO_OOBINLINE) { 1360 *pollwakeups |= POLLIN | POLLRDNORM | POLLRDBAND; 1361 *signals |= S_INPUT | S_RDNORM; 1362 } else { 1363 *pollwakeups |= POLLRDBAND; 1364 so->so_state |= SS_HAVEOOBDATA; 1365 so->so_oobmsg = mp; 1366 mp = NULL; 1367 } 1368 ASSERT(so_verify_oobstate(so)); 1369 return (mp); 1370 } 1371 1372 /* 1373 * Caller must hold the mutex. 1374 * For delayed processing, save the T_DISCON_IND received 1375 * from below on so_discon_ind_mp. 1376 * When the message is processed the framework will call: 1377 * (*func)(so, mp); 1378 */ 1379 static void 1380 so_save_discon_ind(struct sonode *so, 1381 mblk_t *mp, 1382 void (*func)(struct sonode *so, mblk_t *)) 1383 { 1384 ASSERT(MUTEX_HELD(&so->so_lock)); 1385 1386 /* 1387 * Discard new T_DISCON_IND if we have already received another. 
1388 * Currently the earlier message can either be on so_discon_ind_mp 1389 * or being processed. 1390 */ 1391 if (so->so_discon_ind_mp != NULL || (so->so_flag & SOASYNC_UNBIND)) { 1392 cmn_err(CE_WARN, 1393 "sockfs: received unexpected additional T_DISCON_IND\n"); 1394 freemsg(mp); 1395 return; 1396 } 1397 mp->b_prev = (mblk_t *)func; 1398 mp->b_next = NULL; 1399 so->so_discon_ind_mp = mp; 1400 } 1401 1402 /* 1403 * Caller must hold the mutex and make sure that either SOLOCKED 1404 * or SOASYNC_UNBIND is set. Called from so_unlock_single(). 1405 * Perform delayed processing of T_DISCON_IND message on so_discon_ind_mp. 1406 * Need to ensure that strsock_proto() will not end up sleeping for 1407 * SOASYNC_UNBIND, while executing this function. 1408 */ 1409 void 1410 so_drain_discon_ind(struct sonode *so) 1411 { 1412 mblk_t *bp; 1413 void (*func)(struct sonode *so, mblk_t *); 1414 1415 ASSERT(MUTEX_HELD(&so->so_lock)); 1416 ASSERT(so->so_flag & (SOLOCKED|SOASYNC_UNBIND)); 1417 1418 /* Process T_DISCON_IND on so_discon_ind_mp */ 1419 if ((bp = so->so_discon_ind_mp) != NULL) { 1420 so->so_discon_ind_mp = NULL; 1421 func = (void (*)())bp->b_prev; 1422 bp->b_prev = NULL; 1423 1424 /* 1425 * This (*func) is supposed to generate a message downstream 1426 * and we need to have a flag set until the corresponding 1427 * upstream message reaches stream head. 1428 * When processing T_DISCON_IND in strsock_discon_ind 1429 * we hold SOASYN_UNBIND when sending T_UNBIND_REQ down and 1430 * drop the flag after we get the ACK in strsock_proto. 1431 */ 1432 (void) (*func)(so, bp); 1433 } 1434 } 1435 1436 /* 1437 * Caller must hold the mutex. 1438 * Remove the T_DISCON_IND on so_discon_ind_mp. 1439 */ 1440 void 1441 so_flush_discon_ind(struct sonode *so) 1442 { 1443 mblk_t *bp; 1444 1445 ASSERT(MUTEX_HELD(&so->so_lock)); 1446 1447 /* 1448 * Remove T_DISCON_IND mblk at so_discon_ind_mp. 
1449 */ 1450 if ((bp = so->so_discon_ind_mp) != NULL) { 1451 so->so_discon_ind_mp = NULL; 1452 bp->b_prev = NULL; 1453 freemsg(bp); 1454 } 1455 } 1456 1457 /* 1458 * Caller must hold the mutex. 1459 * 1460 * This function is used to process the T_DISCON_IND message. It does 1461 * immediate processing when called from strsock_proto and delayed 1462 * processing of discon_ind saved on so_discon_ind_mp when called from 1463 * so_drain_discon_ind. When a T_DISCON_IND message is saved in 1464 * so_discon_ind_mp for delayed processing, this function is registered 1465 * as the callback function to process the message. 1466 * 1467 * SOASYNC_UNBIND should be held in this function, during the non-blocking 1468 * unbind operation, and should be released only after we receive the ACK 1469 * in strsock_proto, for the T_UNBIND_REQ sent here. Since SOLOCKED is not set, 1470 * no TPI messages would be sent down at this time. This is to prevent M_FLUSH 1471 * sent from either this function or tcp_unbind(), flushing away any TPI 1472 * message that is being sent down and stays in a lower module's queue. 1473 * 1474 * This function drops so_lock and grabs it again. 1475 */ 1476 static void 1477 strsock_discon_ind(struct sonode *so, mblk_t *discon_mp) 1478 { 1479 struct vnode *vp; 1480 struct stdata *stp; 1481 union T_primitives *tpr; 1482 struct T_unbind_req *ubr; 1483 mblk_t *mp; 1484 int error; 1485 1486 ASSERT(MUTEX_HELD(&so->so_lock)); 1487 ASSERT(discon_mp); 1488 ASSERT(discon_mp->b_rptr); 1489 1490 tpr = (union T_primitives *)discon_mp->b_rptr; 1491 ASSERT(tpr->type == T_DISCON_IND); 1492 1493 vp = SOTOV(so); 1494 stp = vp->v_stream; 1495 ASSERT(stp); 1496 1497 /* 1498 * Not a listener 1499 */ 1500 ASSERT((so->so_state & SS_ACCEPTCONN) == 0); 1501 1502 /* 1503 * This assumes that the name space for DISCON_reason 1504 * is the errno name space. 1505 */ 1506 soisdisconnected(so, tpr->discon_ind.DISCON_reason); 1507 1508 /* 1509 * Unbind with the transport without blocking. 
1510 * If we've already received a T_DISCON_IND do not unbind. 1511 * 1512 * If there is no preallocated unbind message, we have already 1513 * unbound with the transport 1514 * 1515 * If the socket is not bound, no need to unbind. 1516 */ 1517 mp = so->so_unbind_mp; 1518 if (mp == NULL) { 1519 ASSERT(!(so->so_state & SS_ISBOUND)); 1520 mutex_exit(&so->so_lock); 1521 } else if (!(so->so_state & SS_ISBOUND)) { 1522 mutex_exit(&so->so_lock); 1523 } else { 1524 so->so_unbind_mp = NULL; 1525 1526 /* 1527 * Is another T_DISCON_IND being processed. 1528 */ 1529 ASSERT((so->so_flag & SOASYNC_UNBIND) == 0); 1530 1531 /* 1532 * Make strsock_proto ignore T_OK_ACK and T_ERROR_ACK for 1533 * this unbind. Set SOASYNC_UNBIND. This should be cleared 1534 * only after we receive the ACK in strsock_proto. 1535 */ 1536 so->so_flag |= SOASYNC_UNBIND; 1537 ASSERT(!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING))); 1538 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN|SS_LADDR_VALID); 1539 mutex_exit(&so->so_lock); 1540 1541 /* 1542 * Send down T_UNBIND_REQ ignoring flow control. 1543 * XXX Assumes that MSG_IGNFLOW implies that this thread 1544 * does not run service procedures. 1545 */ 1546 ASSERT(DB_TYPE(mp) == M_PROTO); 1547 ubr = (struct T_unbind_req *)mp->b_rptr; 1548 mp->b_wptr += sizeof (*ubr); 1549 ubr->PRIM_type = T_UNBIND_REQ; 1550 1551 /* 1552 * Flush the read and write side (except stream head read queue) 1553 * and send down T_UNBIND_REQ. 
1554 */ 1555 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW); 1556 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1557 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 1558 /* LINTED - warning: statement has no consequent: if */ 1559 if (error) { 1560 eprintsoline(so, error); 1561 } 1562 } 1563 1564 if (tpr->discon_ind.DISCON_reason != 0) 1565 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 1566 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 1567 strseteof(SOTOV(so), 1); 1568 /* 1569 * strseteof takes care of read side wakeups, 1570 * pollwakeups, and signals. 1571 */ 1572 dprintso(so, 1, ("T_DISCON_IND: error %d\n", so->so_error)); 1573 freemsg(discon_mp); 1574 1575 1576 pollwakeup(&stp->sd_pollist, POLLOUT); 1577 mutex_enter(&stp->sd_lock); 1578 1579 /* 1580 * Wake sleeping write 1581 */ 1582 if (stp->sd_flag & WSLEEP) { 1583 stp->sd_flag &= ~WSLEEP; 1584 cv_broadcast(&stp->sd_wrq->q_wait); 1585 } 1586 1587 /* 1588 * strsendsig can handle multiple signals with a 1589 * single call. Send SIGPOLL for S_OUTPUT event. 1590 */ 1591 if (stp->sd_sigflags & S_OUTPUT) 1592 strsendsig(stp->sd_siglist, S_OUTPUT, 0, 0); 1593 1594 mutex_exit(&stp->sd_lock); 1595 mutex_enter(&so->so_lock); 1596 } 1597 1598 /* 1599 * This routine is registered with the stream head to receive M_PROTO 1600 * and M_PCPROTO messages. 1601 * 1602 * Returns NULL if the message was consumed. 1603 * Returns an mblk to make that mblk be processed (and queued) by the stream 1604 * head. 1605 * 1606 * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and 1607 * *pollwakeups) for the stream head to take action on. Note that since 1608 * sockets always deliver SIGIO for every new piece of data this routine 1609 * never sets *firstmsgsigs; any signals are returned in *allmsgsigs. 1610 * 1611 * This routine handles all data related TPI messages independent of 1612 * the type of the socket i.e. it doesn't care if T_UNITDATA_IND message 1613 * arrive on a SOCK_STREAM. 
1614 */ 1615 static mblk_t * 1616 strsock_proto(vnode_t *vp, mblk_t *mp, 1617 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 1618 strsigset_t *allmsgsigs, strpollset_t *pollwakeups) 1619 { 1620 union T_primitives *tpr; 1621 struct sonode *so; 1622 1623 so = VTOSO(vp); 1624 1625 dprintso(so, 1, ("strsock_proto(%p, %p)\n", vp, mp)); 1626 1627 /* Set default return values */ 1628 *firstmsgsigs = *wakeups = *allmsgsigs = *pollwakeups = 0; 1629 1630 ASSERT(DB_TYPE(mp) == M_PROTO || 1631 DB_TYPE(mp) == M_PCPROTO); 1632 1633 if (MBLKL(mp) < sizeof (tpr->type)) { 1634 /* The message is too short to even contain the primitive */ 1635 cmn_err(CE_WARN, 1636 "sockfs: Too short TPI message received. Len = %ld\n", 1637 (ptrdiff_t)(MBLKL(mp))); 1638 freemsg(mp); 1639 return (NULL); 1640 } 1641 if (!__TPI_PRIM_ISALIGNED(mp->b_rptr)) { 1642 /* The read pointer is not aligned correctly for TPI */ 1643 cmn_err(CE_WARN, 1644 "sockfs: Unaligned TPI message received. rptr = %p\n", 1645 (void *)mp->b_rptr); 1646 freemsg(mp); 1647 return (NULL); 1648 } 1649 tpr = (union T_primitives *)mp->b_rptr; 1650 dprintso(so, 1, ("strsock_proto: primitive %d\n", tpr->type)); 1651 1652 switch (tpr->type) { 1653 1654 case T_DATA_IND: 1655 if (MBLKL(mp) < sizeof (struct T_data_ind)) { 1656 cmn_err(CE_WARN, 1657 "sockfs: Too short T_DATA_IND. Len = %ld\n", 1658 (ptrdiff_t)(MBLKL(mp))); 1659 freemsg(mp); 1660 return (NULL); 1661 } 1662 /* 1663 * Ignore zero-length T_DATA_IND messages. These might be 1664 * generated by some transports. 1665 * This is needed to prevent read (which skips the M_PROTO 1666 * part) to unexpectedly return 0 (or return EWOULDBLOCK 1667 * on a non-blocking socket after select/poll has indicated 1668 * that data is available). 
1669 */ 1670 if (msgdsize(mp->b_cont) == 0) { 1671 dprintso(so, 0, 1672 ("strsock_proto: zero length T_DATA_IND\n")); 1673 freemsg(mp); 1674 return (NULL); 1675 } 1676 *allmsgsigs = S_INPUT | S_RDNORM; 1677 *pollwakeups = POLLIN | POLLRDNORM; 1678 *wakeups = RSLEEP; 1679 return (mp); 1680 1681 case T_UNITDATA_IND: { 1682 struct T_unitdata_ind *tudi = &tpr->unitdata_ind; 1683 void *addr; 1684 t_uscalar_t addrlen; 1685 1686 if (MBLKL(mp) < sizeof (struct T_unitdata_ind)) { 1687 cmn_err(CE_WARN, 1688 "sockfs: Too short T_UNITDATA_IND. Len = %ld\n", 1689 (ptrdiff_t)(MBLKL(mp))); 1690 freemsg(mp); 1691 return (NULL); 1692 } 1693 1694 /* Is this is not a connected datagram socket? */ 1695 if ((so->so_mode & SM_CONNREQUIRED) || 1696 !(so->so_state & SS_ISCONNECTED)) { 1697 /* 1698 * Not a connected datagram socket. Look for 1699 * the SO_UNIX_CLOSE option. If such an option is found 1700 * discard the message (since it has no meaning 1701 * unless connected). 1702 */ 1703 if (so->so_family == AF_UNIX && msgdsize(mp) == 0 && 1704 tudi->OPT_length != 0) { 1705 void *opt; 1706 t_uscalar_t optlen = tudi->OPT_length; 1707 1708 opt = sogetoff(mp, tudi->OPT_offset, 1709 optlen, __TPI_ALIGN_SIZE); 1710 if (opt == NULL) { 1711 /* The len/off falls outside mp */ 1712 freemsg(mp); 1713 mutex_enter(&so->so_lock); 1714 soseterror(so, EPROTO); 1715 mutex_exit(&so->so_lock); 1716 cmn_err(CE_WARN, 1717 "sockfs: T_unidata_ind with " 1718 "invalid optlen/offset %u/%d\n", 1719 optlen, tudi->OPT_offset); 1720 return (NULL); 1721 } 1722 if (so_getopt_unix_close(opt, optlen)) { 1723 freemsg(mp); 1724 return (NULL); 1725 } 1726 } 1727 *allmsgsigs = S_INPUT | S_RDNORM; 1728 *pollwakeups = POLLIN | POLLRDNORM; 1729 *wakeups = RSLEEP; 1730 #ifdef C2_AUDIT 1731 if (audit_active) 1732 audit_sock(T_UNITDATA_IND, strvp2wq(vp), 1733 mp, 0); 1734 #endif /* C2_AUDIT */ 1735 return (mp); 1736 } 1737 1738 /* 1739 * A connect datagram socket. 
For AF_INET{,6} we verify that 1740 * the source address matches the "connected to" address. 1741 * The semantics of AF_UNIX sockets is to not verify 1742 * the source address. 1743 * Note that this source address verification is transport 1744 * specific. Thus the real fix would be to extent TPI 1745 * to allow T_CONN_REQ messages to be send to connectionless 1746 * transport providers and always let the transport provider 1747 * do whatever filtering is needed. 1748 * 1749 * The verification/filtering semantics for transports 1750 * other than AF_INET and AF_UNIX are unknown. The choice 1751 * would be to either filter using bcmp or let all messages 1752 * get through. This code does not filter other address 1753 * families since this at least allows the application to 1754 * work around any missing filtering. 1755 * 1756 * XXX Should we move filtering to UDP/ICMP??? 1757 * That would require passing e.g. a T_DISCON_REQ to UDP 1758 * when the socket becomes unconnected. 1759 */ 1760 addrlen = tudi->SRC_length; 1761 /* 1762 * The alignment restriction is really to strict but 1763 * we want enough alignment to inspect the fields of 1764 * a sockaddr_in. 1765 */ 1766 addr = sogetoff(mp, tudi->SRC_offset, addrlen, 1767 __TPI_ALIGN_SIZE); 1768 if (addr == NULL) { 1769 freemsg(mp); 1770 mutex_enter(&so->so_lock); 1771 soseterror(so, EPROTO); 1772 mutex_exit(&so->so_lock); 1773 cmn_err(CE_WARN, 1774 "sockfs: T_unidata_ind with invalid " 1775 "addrlen/offset %u/%d\n", 1776 addrlen, tudi->SRC_offset); 1777 return (NULL); 1778 } 1779 1780 if (so->so_family == AF_INET) { 1781 /* 1782 * For AF_INET we allow wildcarding both sin_addr 1783 * and sin_port. 
1784 */ 1785 struct sockaddr_in *faddr, *sin; 1786 1787 /* Prevent so_faddr_sa from changing while accessed */ 1788 mutex_enter(&so->so_lock); 1789 ASSERT(so->so_faddr_len == 1790 (socklen_t)sizeof (struct sockaddr_in)); 1791 faddr = (struct sockaddr_in *)so->so_faddr_sa; 1792 sin = (struct sockaddr_in *)addr; 1793 if (addrlen != 1794 (t_uscalar_t)sizeof (struct sockaddr_in) || 1795 (sin->sin_addr.s_addr != faddr->sin_addr.s_addr && 1796 faddr->sin_addr.s_addr != INADDR_ANY) || 1797 (so->so_type != SOCK_RAW && 1798 sin->sin_port != faddr->sin_port && 1799 faddr->sin_port != 0)) { 1800 #ifdef DEBUG 1801 dprintso(so, 0, 1802 ("sockfs: T_UNITDATA_IND mismatch: %s", 1803 pr_addr(so->so_family, 1804 (struct sockaddr *)addr, 1805 addrlen))); 1806 dprintso(so, 0, (" - %s\n", 1807 pr_addr(so->so_family, so->so_faddr_sa, 1808 (t_uscalar_t)so->so_faddr_len))); 1809 #endif /* DEBUG */ 1810 mutex_exit(&so->so_lock); 1811 freemsg(mp); 1812 return (NULL); 1813 } 1814 mutex_exit(&so->so_lock); 1815 } else if (so->so_family == AF_INET6) { 1816 /* 1817 * For AF_INET6 we allow wildcarding both sin6_addr 1818 * and sin6_port. 1819 */ 1820 struct sockaddr_in6 *faddr6, *sin6; 1821 static struct in6_addr zeroes; /* inits to all zeros */ 1822 1823 /* Prevent so_faddr_sa from changing while accessed */ 1824 mutex_enter(&so->so_lock); 1825 ASSERT(so->so_faddr_len == 1826 (socklen_t)sizeof (struct sockaddr_in6)); 1827 faddr6 = (struct sockaddr_in6 *)so->so_faddr_sa; 1828 sin6 = (struct sockaddr_in6 *)addr; 1829 /* XXX could we get a mapped address ::ffff:0.0.0.0 ? 
*/ 1830 if (addrlen != 1831 (t_uscalar_t)sizeof (struct sockaddr_in6) || 1832 (!IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, 1833 &faddr6->sin6_addr) && 1834 !IN6_ARE_ADDR_EQUAL(&faddr6->sin6_addr, &zeroes)) || 1835 (so->so_type != SOCK_RAW && 1836 sin6->sin6_port != faddr6->sin6_port && 1837 faddr6->sin6_port != 0)) { 1838 #ifdef DEBUG 1839 dprintso(so, 0, 1840 ("sockfs: T_UNITDATA_IND mismatch: %s", 1841 pr_addr(so->so_family, 1842 (struct sockaddr *)addr, 1843 addrlen))); 1844 dprintso(so, 0, (" - %s\n", 1845 pr_addr(so->so_family, so->so_faddr_sa, 1846 (t_uscalar_t)so->so_faddr_len))); 1847 #endif /* DEBUG */ 1848 mutex_exit(&so->so_lock); 1849 freemsg(mp); 1850 return (NULL); 1851 } 1852 mutex_exit(&so->so_lock); 1853 } else if (so->so_family == AF_UNIX && 1854 msgdsize(mp->b_cont) == 0 && 1855 tudi->OPT_length != 0) { 1856 /* 1857 * Attempt to extract AF_UNIX 1858 * SO_UNIX_CLOSE indication from options. 1859 */ 1860 void *opt; 1861 t_uscalar_t optlen = tudi->OPT_length; 1862 1863 opt = sogetoff(mp, tudi->OPT_offset, 1864 optlen, __TPI_ALIGN_SIZE); 1865 if (opt == NULL) { 1866 /* The len/off falls outside mp */ 1867 freemsg(mp); 1868 mutex_enter(&so->so_lock); 1869 soseterror(so, EPROTO); 1870 mutex_exit(&so->so_lock); 1871 cmn_err(CE_WARN, 1872 "sockfs: T_unidata_ind with invalid " 1873 "optlen/offset %u/%d\n", 1874 optlen, tudi->OPT_offset); 1875 return (NULL); 1876 } 1877 /* 1878 * If we received a unix close indication mark the 1879 * socket and discard this message. 
1880 */ 1881 if (so_getopt_unix_close(opt, optlen)) { 1882 mutex_enter(&so->so_lock); 1883 sobreakconn(so, ECONNRESET); 1884 mutex_exit(&so->so_lock); 1885 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 1886 freemsg(mp); 1887 *pollwakeups = POLLIN | POLLRDNORM; 1888 *allmsgsigs = S_INPUT | S_RDNORM; 1889 *wakeups = RSLEEP; 1890 return (NULL); 1891 } 1892 } 1893 *allmsgsigs = S_INPUT | S_RDNORM; 1894 *pollwakeups = POLLIN | POLLRDNORM; 1895 *wakeups = RSLEEP; 1896 return (mp); 1897 } 1898 1899 case T_OPTDATA_IND: { 1900 struct T_optdata_ind *tdi = &tpr->optdata_ind; 1901 1902 if (MBLKL(mp) < sizeof (struct T_optdata_ind)) { 1903 cmn_err(CE_WARN, 1904 "sockfs: Too short T_OPTDATA_IND. Len = %ld\n", 1905 (ptrdiff_t)(MBLKL(mp))); 1906 freemsg(mp); 1907 return (NULL); 1908 } 1909 /* 1910 * Allow zero-length messages carrying options. 1911 * This is used when carrying the SO_UNIX_CLOSE option. 1912 */ 1913 if (so->so_family == AF_UNIX && msgdsize(mp->b_cont) == 0 && 1914 tdi->OPT_length != 0) { 1915 /* 1916 * Attempt to extract AF_UNIX close indication 1917 * from the options. Ignore any other options - 1918 * those are handled once the message is removed 1919 * from the queue. 1920 * The close indication message should not carry data. 1921 */ 1922 void *opt; 1923 t_uscalar_t optlen = tdi->OPT_length; 1924 1925 opt = sogetoff(mp, tdi->OPT_offset, 1926 optlen, __TPI_ALIGN_SIZE); 1927 if (opt == NULL) { 1928 /* The len/off falls outside mp */ 1929 freemsg(mp); 1930 mutex_enter(&so->so_lock); 1931 soseterror(so, EPROTO); 1932 mutex_exit(&so->so_lock); 1933 cmn_err(CE_WARN, 1934 "sockfs: T_optdata_ind with invalid " 1935 "optlen/offset %u/%d\n", 1936 optlen, tdi->OPT_offset); 1937 return (NULL); 1938 } 1939 /* 1940 * If we received a close indication mark the 1941 * socket and discard this message. 
1942 */ 1943 if (so_getopt_unix_close(opt, optlen)) { 1944 mutex_enter(&so->so_lock); 1945 socantsendmore(so); 1946 mutex_exit(&so->so_lock); 1947 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 1948 freemsg(mp); 1949 return (NULL); 1950 } 1951 } 1952 *allmsgsigs = S_INPUT | S_RDNORM; 1953 *pollwakeups = POLLIN | POLLRDNORM; 1954 *wakeups = RSLEEP; 1955 return (mp); 1956 } 1957 1958 case T_EXDATA_IND: { 1959 mblk_t *mctl, *mdata; 1960 1961 if (MBLKL(mp) < sizeof (struct T_exdata_ind)) { 1962 cmn_err(CE_WARN, 1963 "sockfs: Too short T_EXDATA_IND. Len = %ld\n", 1964 (ptrdiff_t)(MBLKL(mp))); 1965 freemsg(mp); 1966 return (NULL); 1967 } 1968 /* 1969 * Ignore zero-length T_EXDATA_IND messages. These might be 1970 * generated by some transports. 1971 * 1972 * This is needed to prevent read (which skips the M_PROTO 1973 * part) to unexpectedly return 0 (or return EWOULDBLOCK 1974 * on a non-blocking socket after select/poll has indicated 1975 * that data is available). 1976 */ 1977 dprintso(so, 1, 1978 ("T_EXDATA_IND(%p): counts %d/%d state %s\n", 1979 vp, so->so_oobsigcnt, so->so_oobcnt, 1980 pr_state(so->so_state, so->so_mode))); 1981 1982 if (msgdsize(mp->b_cont) == 0) { 1983 dprintso(so, 0, 1984 ("strsock_proto: zero length T_EXDATA_IND\n")); 1985 freemsg(mp); 1986 return (NULL); 1987 } 1988 1989 /* 1990 * Split into the T_EXDATA_IND and the M_DATA part. 1991 * We process these three pieces separately: 1992 * signal generation 1993 * handling T_EXDATA_IND 1994 * handling M_DATA component 1995 */ 1996 mctl = mp; 1997 mdata = mctl->b_cont; 1998 mctl->b_cont = NULL; 1999 mutex_enter(&so->so_lock); 2000 so_oob_sig(so, 0, allmsgsigs, pollwakeups); 2001 mctl = so_oob_exdata(so, mctl, allmsgsigs, pollwakeups); 2002 mdata = so_oob_data(so, mdata, allmsgsigs, pollwakeups); 2003 2004 /* 2005 * Pass the T_EXDATA_IND and the M_DATA back separately 2006 * by using b_next linkage. (The stream head will queue any 2007 * b_next linked messages separately.) 
This is needed 2008 * since MSGMARK applies to the last by of the message 2009 * hence we can not have any M_DATA component attached 2010 * to the marked T_EXDATA_IND. Note that the stream head 2011 * will not consolidate M_DATA messages onto an MSGMARK'ed 2012 * message in order to preserve the constraint that 2013 * the T_EXDATA_IND always is a separate message. 2014 */ 2015 ASSERT(mctl != NULL); 2016 mctl->b_next = mdata; 2017 mp = mctl; 2018 #ifdef DEBUG 2019 if (mdata == NULL) { 2020 dprintso(so, 1, 2021 ("after outofline T_EXDATA_IND(%p): " 2022 "counts %d/%d poll 0x%x sig 0x%x state %s\n", 2023 vp, so->so_oobsigcnt, 2024 so->so_oobcnt, *pollwakeups, *allmsgsigs, 2025 pr_state(so->so_state, so->so_mode))); 2026 } else { 2027 dprintso(so, 1, 2028 ("after inline T_EXDATA_IND(%p): " 2029 "counts %d/%d poll 0x%x sig 0x%x state %s\n", 2030 vp, so->so_oobsigcnt, 2031 so->so_oobcnt, *pollwakeups, *allmsgsigs, 2032 pr_state(so->so_state, so->so_mode))); 2033 } 2034 #endif /* DEBUG */ 2035 mutex_exit(&so->so_lock); 2036 *wakeups = RSLEEP; 2037 return (mp); 2038 } 2039 2040 case T_CONN_CON: { 2041 struct T_conn_con *conn_con; 2042 void *addr; 2043 t_uscalar_t addrlen; 2044 2045 /* 2046 * Verify the state, update the state to ISCONNECTED, 2047 * record the potentially new address in the message, 2048 * and drop the message. 2049 */ 2050 if (MBLKL(mp) < sizeof (struct T_conn_con)) { 2051 cmn_err(CE_WARN, 2052 "sockfs: Too short T_CONN_CON. 
Len = %ld\n", 2053 (ptrdiff_t)(MBLKL(mp))); 2054 freemsg(mp); 2055 return (NULL); 2056 } 2057 2058 mutex_enter(&so->so_lock); 2059 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 2060 SS_ISCONNECTING) { 2061 mutex_exit(&so->so_lock); 2062 dprintso(so, 1, 2063 ("T_CONN_CON: state %x\n", so->so_state)); 2064 freemsg(mp); 2065 return (NULL); 2066 } 2067 2068 conn_con = &tpr->conn_con; 2069 addrlen = conn_con->RES_length; 2070 /* 2071 * Allow the address to be of different size than sent down 2072 * in the T_CONN_REQ as long as it doesn't exceed the maxlen. 2073 * For AF_UNIX require the identical length. 2074 */ 2075 if (so->so_family == AF_UNIX ? 2076 addrlen != (t_uscalar_t)sizeof (so->so_ux_laddr) : 2077 addrlen > (t_uscalar_t)so->so_faddr_maxlen) { 2078 cmn_err(CE_WARN, 2079 "sockfs: T_conn_con with different " 2080 "length %u/%d\n", 2081 addrlen, conn_con->RES_length); 2082 soisdisconnected(so, EPROTO); 2083 mutex_exit(&so->so_lock); 2084 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 2085 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2086 strseteof(SOTOV(so), 1); 2087 freemsg(mp); 2088 /* 2089 * strseteof takes care of read side wakeups, 2090 * pollwakeups, and signals. 2091 */ 2092 *wakeups = WSLEEP; 2093 *allmsgsigs = S_OUTPUT; 2094 *pollwakeups = POLLOUT; 2095 return (NULL); 2096 } 2097 addr = sogetoff(mp, conn_con->RES_offset, addrlen, 1); 2098 if (addr == NULL) { 2099 cmn_err(CE_WARN, 2100 "sockfs: T_conn_con with invalid " 2101 "addrlen/offset %u/%d\n", 2102 addrlen, conn_con->RES_offset); 2103 mutex_exit(&so->so_lock); 2104 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 2105 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2106 strseteof(SOTOV(so), 1); 2107 freemsg(mp); 2108 /* 2109 * strseteof takes care of read side wakeups, 2110 * pollwakeups, and signals. 2111 */ 2112 *wakeups = WSLEEP; 2113 *allmsgsigs = S_OUTPUT; 2114 *pollwakeups = POLLOUT; 2115 return (NULL); 2116 } 2117 2118 /* 2119 * Save for getpeername. 
2120 */ 2121 if (so->so_family != AF_UNIX) { 2122 so->so_faddr_len = (socklen_t)addrlen; 2123 ASSERT(so->so_faddr_len <= so->so_faddr_maxlen); 2124 bcopy(addr, so->so_faddr_sa, addrlen); 2125 so->so_state |= SS_FADDR_VALID; 2126 } 2127 2128 if (so->so_peercred != NULL) 2129 crfree(so->so_peercred); 2130 so->so_peercred = DB_CRED(mp); 2131 so->so_cpid = DB_CPID(mp); 2132 if (so->so_peercred != NULL) 2133 crhold(so->so_peercred); 2134 2135 /* Wakeup anybody sleeping in sowaitconnected */ 2136 soisconnected(so); 2137 mutex_exit(&so->so_lock); 2138 2139 /* 2140 * The socket is now available for sending data. 2141 */ 2142 *wakeups = WSLEEP; 2143 *allmsgsigs = S_OUTPUT; 2144 *pollwakeups = POLLOUT; 2145 freemsg(mp); 2146 return (NULL); 2147 } 2148 2149 case T_CONN_IND: 2150 /* 2151 * Verify the min size and queue the message on 2152 * the so_conn_ind_head/tail list. 2153 */ 2154 if (MBLKL(mp) < sizeof (struct T_conn_ind)) { 2155 cmn_err(CE_WARN, 2156 "sockfs: Too short T_CONN_IND. Len = %ld\n", 2157 (ptrdiff_t)(MBLKL(mp))); 2158 freemsg(mp); 2159 return (NULL); 2160 } 2161 2162 #ifdef C2_AUDIT 2163 if (audit_active) 2164 audit_sock(T_CONN_IND, strvp2wq(vp), mp, 0); 2165 #endif /* C2_AUDIT */ 2166 if (!(so->so_state & SS_ACCEPTCONN)) { 2167 cmn_err(CE_WARN, 2168 "sockfs: T_conn_ind on non-listening socket\n"); 2169 freemsg(mp); 2170 return (NULL); 2171 } 2172 soqueueconnind(so, mp); 2173 *allmsgsigs = S_INPUT | S_RDNORM; 2174 *pollwakeups = POLLIN | POLLRDNORM; 2175 *wakeups = RSLEEP; 2176 return (NULL); 2177 2178 case T_ORDREL_IND: 2179 if (MBLKL(mp) < sizeof (struct T_ordrel_ind)) { 2180 cmn_err(CE_WARN, 2181 "sockfs: Too short T_ORDREL_IND. Len = %ld\n", 2182 (ptrdiff_t)(MBLKL(mp))); 2183 freemsg(mp); 2184 return (NULL); 2185 } 2186 2187 /* 2188 * Some providers send this when not fully connected. 2189 * SunLink X.25 needs to retrieve disconnect reason after 2190 * disconnect for compatibility. 
It uses T_ORDREL_IND 2191 * instead of T_DISCON_IND so that it may use the 2192 * endpoint after a connect failure to retrieve the 2193 * reason using an ioctl. Thus we explicitly clear 2194 * SS_ISCONNECTING here for SunLink X.25. 2195 * This is a needed TPI violation. 2196 */ 2197 mutex_enter(&so->so_lock); 2198 so->so_state &= ~SS_ISCONNECTING; 2199 socantrcvmore(so); 2200 mutex_exit(&so->so_lock); 2201 strseteof(SOTOV(so), 1); 2202 /* 2203 * strseteof takes care of read side wakeups, 2204 * pollwakeups, and signals. 2205 */ 2206 freemsg(mp); 2207 return (NULL); 2208 2209 case T_DISCON_IND: 2210 if (MBLKL(mp) < sizeof (struct T_discon_ind)) { 2211 cmn_err(CE_WARN, 2212 "sockfs: Too short T_DISCON_IND. Len = %ld\n", 2213 (ptrdiff_t)(MBLKL(mp))); 2214 freemsg(mp); 2215 return (NULL); 2216 } 2217 if (so->so_state & SS_ACCEPTCONN) { 2218 /* 2219 * This is a listener. Look for a queued T_CONN_IND 2220 * with a matching sequence number and remove it 2221 * from the list. 2222 * It is normal to not find the sequence number since 2223 * the soaccept might have already dequeued it 2224 * (in which case the T_CONN_RES will fail with 2225 * TBADSEQ). 2226 */ 2227 (void) soflushconnind(so, tpr->discon_ind.SEQ_number); 2228 freemsg(mp); 2229 return (0); 2230 } 2231 2232 /* 2233 * Not a listener 2234 * 2235 * If SS_CANTRCVMORE for AF_UNIX ignore the discon_reason. 2236 * Such a discon_ind appears when the peer has first done 2237 * a shutdown() followed by a close() in which case we just 2238 * want to record socantsendmore. 2239 * In this case sockfs first receives a T_ORDREL_IND followed 2240 * by a T_DISCON_IND. 2241 * Note that for other transports (e.g. TCP) we need to handle 2242 * the discon_ind in this case since it signals an error. 
2243 */ 2244 mutex_enter(&so->so_lock); 2245 if ((so->so_state & SS_CANTRCVMORE) && 2246 (so->so_family == AF_UNIX)) { 2247 socantsendmore(so); 2248 mutex_exit(&so->so_lock); 2249 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2250 dprintso(so, 1, 2251 ("T_DISCON_IND: error %d\n", so->so_error)); 2252 freemsg(mp); 2253 /* 2254 * Set these variables for caller to process them. 2255 * For the else part where T_DISCON_IND is processed, 2256 * this will be done in the function being called 2257 * (strsock_discon_ind()) 2258 */ 2259 *wakeups = WSLEEP; 2260 *allmsgsigs = S_OUTPUT; 2261 *pollwakeups = POLLOUT; 2262 } else if (so->so_flag & (SOASYNC_UNBIND | SOLOCKED)) { 2263 /* 2264 * Deferred processing of T_DISCON_IND 2265 */ 2266 so_save_discon_ind(so, mp, strsock_discon_ind); 2267 mutex_exit(&so->so_lock); 2268 } else { 2269 /* 2270 * Process T_DISCON_IND now 2271 */ 2272 (void) strsock_discon_ind(so, mp); 2273 mutex_exit(&so->so_lock); 2274 } 2275 return (NULL); 2276 2277 case T_UDERROR_IND: { 2278 struct T_uderror_ind *tudi = &tpr->uderror_ind; 2279 void *addr; 2280 t_uscalar_t addrlen; 2281 int error; 2282 2283 dprintso(so, 0, 2284 ("T_UDERROR_IND: error %d\n", tudi->ERROR_type)); 2285 2286 if (MBLKL(mp) < sizeof (struct T_uderror_ind)) { 2287 cmn_err(CE_WARN, 2288 "sockfs: Too short T_UDERROR_IND. 
Len = %ld\n", 2289 (ptrdiff_t)(MBLKL(mp))); 2290 freemsg(mp); 2291 return (NULL); 2292 } 2293 /* Ignore on connection-oriented transports */ 2294 if (so->so_mode & SM_CONNREQUIRED) { 2295 freemsg(mp); 2296 eprintsoline(so, 0); 2297 cmn_err(CE_WARN, 2298 "sockfs: T_uderror_ind on connection-oriented " 2299 "transport\n"); 2300 return (NULL); 2301 } 2302 addrlen = tudi->DEST_length; 2303 addr = sogetoff(mp, tudi->DEST_offset, addrlen, 1); 2304 if (addr == NULL) { 2305 cmn_err(CE_WARN, 2306 "sockfs: T_uderror_ind with invalid " 2307 "addrlen/offset %u/%d\n", 2308 addrlen, tudi->DEST_offset); 2309 freemsg(mp); 2310 return (NULL); 2311 } 2312 2313 /* Verify source address for connected socket. */ 2314 mutex_enter(&so->so_lock); 2315 if (so->so_state & SS_ISCONNECTED) { 2316 void *faddr; 2317 t_uscalar_t faddr_len; 2318 boolean_t match = B_FALSE; 2319 2320 switch (so->so_family) { 2321 case AF_INET: { 2322 /* Compare just IP address and port */ 2323 struct sockaddr_in *sin1, *sin2; 2324 2325 sin1 = (struct sockaddr_in *)so->so_faddr_sa; 2326 sin2 = (struct sockaddr_in *)addr; 2327 if (addrlen == sizeof (struct sockaddr_in) && 2328 sin1->sin_port == sin2->sin_port && 2329 sin1->sin_addr.s_addr == 2330 sin2->sin_addr.s_addr) 2331 match = B_TRUE; 2332 break; 2333 } 2334 case AF_INET6: { 2335 /* Compare just IP address and port. 
Not flow */ 2336 struct sockaddr_in6 *sin1, *sin2; 2337 2338 sin1 = (struct sockaddr_in6 *)so->so_faddr_sa; 2339 sin2 = (struct sockaddr_in6 *)addr; 2340 if (addrlen == sizeof (struct sockaddr_in6) && 2341 sin1->sin6_port == sin2->sin6_port && 2342 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 2343 &sin2->sin6_addr)) 2344 match = B_TRUE; 2345 break; 2346 } 2347 case AF_UNIX: 2348 faddr = &so->so_ux_faddr; 2349 faddr_len = 2350 (t_uscalar_t)sizeof (so->so_ux_faddr); 2351 if (faddr_len == addrlen && 2352 bcmp(addr, faddr, addrlen) == 0) 2353 match = B_TRUE; 2354 break; 2355 default: 2356 faddr = so->so_faddr_sa; 2357 faddr_len = (t_uscalar_t)so->so_faddr_len; 2358 if (faddr_len == addrlen && 2359 bcmp(addr, faddr, addrlen) == 0) 2360 match = B_TRUE; 2361 break; 2362 } 2363 2364 if (!match) { 2365 #ifdef DEBUG 2366 dprintso(so, 0, 2367 ("sockfs: T_UDERR_IND mismatch: %s - ", 2368 pr_addr(so->so_family, 2369 (struct sockaddr *)addr, 2370 addrlen))); 2371 dprintso(so, 0, ("%s\n", 2372 pr_addr(so->so_family, so->so_faddr_sa, 2373 so->so_faddr_len))); 2374 #endif /* DEBUG */ 2375 mutex_exit(&so->so_lock); 2376 freemsg(mp); 2377 return (NULL); 2378 } 2379 /* 2380 * Make the write error nonpersistent. If the error 2381 * is zero we use ECONNRESET. 2382 * This assumes that the name space for ERROR_type 2383 * is the errno name space. 2384 */ 2385 if (tudi->ERROR_type != 0) 2386 error = tudi->ERROR_type; 2387 else 2388 error = ECONNRESET; 2389 2390 soseterror(so, error); 2391 mutex_exit(&so->so_lock); 2392 strsetrerror(SOTOV(so), 0, 0, sogetrderr); 2393 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2394 *wakeups = RSLEEP | WSLEEP; 2395 *allmsgsigs = S_INPUT | S_RDNORM | S_OUTPUT; 2396 *pollwakeups = POLLIN | POLLRDNORM | POLLOUT; 2397 freemsg(mp); 2398 return (NULL); 2399 } 2400 /* 2401 * If the application asked for delayed errors 2402 * record the T_UDERROR_IND so_eaddr_mp and the reason in 2403 * so_delayed_error for delayed error posting. If the reason 2404 * is zero use ECONNRESET. 
2405 * Note that delayed error indications do not make sense for 2406 * AF_UNIX sockets since sendto checks that the destination 2407 * address is valid at the time of the sendto. 2408 */ 2409 if (!(so->so_options & SO_DGRAM_ERRIND)) { 2410 mutex_exit(&so->so_lock); 2411 freemsg(mp); 2412 return (NULL); 2413 } 2414 if (so->so_eaddr_mp != NULL) 2415 freemsg(so->so_eaddr_mp); 2416 2417 so->so_eaddr_mp = mp; 2418 if (tudi->ERROR_type != 0) 2419 error = tudi->ERROR_type; 2420 else 2421 error = ECONNRESET; 2422 so->so_delayed_error = (ushort_t)error; 2423 mutex_exit(&so->so_lock); 2424 return (NULL); 2425 } 2426 2427 case T_ERROR_ACK: 2428 dprintso(so, 0, 2429 ("strsock_proto: T_ERROR_ACK for %d, error %d/%d\n", 2430 tpr->error_ack.ERROR_prim, 2431 tpr->error_ack.TLI_error, 2432 tpr->error_ack.UNIX_error)); 2433 2434 if (MBLKL(mp) < sizeof (struct T_error_ack)) { 2435 cmn_err(CE_WARN, 2436 "sockfs: Too short T_ERROR_ACK. Len = %ld\n", 2437 (ptrdiff_t)(MBLKL(mp))); 2438 freemsg(mp); 2439 return (NULL); 2440 } 2441 /* 2442 * Check if we were waiting for the async message 2443 */ 2444 mutex_enter(&so->so_lock); 2445 if ((so->so_flag & SOASYNC_UNBIND) && 2446 tpr->error_ack.ERROR_prim == T_UNBIND_REQ) { 2447 so_unlock_single(so, SOASYNC_UNBIND); 2448 mutex_exit(&so->so_lock); 2449 freemsg(mp); 2450 return (NULL); 2451 } 2452 mutex_exit(&so->so_lock); 2453 soqueueack(so, mp); 2454 return (NULL); 2455 2456 case T_OK_ACK: 2457 if (MBLKL(mp) < sizeof (struct T_ok_ack)) { 2458 cmn_err(CE_WARN, 2459 "sockfs: Too short T_OK_ACK. 
Len = %ld\n", 2460 (ptrdiff_t)(MBLKL(mp))); 2461 freemsg(mp); 2462 return (NULL); 2463 } 2464 /* 2465 * Check if we were waiting for the async message 2466 */ 2467 mutex_enter(&so->so_lock); 2468 if ((so->so_flag & SOASYNC_UNBIND) && 2469 tpr->ok_ack.CORRECT_prim == T_UNBIND_REQ) { 2470 dprintso(so, 1, 2471 ("strsock_proto: T_OK_ACK async unbind\n")); 2472 so_unlock_single(so, SOASYNC_UNBIND); 2473 mutex_exit(&so->so_lock); 2474 freemsg(mp); 2475 return (NULL); 2476 } 2477 mutex_exit(&so->so_lock); 2478 soqueueack(so, mp); 2479 return (NULL); 2480 2481 case T_INFO_ACK: 2482 if (MBLKL(mp) < sizeof (struct T_info_ack)) { 2483 cmn_err(CE_WARN, 2484 "sockfs: Too short T_INFO_ACK. Len = %ld\n", 2485 (ptrdiff_t)(MBLKL(mp))); 2486 freemsg(mp); 2487 return (NULL); 2488 } 2489 soqueueack(so, mp); 2490 return (NULL); 2491 2492 case T_CAPABILITY_ACK: 2493 /* 2494 * A T_capability_ack need only be large enough to hold 2495 * the PRIM_type and CAP_bits1 fields; checking for anything 2496 * larger might reject a correct response from an older 2497 * provider. 2498 */ 2499 if (MBLKL(mp) < 2 * sizeof (t_uscalar_t)) { 2500 cmn_err(CE_WARN, 2501 "sockfs: Too short T_CAPABILITY_ACK. Len = %ld\n", 2502 (ptrdiff_t)(MBLKL(mp))); 2503 freemsg(mp); 2504 return (NULL); 2505 } 2506 soqueueack(so, mp); 2507 return (NULL); 2508 2509 case T_BIND_ACK: 2510 if (MBLKL(mp) < sizeof (struct T_bind_ack)) { 2511 cmn_err(CE_WARN, 2512 "sockfs: Too short T_BIND_ACK. Len = %ld\n", 2513 (ptrdiff_t)(MBLKL(mp))); 2514 freemsg(mp); 2515 return (NULL); 2516 } 2517 soqueueack(so, mp); 2518 return (NULL); 2519 2520 case T_OPTMGMT_ACK: 2521 if (MBLKL(mp) < sizeof (struct T_optmgmt_ack)) { 2522 cmn_err(CE_WARN, 2523 "sockfs: Too short T_OPTMGMT_ACK. 
Len = %ld\n", 2524 (ptrdiff_t)(MBLKL(mp))); 2525 freemsg(mp); 2526 return (NULL); 2527 } 2528 soqueueack(so, mp); 2529 return (NULL); 2530 default: 2531 #ifdef DEBUG 2532 cmn_err(CE_WARN, 2533 "sockfs: unknown TPI primitive %d received\n", 2534 tpr->type); 2535 #endif /* DEBUG */ 2536 freemsg(mp); 2537 return (NULL); 2538 } 2539 } 2540 2541 /* 2542 * This routine is registered with the stream head to receive other 2543 * (non-data, and non-proto) messages. 2544 * 2545 * Returns NULL if the message was consumed. 2546 * Returns an mblk to make that mblk be processed by the stream head. 2547 * 2548 * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and 2549 * *pollwakeups) for the stream head to take action on. 2550 */ 2551 static mblk_t * 2552 strsock_misc(vnode_t *vp, mblk_t *mp, 2553 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 2554 strsigset_t *allmsgsigs, strpollset_t *pollwakeups) 2555 { 2556 struct sonode *so; 2557 2558 so = VTOSO(vp); 2559 2560 dprintso(so, 1, ("strsock_misc(%p, %p, 0x%x)\n", 2561 vp, mp, DB_TYPE(mp))); 2562 2563 /* Set default return values */ 2564 *wakeups = *allmsgsigs = *firstmsgsigs = *pollwakeups = 0; 2565 2566 switch (DB_TYPE(mp)) { 2567 case M_PCSIG: 2568 /* 2569 * This assumes that an M_PCSIG for the urgent data arrives 2570 * before the corresponding T_EXDATA_IND. 2571 * 2572 * Note: Just like in SunOS 4.X and 4.4BSD a poll will be 2573 * awoken before the urgent data shows up. 2574 * For OOBINLINE this can result in select returning 2575 * only exceptions as opposed to except|read. 
2576 */ 2577 if (*mp->b_rptr == SIGURG) { 2578 mutex_enter(&so->so_lock); 2579 dprintso(so, 1, 2580 ("SIGURG(%p): counts %d/%d state %s\n", 2581 vp, so->so_oobsigcnt, 2582 so->so_oobcnt, 2583 pr_state(so->so_state, so->so_mode))); 2584 so_oob_sig(so, 1, allmsgsigs, pollwakeups); 2585 dprintso(so, 1, 2586 ("after SIGURG(%p): counts %d/%d " 2587 " poll 0x%x sig 0x%x state %s\n", 2588 vp, so->so_oobsigcnt, 2589 so->so_oobcnt, *pollwakeups, *allmsgsigs, 2590 pr_state(so->so_state, so->so_mode))); 2591 mutex_exit(&so->so_lock); 2592 } 2593 freemsg(mp); 2594 return (NULL); 2595 2596 case M_SIG: 2597 case M_HANGUP: 2598 case M_UNHANGUP: 2599 case M_ERROR: 2600 /* M_ERRORs etc are ignored */ 2601 freemsg(mp); 2602 return (NULL); 2603 2604 case M_FLUSH: 2605 /* 2606 * Do not flush read queue. If the M_FLUSH 2607 * arrives because of an impending T_discon_ind 2608 * we still have to keep any queued data - this is part of 2609 * socket semantics. 2610 */ 2611 if (*mp->b_rptr & FLUSHW) { 2612 *mp->b_rptr &= ~FLUSHR; 2613 return (mp); 2614 } 2615 freemsg(mp); 2616 return (NULL); 2617 2618 default: 2619 return (mp); 2620 } 2621 } 2622 2623 2624 /* Register to receive signals for certain events */ 2625 int 2626 so_set_asyncsigs(vnode_t *vp, pid_t pgrp, int events, int mode, cred_t *cr) 2627 { 2628 struct strsigset ss; 2629 int32_t rval; 2630 2631 /* 2632 * Note that SOLOCKED will be set except for the call from soaccept(). 2633 */ 2634 ASSERT(!mutex_owned(&VTOSO(vp)->so_lock)); 2635 ss.ss_pid = pgrp; 2636 ss.ss_events = events; 2637 return (strioctl(vp, I_ESETSIG, (intptr_t)&ss, mode, K_TO_K, cr, 2638 &rval)); 2639 } 2640 2641 2642 /* Register for events matching the SS_ASYNC flag */ 2643 int 2644 so_set_events(struct sonode *so, vnode_t *vp, cred_t *cr) 2645 { 2646 int events = so->so_state & SS_ASYNC ? 
2647 S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT : 2648 S_RDBAND | S_BANDURG; 2649 2650 return (so_set_asyncsigs(vp, so->so_pgrp, events, 0, cr)); 2651 } 2652 2653 2654 /* Change the SS_ASYNC flag, and update signal delivery if needed */ 2655 int 2656 so_flip_async(struct sonode *so, vnode_t *vp, int mode, cred_t *cr) 2657 { 2658 ASSERT(mutex_owned(&so->so_lock)); 2659 if (so->so_pgrp != 0) { 2660 int error; 2661 int events = so->so_state & SS_ASYNC ? /* Old flag */ 2662 S_RDBAND | S_BANDURG : /* New sigs */ 2663 S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT; 2664 2665 so_lock_single(so); 2666 mutex_exit(&so->so_lock); 2667 2668 error = so_set_asyncsigs(vp, so->so_pgrp, events, mode, cr); 2669 2670 mutex_enter(&so->so_lock); 2671 so_unlock_single(so, SOLOCKED); 2672 if (error) 2673 return (error); 2674 } 2675 so->so_state ^= SS_ASYNC; 2676 return (0); 2677 } 2678 2679 /* 2680 * Set new pid/pgrp for SIGPOLL (or SIGIO for FIOASYNC mode), replacing 2681 * any existing one. If passed zero, just clear the existing one. 2682 */ 2683 int 2684 so_set_siggrp(struct sonode *so, vnode_t *vp, pid_t pgrp, int mode, cred_t *cr) 2685 { 2686 int events = so->so_state & SS_ASYNC ? 2687 S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT : 2688 S_RDBAND | S_BANDURG; 2689 int error; 2690 2691 ASSERT(mutex_owned(&so->so_lock)); 2692 2693 /* 2694 * Change socket process (group). 2695 * 2696 * strioctl (via so_set_asyncsigs) will perform permission check and 2697 * also keep a PID_HOLD to prevent the pid from being reused. 
2698 */ 2699 so_lock_single(so); 2700 mutex_exit(&so->so_lock); 2701 2702 if (pgrp != 0) { 2703 dprintso(so, 1, ("setown: adding pgrp %d ev 0x%x\n", 2704 pgrp, events)); 2705 error = so_set_asyncsigs(vp, pgrp, events, mode, cr); 2706 if (error != 0) { 2707 eprintsoline(so, error); 2708 goto bad; 2709 } 2710 } 2711 /* Remove the previously registered process/group */ 2712 if (so->so_pgrp != 0) { 2713 dprintso(so, 1, ("setown: removing pgrp %d\n", so->so_pgrp)); 2714 error = so_set_asyncsigs(vp, so->so_pgrp, 0, mode, cr); 2715 if (error != 0) { 2716 eprintsoline(so, error); 2717 error = 0; 2718 } 2719 } 2720 mutex_enter(&so->so_lock); 2721 so_unlock_single(so, SOLOCKED); 2722 so->so_pgrp = pgrp; 2723 return (0); 2724 bad: 2725 mutex_enter(&so->so_lock); 2726 so_unlock_single(so, SOLOCKED); 2727 return (error); 2728 } 2729 2730 2731 2732 /* 2733 * Translate a TLI(/XTI) error into a system error as best we can. 2734 */ 2735 static const int tli_errs[] = { 2736 0, /* no error */ 2737 EADDRNOTAVAIL, /* TBADADDR */ 2738 ENOPROTOOPT, /* TBADOPT */ 2739 EACCES, /* TACCES */ 2740 EBADF, /* TBADF */ 2741 EADDRNOTAVAIL, /* TNOADDR */ 2742 EPROTO, /* TOUTSTATE */ 2743 ECONNABORTED, /* TBADSEQ */ 2744 0, /* TSYSERR - will never get */ 2745 EPROTO, /* TLOOK - should never be sent by transport */ 2746 EMSGSIZE, /* TBADDATA */ 2747 EMSGSIZE, /* TBUFOVFLW */ 2748 EPROTO, /* TFLOW */ 2749 EWOULDBLOCK, /* TNODATA */ 2750 EPROTO, /* TNODIS */ 2751 EPROTO, /* TNOUDERR */ 2752 EINVAL, /* TBADFLAG */ 2753 EPROTO, /* TNOREL */ 2754 EOPNOTSUPP, /* TNOTSUPPORT */ 2755 EPROTO, /* TSTATECHNG */ 2756 /* following represent error namespace expansion with XTI */ 2757 EPROTO, /* TNOSTRUCTYPE - never sent by transport */ 2758 EPROTO, /* TBADNAME - never sent by transport */ 2759 EPROTO, /* TBADQLEN - never sent by transport */ 2760 EADDRINUSE, /* TADDRBUSY */ 2761 EBADF, /* TINDOUT */ 2762 EBADF, /* TPROVMISMATCH */ 2763 EBADF, /* TRESQLEN */ 2764 EBADF, /* TRESADDR */ 2765 EPROTO, /* TQFULL - 
never sent by transport */ 2766 EPROTO, /* TPROTO */ 2767 }; 2768 2769 static int 2770 tlitosyserr(int terr) 2771 { 2772 ASSERT(terr != TSYSERR); 2773 if (terr >= (sizeof (tli_errs) / sizeof (tli_errs[0]))) 2774 return (EPROTO); 2775 else 2776 return (tli_errs[terr]); 2777 } 2778