1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 22 /* All Rights Reserved */ 23 24 25 /* 26 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. 
27 */ 28 29 #include <sys/types.h> 30 #include <sys/sysmacros.h> 31 #include <sys/param.h> 32 #include <sys/errno.h> 33 #include <sys/signal.h> 34 #include <sys/stat.h> 35 #include <sys/proc.h> 36 #include <sys/cred.h> 37 #include <sys/user.h> 38 #include <sys/vnode.h> 39 #include <sys/file.h> 40 #include <sys/stream.h> 41 #include <sys/strsubr.h> 42 #include <sys/stropts.h> 43 #include <sys/tihdr.h> 44 #include <sys/var.h> 45 #include <sys/poll.h> 46 #include <sys/termio.h> 47 #include <sys/ttold.h> 48 #include <sys/systm.h> 49 #include <sys/uio.h> 50 #include <sys/cmn_err.h> 51 #include <sys/sad.h> 52 #include <sys/netstack.h> 53 #include <sys/priocntl.h> 54 #include <sys/jioctl.h> 55 #include <sys/procset.h> 56 #include <sys/session.h> 57 #include <sys/kmem.h> 58 #include <sys/filio.h> 59 #include <sys/vtrace.h> 60 #include <sys/debug.h> 61 #include <sys/strredir.h> 62 #include <sys/fs/fifonode.h> 63 #include <sys/fs/snode.h> 64 #include <sys/strlog.h> 65 #include <sys/strsun.h> 66 #include <sys/project.h> 67 #include <sys/kbio.h> 68 #include <sys/msio.h> 69 #include <sys/tty.h> 70 #include <sys/ptyvar.h> 71 #include <sys/vuid_event.h> 72 #include <sys/modctl.h> 73 #include <sys/sunddi.h> 74 #include <sys/sunldi_impl.h> 75 #include <sys/autoconf.h> 76 #include <sys/policy.h> 77 #include <sys/dld.h> 78 #include <sys/zone.h> 79 #include <c2/audit.h> 80 81 /* 82 * This define helps improve the readability of streams code while 83 * still maintaining a very old streams performance enhancement. The 84 * performance enhancement basically involved having all callers 85 * of straccess() perform the first check that straccess() will do 86 * locally before actually calling straccess(). (There by reducing 87 * the number of unnecessary calls to straccess().) 88 */ 89 #define i_straccess(x, y) ((stp->sd_sidp == NULL) ? 0 : \ 90 (stp->sd_vnode->v_type == VFIFO) ? 0 : \ 91 straccess((x), (y))) 92 93 /* 94 * what is mblk_pull_len? 
95 * 96 * If a streams message consists of many short messages, 97 * a performance degradation occurs from copyout overhead. 98 * To decrease the per mblk overhead, messages that are 99 * likely to consist of many small mblks are pulled up into 100 * one continuous chunk of memory. 101 * 102 * To avoid the processing overhead of examining every 103 * mblk, a quick heuristic is used. If the first mblk in 104 * the message is shorter than mblk_pull_len, it is likely 105 * that the rest of the mblk will be short. 106 * 107 * This heuristic was decided upon after performance tests 108 * indicated that anything more complex slowed down the main 109 * code path. 110 */ 111 #define MBLK_PULL_LEN 64 112 uint32_t mblk_pull_len = MBLK_PULL_LEN; 113 114 /* 115 * The sgttyb_handling flag controls the handling of the old BSD 116 * TIOCGETP, TIOCSETP, and TIOCSETN ioctls as follows: 117 * 118 * 0 - Emit no warnings at all and retain old, broken behavior. 119 * 1 - Emit no warnings and silently handle new semantics. 120 * 2 - Send cmn_err(CE_NOTE) when either TIOCSETP or TIOCSETN is used 121 * (once per system invocation). Handle with new semantics. 122 * 3 - Send SIGSYS when any TIOCGETP, TIOCSETP, or TIOCSETN call is 123 * made (so that offenders drop core and are easy to debug). 124 * 125 * The "new semantics" are that TIOCGETP returns B38400 for 126 * sg_[io]speed if the corresponding value is over B38400, and that 127 * TIOCSET[PN] accept B38400 in these cases to mean "retain current 128 * bit rate." 
129 */ 130 int sgttyb_handling = 1; 131 static boolean_t sgttyb_complaint; 132 133 /* don't push drcompat module by default on Style-2 streams */ 134 static int push_drcompat = 0; 135 136 /* 137 * id value used to distinguish between different ioctl messages 138 */ 139 static uint32_t ioc_id; 140 141 static void putback(struct stdata *, queue_t *, mblk_t *, int); 142 static void strcleanall(struct vnode *); 143 static int strwsrv(queue_t *); 144 static int strdocmd(struct stdata *, struct strcmd *, cred_t *); 145 146 /* 147 * qinit and module_info structures for stream head read and write queues 148 */ 149 struct module_info strm_info = { 0, "strrhead", 0, INFPSZ, STRHIGH, STRLOW }; 150 struct module_info stwm_info = { 0, "strwhead", 0, 0, 0, 0 }; 151 struct qinit strdata = { strrput, NULL, NULL, NULL, NULL, &strm_info }; 152 struct qinit stwdata = { NULL, strwsrv, NULL, NULL, NULL, &stwm_info }; 153 struct module_info fiform_info = { 0, "fifostrrhead", 0, PIPE_BUF, FIFOHIWAT, 154 FIFOLOWAT }; 155 struct module_info fifowm_info = { 0, "fifostrwhead", 0, 0, 0, 0 }; 156 struct qinit fifo_strdata = { strrput, NULL, NULL, NULL, NULL, &fiform_info }; 157 struct qinit fifo_stwdata = { NULL, strwsrv, NULL, NULL, NULL, &fifowm_info }; 158 159 extern kmutex_t strresources; /* protects global resources */ 160 extern kmutex_t muxifier; /* single-threads multiplexor creation */ 161 162 static boolean_t msghasdata(mblk_t *bp); 163 #define msgnodata(bp) (!msghasdata(bp)) 164 165 /* 166 * Stream head locking notes: 167 * There are four monitors associated with the stream head: 168 * 1. v_stream monitor: in stropen() and strclose() v_lock 169 * is held while the association of vnode and stream 170 * head is established or tested for. 171 * 2. open/close/push/pop monitor: sd_lock is held while each 172 * thread bids for exclusive access to this monitor 173 * for opening or closing a stream. In addition, this 174 * monitor is entered during pushes and pops. 
This 175 * guarantees that during plumbing operations there 176 * is only one thread trying to change the plumbing. 177 * Any other threads present in the stream are only 178 * using the plumbing. 179 * 3. read/write monitor: in the case of read, a thread holds 180 * sd_lock while trying to get data from the stream 181 * head queue. if there is none to fulfill a read 182 * request, it sets RSLEEP and calls cv_wait_sig() down 183 * in strwaitq() to await the arrival of new data. 184 * when new data arrives in strrput(), sd_lock is acquired 185 * before testing for RSLEEP and calling cv_broadcast(). 186 * the behavior of strwrite(), strwsrv(), and WSLEEP 187 * mirror this. 188 * 4. ioctl monitor: sd_lock is gotten to ensure that only one 189 * thread is doing an ioctl at a time. 190 */ 191 192 static int 193 push_mod(queue_t *qp, dev_t *devp, struct stdata *stp, const char *name, 194 int anchor, cred_t *crp, uint_t anchor_zoneid) 195 { 196 int error; 197 fmodsw_impl_t *fp; 198 199 if (stp->sd_flag & (STRHUP|STRDERR|STWRERR)) { 200 error = (stp->sd_flag & STRHUP) ? ENXIO : EIO; 201 return (error); 202 } 203 if (stp->sd_pushcnt >= nstrpush) { 204 return (EINVAL); 205 } 206 207 if ((fp = fmodsw_find(name, FMODSW_HOLD | FMODSW_LOAD)) == NULL) { 208 stp->sd_flag |= STREOPENFAIL; 209 return (EINVAL); 210 } 211 212 /* 213 * push new module and call its open routine via qattach 214 */ 215 if ((error = qattach(qp, devp, 0, crp, fp, B_FALSE)) != 0) 216 return (error); 217 218 /* 219 * Check to see if caller wants a STREAMS anchor 220 * put at this place in the stream, and add if so. 221 */ 222 mutex_enter(&stp->sd_lock); 223 if (anchor == stp->sd_pushcnt) { 224 stp->sd_anchor = stp->sd_pushcnt; 225 stp->sd_anchorzone = anchor_zoneid; 226 } 227 mutex_exit(&stp->sd_lock); 228 229 return (0); 230 } 231 232 /* 233 * Open a stream device. 
234 */ 235 int 236 stropen(vnode_t *vp, dev_t *devp, int flag, cred_t *crp) 237 { 238 struct stdata *stp; 239 queue_t *qp; 240 int s; 241 dev_t dummydev, savedev; 242 struct autopush *ap; 243 struct dlautopush dlap; 244 int error = 0; 245 ssize_t rmin, rmax; 246 int cloneopen; 247 queue_t *brq; 248 major_t major; 249 str_stack_t *ss; 250 zoneid_t zoneid; 251 uint_t anchor; 252 253 if (AU_AUDITING()) 254 audit_stropen(vp, devp, flag, crp); 255 256 /* 257 * If the stream already exists, wait for any open in progress 258 * to complete, then call the open function of each module and 259 * driver in the stream. Otherwise create the stream. 260 */ 261 TRACE_1(TR_FAC_STREAMS_FR, TR_STROPEN, "stropen:%p", vp); 262 retry: 263 mutex_enter(&vp->v_lock); 264 if ((stp = vp->v_stream) != NULL) { 265 266 /* 267 * Waiting for stream to be created to device 268 * due to another open. 269 */ 270 mutex_exit(&vp->v_lock); 271 272 if (STRMATED(stp)) { 273 struct stdata *strmatep = stp->sd_mate; 274 275 STRLOCKMATES(stp); 276 if (strmatep->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) { 277 if (flag & (FNDELAY|FNONBLOCK)) { 278 error = EAGAIN; 279 mutex_exit(&strmatep->sd_lock); 280 goto ckreturn; 281 } 282 mutex_exit(&stp->sd_lock); 283 if (!cv_wait_sig(&strmatep->sd_monitor, 284 &strmatep->sd_lock)) { 285 error = EINTR; 286 mutex_exit(&strmatep->sd_lock); 287 mutex_enter(&stp->sd_lock); 288 goto ckreturn; 289 } 290 mutex_exit(&strmatep->sd_lock); 291 goto retry; 292 } 293 if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) { 294 if (flag & (FNDELAY|FNONBLOCK)) { 295 error = EAGAIN; 296 mutex_exit(&strmatep->sd_lock); 297 goto ckreturn; 298 } 299 mutex_exit(&strmatep->sd_lock); 300 if (!cv_wait_sig(&stp->sd_monitor, 301 &stp->sd_lock)) { 302 error = EINTR; 303 goto ckreturn; 304 } 305 mutex_exit(&stp->sd_lock); 306 goto retry; 307 } 308 309 if (stp->sd_flag & (STRDERR|STWRERR)) { 310 error = EIO; 311 mutex_exit(&strmatep->sd_lock); 312 goto ckreturn; 313 } 314 315 stp->sd_flag |= STWOPEN; 316 
STRUNLOCKMATES(stp); 317 } else { 318 mutex_enter(&stp->sd_lock); 319 if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) { 320 if (flag & (FNDELAY|FNONBLOCK)) { 321 error = EAGAIN; 322 goto ckreturn; 323 } 324 if (!cv_wait_sig(&stp->sd_monitor, 325 &stp->sd_lock)) { 326 error = EINTR; 327 goto ckreturn; 328 } 329 mutex_exit(&stp->sd_lock); 330 goto retry; /* could be clone! */ 331 } 332 333 if (stp->sd_flag & (STRDERR|STWRERR)) { 334 error = EIO; 335 goto ckreturn; 336 } 337 338 stp->sd_flag |= STWOPEN; 339 mutex_exit(&stp->sd_lock); 340 } 341 342 /* 343 * Open all modules and devices down stream to notify 344 * that another user is streaming. For modules, set the 345 * last argument to MODOPEN and do not pass any open flags. 346 * Ignore dummydev since this is not the first open. 347 */ 348 claimstr(stp->sd_wrq); 349 qp = stp->sd_wrq; 350 while (_SAMESTR(qp)) { 351 qp = qp->q_next; 352 if ((error = qreopen(_RD(qp), devp, flag, crp)) != 0) 353 break; 354 } 355 releasestr(stp->sd_wrq); 356 mutex_enter(&stp->sd_lock); 357 stp->sd_flag &= ~(STRHUP|STWOPEN|STRDERR|STWRERR); 358 stp->sd_rerror = 0; 359 stp->sd_werror = 0; 360 ckreturn: 361 cv_broadcast(&stp->sd_monitor); 362 mutex_exit(&stp->sd_lock); 363 return (error); 364 } 365 366 /* 367 * This vnode isn't streaming. SPECFS already 368 * checked for multiple vnodes pointing to the 369 * same stream, so create a stream to the driver. 370 */ 371 qp = allocq(); 372 stp = shalloc(qp); 373 374 /* 375 * Initialize stream head. shalloc() has given us 376 * exclusive access, and we have the vnode locked; 377 * we can do whatever we want with stp. 
378 */ 379 stp->sd_flag = STWOPEN; 380 stp->sd_siglist = NULL; 381 stp->sd_pollist.ph_list = NULL; 382 stp->sd_sigflags = 0; 383 stp->sd_mark = NULL; 384 stp->sd_closetime = STRTIMOUT; 385 stp->sd_sidp = NULL; 386 stp->sd_pgidp = NULL; 387 stp->sd_vnode = vp; 388 stp->sd_rerror = 0; 389 stp->sd_werror = 0; 390 stp->sd_wroff = 0; 391 stp->sd_tail = 0; 392 stp->sd_iocblk = NULL; 393 stp->sd_cmdblk = NULL; 394 stp->sd_pushcnt = 0; 395 stp->sd_qn_minpsz = 0; 396 stp->sd_qn_maxpsz = INFPSZ - 1; /* used to check for initialization */ 397 stp->sd_maxblk = INFPSZ; 398 qp->q_ptr = _WR(qp)->q_ptr = stp; 399 STREAM(qp) = STREAM(_WR(qp)) = stp; 400 vp->v_stream = stp; 401 mutex_exit(&vp->v_lock); 402 if (vp->v_type == VFIFO) { 403 stp->sd_flag |= OLDNDELAY; 404 /* 405 * This means, both for pipes and fifos 406 * strwrite will send SIGPIPE if the other 407 * end is closed. For putmsg it depends 408 * on whether it is a XPG4_2 application 409 * or not 410 */ 411 stp->sd_wput_opt = SW_SIGPIPE; 412 413 /* setq might sleep in kmem_alloc - avoid holding locks. */ 414 setq(qp, &fifo_strdata, &fifo_stwdata, NULL, QMTSAFE, 415 SQ_CI|SQ_CO, B_FALSE); 416 417 set_qend(qp); 418 stp->sd_strtab = fifo_getinfo(); 419 _WR(qp)->q_nfsrv = _WR(qp); 420 qp->q_nfsrv = qp; 421 /* 422 * Wake up others that are waiting for stream to be created. 423 */ 424 mutex_enter(&stp->sd_lock); 425 /* 426 * nothing is be pushed on stream yet, so 427 * optimized stream head packetsizes are just that 428 * of the read queue 429 */ 430 stp->sd_qn_minpsz = qp->q_minpsz; 431 stp->sd_qn_maxpsz = qp->q_maxpsz; 432 stp->sd_flag &= ~STWOPEN; 433 goto fifo_opendone; 434 } 435 /* setq might sleep in kmem_alloc - avoid holding locks. */ 436 setq(qp, &strdata, &stwdata, NULL, QMTSAFE, SQ_CI|SQ_CO, B_FALSE); 437 438 set_qend(qp); 439 440 /* 441 * Open driver and create stream to it (via qattach). 
442 */ 443 savedev = *devp; 444 cloneopen = (getmajor(*devp) == clone_major); 445 if ((error = qattach(qp, devp, flag, crp, NULL, B_FALSE)) != 0) { 446 mutex_enter(&vp->v_lock); 447 vp->v_stream = NULL; 448 mutex_exit(&vp->v_lock); 449 mutex_enter(&stp->sd_lock); 450 cv_broadcast(&stp->sd_monitor); 451 mutex_exit(&stp->sd_lock); 452 freeq(_RD(qp)); 453 shfree(stp); 454 return (error); 455 } 456 /* 457 * Set sd_strtab after open in order to handle clonable drivers 458 */ 459 stp->sd_strtab = STREAMSTAB(getmajor(*devp)); 460 461 /* 462 * Historical note: dummydev used to be be prior to the initial 463 * open (via qattach above), which made the value seen 464 * inconsistent between an I_PUSH and an autopush of a module. 465 */ 466 dummydev = *devp; 467 468 /* 469 * For clone open of old style (Q not associated) network driver, 470 * push DRMODNAME module to handle DL_ATTACH/DL_DETACH 471 */ 472 brq = _RD(_WR(qp)->q_next); 473 major = getmajor(*devp); 474 if (push_drcompat && cloneopen && NETWORK_DRV(major) && 475 ((brq->q_flag & _QASSOCIATED) == 0)) { 476 if (push_mod(qp, &dummydev, stp, DRMODNAME, 0, crp, 0) != 0) 477 cmn_err(CE_WARN, "cannot push " DRMODNAME 478 " streams module"); 479 } 480 481 if (!NETWORK_DRV(major)) { 482 savedev = *devp; 483 } else { 484 /* 485 * For network devices, process differently based on the 486 * return value from dld_autopush(): 487 * 488 * 0: the passed-in device points to a GLDv3 datalink with 489 * per-link autopush configuration; use that configuration 490 * and ignore any per-driver autopush configuration. 491 * 492 * 1: the passed-in device points to a physical GLDv3 493 * datalink without per-link autopush configuration. The 494 * passed in device was changed to refer to the actual 495 * physical device (if it's not already); we use that new 496 * device to look up any per-driver autopush configuration. 
497 * 498 * -1: neither of the above cases applied; use the initial 499 * device to look up any per-driver autopush configuration. 500 */ 501 switch (dld_autopush(&savedev, &dlap)) { 502 case 0: 503 zoneid = crgetzoneid(crp); 504 for (s = 0; s < dlap.dap_npush; s++) { 505 error = push_mod(qp, &dummydev, stp, 506 dlap.dap_aplist[s], dlap.dap_anchor, crp, 507 zoneid); 508 if (error != 0) 509 break; 510 } 511 goto opendone; 512 case 1: 513 break; 514 case -1: 515 savedev = *devp; 516 break; 517 } 518 } 519 /* 520 * Find the autopush configuration based on "savedev". Start with the 521 * global zone. If not found check in the local zone. 522 */ 523 zoneid = GLOBAL_ZONEID; 524 retryap: 525 ss = netstack_find_by_stackid(zoneid_to_netstackid(zoneid))-> 526 netstack_str; 527 if ((ap = sad_ap_find_by_dev(savedev, ss)) == NULL) { 528 netstack_rele(ss->ss_netstack); 529 if (zoneid == GLOBAL_ZONEID) { 530 /* 531 * None found. Also look in the zone's autopush table. 532 */ 533 zoneid = crgetzoneid(crp); 534 if (zoneid != GLOBAL_ZONEID) 535 goto retryap; 536 } 537 goto opendone; 538 } 539 anchor = ap->ap_anchor; 540 zoneid = crgetzoneid(crp); 541 for (s = 0; s < ap->ap_npush; s++) { 542 error = push_mod(qp, &dummydev, stp, ap->ap_list[s], 543 anchor, crp, zoneid); 544 if (error != 0) 545 break; 546 } 547 sad_ap_rele(ap, ss); 548 netstack_rele(ss->ss_netstack); 549 550 opendone: 551 552 /* 553 * let specfs know that open failed part way through 554 */ 555 if (error) { 556 mutex_enter(&stp->sd_lock); 557 stp->sd_flag |= STREOPENFAIL; 558 mutex_exit(&stp->sd_lock); 559 } 560 561 /* 562 * Wake up others that are waiting for stream to be created. 563 */ 564 mutex_enter(&stp->sd_lock); 565 stp->sd_flag &= ~STWOPEN; 566 567 /* 568 * As a performance concern we are caching the values of 569 * q_minpsz and q_maxpsz of the module below the stream 570 * head in the stream head. 
571 */ 572 mutex_enter(QLOCK(stp->sd_wrq->q_next)); 573 rmin = stp->sd_wrq->q_next->q_minpsz; 574 rmax = stp->sd_wrq->q_next->q_maxpsz; 575 mutex_exit(QLOCK(stp->sd_wrq->q_next)); 576 577 /* do this processing here as a performance concern */ 578 if (strmsgsz != 0) { 579 if (rmax == INFPSZ) 580 rmax = strmsgsz; 581 else 582 rmax = MIN(strmsgsz, rmax); 583 } 584 585 mutex_enter(QLOCK(stp->sd_wrq)); 586 stp->sd_qn_minpsz = rmin; 587 stp->sd_qn_maxpsz = rmax; 588 mutex_exit(QLOCK(stp->sd_wrq)); 589 590 fifo_opendone: 591 cv_broadcast(&stp->sd_monitor); 592 mutex_exit(&stp->sd_lock); 593 return (error); 594 } 595 596 static int strsink(queue_t *, mblk_t *); 597 static struct qinit deadrend = { 598 strsink, NULL, NULL, NULL, NULL, &strm_info, NULL 599 }; 600 static struct qinit deadwend = { 601 NULL, NULL, NULL, NULL, NULL, &stwm_info, NULL 602 }; 603 604 /* 605 * Close a stream. 606 * This is called from closef() on the last close of an open stream. 607 * Strclean() will already have removed the siglist and pollist 608 * information, so all that remains is to remove all multiplexor links 609 * for the stream, pop all the modules (and the driver), and free the 610 * stream structure. 611 */ 612 613 int 614 strclose(struct vnode *vp, int flag, cred_t *crp) 615 { 616 struct stdata *stp; 617 queue_t *qp; 618 int rval; 619 int freestp = 1; 620 queue_t *rmq; 621 622 if (AU_AUDITING()) 623 audit_strclose(vp, flag, crp); 624 625 TRACE_1(TR_FAC_STREAMS_FR, 626 TR_STRCLOSE, "strclose:%p", vp); 627 ASSERT(vp->v_stream); 628 629 stp = vp->v_stream; 630 ASSERT(!(stp->sd_flag & STPLEX)); 631 qp = stp->sd_wrq; 632 633 /* 634 * Needed so that strpoll will return non-zero for this fd. 635 * Note that with POLLNOERR STRHUP does still cause POLLHUP. 
636 */ 637 mutex_enter(&stp->sd_lock); 638 stp->sd_flag |= STRHUP; 639 mutex_exit(&stp->sd_lock); 640 641 /* 642 * If the registered process or process group did not have an 643 * open instance of this stream then strclean would not be 644 * called. Thus at the time of closing all remaining siglist entries 645 * are removed. 646 */ 647 if (stp->sd_siglist != NULL) 648 strcleanall(vp); 649 650 ASSERT(stp->sd_siglist == NULL); 651 ASSERT(stp->sd_sigflags == 0); 652 653 if (STRMATED(stp)) { 654 struct stdata *strmatep = stp->sd_mate; 655 int waited = 1; 656 657 STRLOCKMATES(stp); 658 while (waited) { 659 waited = 0; 660 while (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) { 661 mutex_exit(&strmatep->sd_lock); 662 cv_wait(&stp->sd_monitor, &stp->sd_lock); 663 mutex_exit(&stp->sd_lock); 664 STRLOCKMATES(stp); 665 waited = 1; 666 } 667 while (strmatep->sd_flag & 668 (STWOPEN|STRCLOSE|STRPLUMB)) { 669 mutex_exit(&stp->sd_lock); 670 cv_wait(&strmatep->sd_monitor, 671 &strmatep->sd_lock); 672 mutex_exit(&strmatep->sd_lock); 673 STRLOCKMATES(stp); 674 waited = 1; 675 } 676 } 677 stp->sd_flag |= STRCLOSE; 678 STRUNLOCKMATES(stp); 679 } else { 680 mutex_enter(&stp->sd_lock); 681 stp->sd_flag |= STRCLOSE; 682 mutex_exit(&stp->sd_lock); 683 } 684 685 ASSERT(qp->q_first == NULL); /* No more delayed write */ 686 687 /* Check if an I_LINK was ever done on this stream */ 688 if (stp->sd_flag & STRHASLINKS) { 689 netstack_t *ns; 690 str_stack_t *ss; 691 692 ns = netstack_find_by_cred(crp); 693 ASSERT(ns != NULL); 694 ss = ns->netstack_str; 695 ASSERT(ss != NULL); 696 697 (void) munlinkall(stp, LINKCLOSE|LINKNORMAL, crp, &rval, ss); 698 netstack_rele(ss->ss_netstack); 699 } 700 701 while (_SAMESTR(qp)) { 702 /* 703 * Holding sd_lock prevents q_next from changing in 704 * this stream. 
705 */ 706 mutex_enter(&stp->sd_lock); 707 if (!(flag & (FNDELAY|FNONBLOCK)) && (stp->sd_closetime > 0)) { 708 709 /* 710 * sleep until awakened by strwsrv() or timeout 711 */ 712 for (;;) { 713 mutex_enter(QLOCK(qp->q_next)); 714 if (!(qp->q_next->q_mblkcnt)) { 715 mutex_exit(QLOCK(qp->q_next)); 716 break; 717 } 718 stp->sd_flag |= WSLEEP; 719 720 /* ensure strwsrv gets enabled */ 721 qp->q_next->q_flag |= QWANTW; 722 mutex_exit(QLOCK(qp->q_next)); 723 /* get out if we timed out or recv'd a signal */ 724 if (str_cv_wait(&qp->q_wait, &stp->sd_lock, 725 stp->sd_closetime, 0) <= 0) { 726 break; 727 } 728 } 729 stp->sd_flag &= ~WSLEEP; 730 } 731 mutex_exit(&stp->sd_lock); 732 733 rmq = qp->q_next; 734 if (rmq->q_flag & QISDRV) { 735 ASSERT(!_SAMESTR(rmq)); 736 wait_sq_svc(_RD(qp)->q_syncq); 737 } 738 739 qdetach(_RD(rmq), 1, flag, crp, B_FALSE); 740 } 741 742 /* 743 * Since we call pollwakeup in close() now, the poll list should 744 * be empty in most cases. The only exception is the layered devices 745 * (e.g. the console drivers with redirection modules pushed on top 746 * of it). We have to do this after calling qdetach() because 747 * the redirection module won't have torn down the console 748 * redirection until after qdetach() has been invoked. 749 */ 750 if (stp->sd_pollist.ph_list != NULL) { 751 pollwakeup(&stp->sd_pollist, POLLERR); 752 pollhead_clean(&stp->sd_pollist); 753 } 754 ASSERT(stp->sd_pollist.ph_list == NULL); 755 ASSERT(stp->sd_sidp == NULL); 756 ASSERT(stp->sd_pgidp == NULL); 757 758 /* Prevent qenable from re-enabling the stream head queue */ 759 disable_svc(_RD(qp)); 760 761 /* 762 * Wait until service procedure of each queue is 763 * run, if QINSERVICE is set. 764 */ 765 wait_svc(_RD(qp)); 766 767 /* 768 * Now, flush both queues. 769 */ 770 flushq(_RD(qp), FLUSHALL); 771 flushq(qp, FLUSHALL); 772 773 /* 774 * If the write queue of the stream head is pointing to a 775 * read queue, we have a twisted stream. 
If the read queue 776 * is alive, convert the stream head queues into a dead end. 777 * If the read queue is dead, free the dead pair. 778 */ 779 if (qp->q_next && !_SAMESTR(qp)) { 780 if (qp->q_next->q_qinfo == &deadrend) { /* half-closed pipe */ 781 flushq(qp->q_next, FLUSHALL); /* ensure no message */ 782 shfree(qp->q_next->q_stream); 783 freeq(qp->q_next); 784 freeq(_RD(qp)); 785 } else if (qp->q_next == _RD(qp)) { /* fifo */ 786 freeq(_RD(qp)); 787 } else { /* pipe */ 788 freestp = 0; 789 /* 790 * The q_info pointers are never accessed when 791 * SQLOCK is held. 792 */ 793 ASSERT(qp->q_syncq == _RD(qp)->q_syncq); 794 mutex_enter(SQLOCK(qp->q_syncq)); 795 qp->q_qinfo = &deadwend; 796 _RD(qp)->q_qinfo = &deadrend; 797 mutex_exit(SQLOCK(qp->q_syncq)); 798 } 799 } else { 800 freeq(_RD(qp)); /* free stream head queue pair */ 801 } 802 803 mutex_enter(&vp->v_lock); 804 if (stp->sd_iocblk) { 805 if (stp->sd_iocblk != (mblk_t *)-1) { 806 freemsg(stp->sd_iocblk); 807 } 808 stp->sd_iocblk = NULL; 809 } 810 stp->sd_vnode = NULL; 811 vp->v_stream = NULL; 812 mutex_exit(&vp->v_lock); 813 mutex_enter(&stp->sd_lock); 814 freemsg(stp->sd_cmdblk); 815 stp->sd_cmdblk = NULL; 816 stp->sd_flag &= ~STRCLOSE; 817 cv_broadcast(&stp->sd_monitor); 818 mutex_exit(&stp->sd_lock); 819 820 if (freestp) 821 shfree(stp); 822 return (0); 823 } 824 825 static int 826 strsink(queue_t *q, mblk_t *bp) 827 { 828 struct copyresp *resp; 829 830 switch (bp->b_datap->db_type) { 831 case M_FLUSH: 832 if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) { 833 *bp->b_rptr &= ~FLUSHR; 834 bp->b_flag |= MSGNOLOOP; 835 /* 836 * Protect against the driver passing up 837 * messages after it has done a qprocsoff. 
838 */ 839 if (_OTHERQ(q)->q_next == NULL) 840 freemsg(bp); 841 else 842 qreply(q, bp); 843 } else { 844 freemsg(bp); 845 } 846 break; 847 848 case M_COPYIN: 849 case M_COPYOUT: 850 if (bp->b_cont) { 851 freemsg(bp->b_cont); 852 bp->b_cont = NULL; 853 } 854 bp->b_datap->db_type = M_IOCDATA; 855 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp); 856 resp = (struct copyresp *)bp->b_rptr; 857 resp->cp_rval = (caddr_t)1; /* failure */ 858 /* 859 * Protect against the driver passing up 860 * messages after it has done a qprocsoff. 861 */ 862 if (_OTHERQ(q)->q_next == NULL) 863 freemsg(bp); 864 else 865 qreply(q, bp); 866 break; 867 868 case M_IOCTL: 869 if (bp->b_cont) { 870 freemsg(bp->b_cont); 871 bp->b_cont = NULL; 872 } 873 bp->b_datap->db_type = M_IOCNAK; 874 /* 875 * Protect against the driver passing up 876 * messages after it has done a qprocsoff. 877 */ 878 if (_OTHERQ(q)->q_next == NULL) 879 freemsg(bp); 880 else 881 qreply(q, bp); 882 break; 883 884 default: 885 freemsg(bp); 886 break; 887 } 888 889 return (0); 890 } 891 892 /* 893 * Clean up after a process when it closes a stream. This is called 894 * from closef for all closes, whereas strclose is called only for the 895 * last close on a stream. The siglist is scanned for entries for the 896 * current process, and these are removed. 
897 */ 898 void 899 strclean(struct vnode *vp) 900 { 901 strsig_t *ssp, *pssp, *tssp; 902 stdata_t *stp; 903 int update = 0; 904 905 TRACE_1(TR_FAC_STREAMS_FR, 906 TR_STRCLEAN, "strclean:%p", vp); 907 stp = vp->v_stream; 908 pssp = NULL; 909 mutex_enter(&stp->sd_lock); 910 ssp = stp->sd_siglist; 911 while (ssp) { 912 if (ssp->ss_pidp == curproc->p_pidp) { 913 tssp = ssp->ss_next; 914 if (pssp) 915 pssp->ss_next = tssp; 916 else 917 stp->sd_siglist = tssp; 918 mutex_enter(&pidlock); 919 PID_RELE(ssp->ss_pidp); 920 mutex_exit(&pidlock); 921 kmem_free(ssp, sizeof (strsig_t)); 922 update = 1; 923 ssp = tssp; 924 } else { 925 pssp = ssp; 926 ssp = ssp->ss_next; 927 } 928 } 929 if (update) { 930 stp->sd_sigflags = 0; 931 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 932 stp->sd_sigflags |= ssp->ss_events; 933 } 934 mutex_exit(&stp->sd_lock); 935 } 936 937 /* 938 * Used on the last close to remove any remaining items on the siglist. 939 * These could be present on the siglist due to I_ESETSIG calls that 940 * use process groups or processed that do not have an open file descriptor 941 * for this stream (Such entries would not be removed by strclean). 942 */ 943 static void 944 strcleanall(struct vnode *vp) 945 { 946 strsig_t *ssp, *nssp; 947 stdata_t *stp; 948 949 stp = vp->v_stream; 950 mutex_enter(&stp->sd_lock); 951 ssp = stp->sd_siglist; 952 stp->sd_siglist = NULL; 953 while (ssp) { 954 nssp = ssp->ss_next; 955 mutex_enter(&pidlock); 956 PID_RELE(ssp->ss_pidp); 957 mutex_exit(&pidlock); 958 kmem_free(ssp, sizeof (strsig_t)); 959 ssp = nssp; 960 } 961 stp->sd_sigflags = 0; 962 mutex_exit(&stp->sd_lock); 963 } 964 965 /* 966 * Retrieve the next message from the logical stream head read queue 967 * using either rwnext (if sync stream) or getq_noenab. 968 * It is the callers responsibility to call qbackenable after 969 * it is finished with the message. The caller should not call 970 * qbackenable until after any putback calls to avoid spurious backenabling. 
971 */ 972 mblk_t * 973 strget(struct stdata *stp, queue_t *q, struct uio *uiop, int first, 974 int *errorp) 975 { 976 mblk_t *bp; 977 int error; 978 ssize_t rbytes = 0; 979 980 /* Holding sd_lock prevents the read queue from changing */ 981 ASSERT(MUTEX_HELD(&stp->sd_lock)); 982 983 if (uiop != NULL && stp->sd_struiordq != NULL && 984 q->q_first == NULL && 985 (!first || (stp->sd_wakeq & RSLEEP))) { 986 /* 987 * Stream supports rwnext() for the read side. 988 * If this is the first time we're called by e.g. strread 989 * only do the downcall if there is a deferred wakeup 990 * (registered in sd_wakeq). 991 */ 992 struiod_t uiod; 993 994 if (first) 995 stp->sd_wakeq &= ~RSLEEP; 996 997 (void) uiodup(uiop, &uiod.d_uio, uiod.d_iov, 998 sizeof (uiod.d_iov) / sizeof (*uiod.d_iov)); 999 uiod.d_mp = 0; 1000 /* 1001 * Mark that a thread is in rwnext on the read side 1002 * to prevent strrput from nacking ioctls immediately. 1003 * When the last concurrent rwnext returns 1004 * the ioctls are nack'ed. 1005 */ 1006 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1007 stp->sd_struiodnak++; 1008 /* 1009 * Note: rwnext will drop sd_lock. 1010 */ 1011 error = rwnext(q, &uiod); 1012 ASSERT(MUTEX_NOT_HELD(&stp->sd_lock)); 1013 mutex_enter(&stp->sd_lock); 1014 stp->sd_struiodnak--; 1015 while (stp->sd_struiodnak == 0 && 1016 ((bp = stp->sd_struionak) != NULL)) { 1017 stp->sd_struionak = bp->b_next; 1018 bp->b_next = NULL; 1019 bp->b_datap->db_type = M_IOCNAK; 1020 /* 1021 * Protect against the driver passing up 1022 * messages after it has done a qprocsoff. 
1023 */ 1024 if (_OTHERQ(q)->q_next == NULL) 1025 freemsg(bp); 1026 else { 1027 mutex_exit(&stp->sd_lock); 1028 qreply(q, bp); 1029 mutex_enter(&stp->sd_lock); 1030 } 1031 } 1032 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1033 if (error == 0 || error == EWOULDBLOCK) { 1034 if ((bp = uiod.d_mp) != NULL) { 1035 *errorp = 0; 1036 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1037 return (bp); 1038 } 1039 error = 0; 1040 } else if (error == EINVAL) { 1041 /* 1042 * The stream plumbing must have 1043 * changed while we were away, so 1044 * just turn off rwnext()s. 1045 */ 1046 error = 0; 1047 } else if (error == EBUSY) { 1048 /* 1049 * The module might have data in transit using putnext 1050 * Fall back on waiting + getq. 1051 */ 1052 error = 0; 1053 } else { 1054 *errorp = error; 1055 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1056 return (NULL); 1057 } 1058 /* 1059 * Try a getq in case a rwnext() generated mblk 1060 * has bubbled up via strrput(). 1061 */ 1062 } 1063 *errorp = 0; 1064 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1065 1066 /* 1067 * If we have a valid uio, try and use this as a guide for how 1068 * many bytes to retrieve from the queue via getq_noenab(). 1069 * Doing this can avoid unneccesary counting of overlong 1070 * messages in putback(). We currently only do this for sockets 1071 * and only if there is no sd_rputdatafunc hook. 1072 * 1073 * The sd_rputdatafunc hook transforms the entire message 1074 * before any bytes in it can be given to a client. So, rbytes 1075 * must be 0 if there is a hook. 1076 */ 1077 if ((uiop != NULL) && (stp->sd_vnode->v_type == VSOCK) && 1078 (stp->sd_rputdatafunc == NULL)) 1079 rbytes = uiop->uio_resid; 1080 1081 return (getq_noenab(q, rbytes)); 1082 } 1083 1084 /* 1085 * Copy out the message pointed to by `bp' into the uio pointed to by `uiop'. 1086 * If the message does not fit in the uio the remainder of it is returned; 1087 * otherwise NULL is returned. Any embedded zero-length mblk_t's are 1088 * consumed, even if uio_resid reaches zero. 
On error, `*errorp' is set to 1089 * the error code, the message is consumed, and NULL is returned. 1090 */ 1091 static mblk_t * 1092 struiocopyout(mblk_t *bp, struct uio *uiop, int *errorp) 1093 { 1094 int error; 1095 ptrdiff_t n; 1096 mblk_t *nbp; 1097 1098 ASSERT(bp->b_wptr >= bp->b_rptr); 1099 1100 do { 1101 if ((n = MIN(uiop->uio_resid, MBLKL(bp))) != 0) { 1102 ASSERT(n > 0); 1103 1104 error = uiomove(bp->b_rptr, n, UIO_READ, uiop); 1105 if (error != 0) { 1106 freemsg(bp); 1107 *errorp = error; 1108 return (NULL); 1109 } 1110 } 1111 1112 bp->b_rptr += n; 1113 while (bp != NULL && (bp->b_rptr >= bp->b_wptr)) { 1114 nbp = bp; 1115 bp = bp->b_cont; 1116 freeb(nbp); 1117 } 1118 } while (bp != NULL && uiop->uio_resid > 0); 1119 1120 *errorp = 0; 1121 return (bp); 1122 } 1123 1124 /* 1125 * Read a stream according to the mode flags in sd_flag: 1126 * 1127 * (default mode) - Byte stream, msg boundaries are ignored 1128 * RD_MSGDIS (msg discard) - Read on msg boundaries and throw away 1129 * any data remaining in msg 1130 * RD_MSGNODIS (msg non-discard) - Read on msg boundaries and put back 1131 * any remaining data on head of read queue 1132 * 1133 * Consume readable messages on the front of the queue until 1134 * ttolwp(curthread)->lwp_count 1135 * is satisfied, the readable messages are exhausted, or a message 1136 * boundary is reached in a message mode. If no data was read and 1137 * the stream was not opened with the NDELAY flag, block until data arrives. 1138 * Otherwise return the data read and update the count. 1139 * 1140 * In default mode a 0 length message signifies end-of-file and terminates 1141 * a read in progress. The 0 length message is removed from the queue 1142 * only if it is the only message read (no data is read). 1143 * 1144 * An attempt to read an M_PROTO or M_PCPROTO message results in an 1145 * EBADMSG error return, unless either RD_PROTDAT or RD_PROTDIS are set. 1146 * If RD_PROTDAT is set, M_PROTO and M_PCPROTO messages are read as data. 
* If RD_PROTDIS is set, the M_PROTO and M_PCPROTO parts of the message
 * are unlinked from any M_DATA blocks in the message, the protos are
 * thrown away, and the data is read.
 *
 * Arguments:
 *	vp	- vnode of the stream; vp->v_stream must be set.
 *	uiop	- destination of the data; consumed by struiocopyout().
 *	crp	- caller's credentials (only passed to the trace points).
 *
 * Returns 0 or an errno value.  sd_lock is taken on entry and is released
 * again on every return path.
 */
/* ARGSUSED */
int
strread(struct vnode *vp, struct uio *uiop, cred_t *crp)
{
	struct stdata *stp;
	mblk_t *bp, *nbp;
	queue_t *q;
	int error = 0;
	uint_t old_sd_flag;	/* sd_flag sampled at the top of each pass */
	int first;		/* 1 on the first strget() of each pass */
	char rflg;		/* set once some data has been copied out */
	uint_t mark;		/* Contains MSG*MARK and _LASTMARK */
#define	_LASTMARK 0x8000	/* Distinct from MSG*MARK */
	short delim;
	unsigned char pri = 0;	/* band of the most recent message */
	char waitflag;
	unsigned char type;

	TRACE_1(TR_FAC_STREAMS_FR,
	    TR_STRREAD_ENTER, "strread:%p", vp);
	ASSERT(vp->v_stream);
	stp = vp->v_stream;

	mutex_enter(&stp->sd_lock);

	/*
	 * Access check.  i_straccess() evaluates to 0 without calling
	 * straccess() when the stream has no session (sd_sidp == NULL) or
	 * its vnode is a FIFO.
	 */
	if ((error = i_straccess(stp, JCREAD)) != 0) {
		mutex_exit(&stp->sd_lock);
		return (error);
	}

	/* Fail early if a read error or multiplexor link is recorded. */
	if (stp->sd_flag & (STRDERR|STPLEX)) {
		error = strgeterr(stp, STRDERR|STPLEX, 0);
		if (error != 0) {
			mutex_exit(&stp->sd_lock);
			return (error);
		}
	}

	/*
	 * Loop terminates when uiop->uio_resid == 0.
	 */
	rflg = 0;
	waitflag = READWAIT;
	q = _RD(stp->sd_wrq);
	for (;;) {
		ASSERT(MUTEX_HELD(&stp->sd_lock));
		old_sd_flag = stp->sd_flag;
		mark = 0;
		delim = 0;
		first = 1;
		/*
		 * Fetch the next message, sleeping in strwaitq() while
		 * none is available.  Every "goto oops" in this loop is
		 * made with sd_lock held.
		 */
		while ((bp = strget(stp, q, uiop, first, &error)) == NULL) {
			int done = 0;

			ASSERT(MUTEX_HELD(&stp->sd_lock));

			if (error != 0)
				goto oops;

			if (stp->sd_flag & (STRHUP|STREOF)) {
				goto oops;
			}
			/*
			 * Data was already returned and delimiters are not
			 * in use: return what we have instead of sleeping.
			 */
			if (rflg && !(stp->sd_flag & STRDELIM)) {
				goto oops;
			}
			/*
			 * If a read(fd,buf,0) has been done, there is no
			 * need to sleep. We always have zero bytes to
			 * return.
			 */
			if (uiop->uio_resid == 0) {
				goto oops;
			}

			qbackenable(q, 0);

			TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_WAIT,
			    "strread calls strwaitq:%p, %p, %p",
			    vp, uiop, crp);
			if ((error = strwaitq(stp, waitflag, uiop->uio_resid,
			    uiop->uio_fmode, -1, &done)) != 0 || done) {
				TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_DONE,
				    "strread error or done:%p, %p, %p",
				    vp, uiop, crp);
				/*
				 * With FNDELAY on the file and OLDNDELAY on
				 * the stream, EAGAIN is reported as success.
				 */
				if ((uiop->uio_fmode & FNDELAY) &&
				    (stp->sd_flag & OLDNDELAY) &&
				    (error == EAGAIN))
					error = 0;
				goto oops;
			}
			TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_AWAKE,
			    "strread awakes:%p, %p, %p", vp, uiop, crp);
			/* Repeat the access check after having slept. */
			if ((error = i_straccess(stp, JCREAD)) != 0) {
				goto oops;
			}
			first = 0;
		}

		ASSERT(MUTEX_HELD(&stp->sd_lock));
		ASSERT(bp);
		pri = bp->b_band;
		/*
		 * Extract any mark information. If the message is not
		 * completely consumed this information will be put in the mblk
		 * that is putback.
		 * If MSGMARKNEXT is set and the message is completely consumed
		 * the STRATMARK flag will be set below. Likewise, if
		 * MSGNOTMARKNEXT is set and the message is
		 * completely consumed STRNOTATMARK will be set.
		 *
		 * For some unknown reason strread only breaks the read at the
		 * last mark.
		 */
		mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT);
		ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
		    (MSGMARKNEXT|MSGNOTMARKNEXT));
		if (mark != 0 && bp == stp->sd_mark) {
			/*
			 * This is the message recorded in sd_mark.  If data
			 * has already been read, put it back and return so
			 * the read stops short of it.
			 */
			if (rflg) {
				putback(stp, q, bp, pri);
				goto oops;
			}
			mark |= _LASTMARK;
			stp->sd_mark = NULL;
		}
		if ((stp->sd_flag & STRDELIM) && (bp->b_flag & MSGDELIM))
			delim = 1;
		/*
		 * sd_lock is dropped here.  Inside the switch below it is
		 * only held where explicitly re-acquired, so "goto oops1"
		 * is used when it is not held and "goto oops" when it is.
		 */
		mutex_exit(&stp->sd_lock);

		if (STREAM_NEEDSERVICE(stp))
			stream_runservice(stp);

		type = bp->b_datap->db_type;

		switch (type) {

		case M_DATA:
ismdata:
			if (msgnodata(bp)) {
				if (mark || delim) {
					freemsg(bp);
				} else if (rflg) {

					/*
					 * If already read data put zero
					 * length message back on queue else
					 * free msg and return 0.
					 */
					bp->b_band = pri;
					mutex_enter(&stp->sd_lock);
					putback(stp, q, bp, pri);
					mutex_exit(&stp->sd_lock);
				} else {
					freemsg(bp);
				}
				error =  0;
				goto oops1;
			}

			rflg = 1;
			/*
			 * NOTE(review): presumably NOINTR keeps later
			 * strwaitq() sleeps from being interrupted once data
			 * has been consumed - confirm against strwaitq().
			 */
			waitflag |= NOINTR;
			/* On error struiocopyout() has freed the message. */
			bp = struiocopyout(bp, uiop, &error);
			if (error != 0)
				goto oops1;

			mutex_enter(&stp->sd_lock);
			if (bp) {
				/*
				 * Have remaining data in message.
				 * Free msg if in discard mode.
				 */
				if (stp->sd_read_opt & RD_MSGDIS) {
					freemsg(bp);
				} else {
					/*
					 * Restore the band, mark and
					 * delimiter state on the remainder
					 * before putting it back.
					 */
					bp->b_band = pri;
					if ((mark & _LASTMARK) &&
					    (stp->sd_mark == NULL))
						stp->sd_mark = bp;
					bp->b_flag |= mark & ~_LASTMARK;
					if (delim)
						bp->b_flag |= MSGDELIM;
					if (msgnodata(bp))
						freemsg(bp);
					else
						putback(stp, q, bp, pri);
				}
			} else {
				/*
				 * Consumed the complete message.
				 * Move the MSG*MARKNEXT information
				 * to the stream head just in case
				 * the read queue becomes empty.
				 *
				 * If the stream head was at the mark
				 * (STRATMARK) before we dropped sd_lock above
				 * and some data was consumed then we have
				 * moved past the mark thus STRATMARK is
				 * cleared. However, if a message arrived in
				 * strrput during the copyout above causing
				 * STRATMARK to be set we can not clear that
				 * flag.
				 */
				if (mark &
				    (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) {
					if (mark & MSGMARKNEXT) {
						stp->sd_flag &= ~STRNOTATMARK;
						stp->sd_flag |= STRATMARK;
					} else if (mark & MSGNOTMARKNEXT) {
						stp->sd_flag &= ~STRATMARK;
						stp->sd_flag |= STRNOTATMARK;
					} else {
						stp->sd_flag &=
						    ~(STRATMARK|STRNOTATMARK);
					}
				} else if (rflg && (old_sd_flag & STRATMARK)) {
					stp->sd_flag &= ~STRATMARK;
				}
			}

			/*
			 * Check for signal messages at the front of the read
			 * queue and generate the signal(s) if appropriate.
			 * The only signal that can be on queue is M_SIG at
			 * this point.
			 */
			while ((((bp = q->q_first)) != NULL) &&
			    (bp->b_datap->db_type == M_SIG)) {
				bp = getq_noenab(q, 0);
				/*
				 * sd_lock is held so the content of the
				 * read queue can not change.
				 */
				ASSERT(bp != NULL && DB_TYPE(bp) == M_SIG);
				strsignal_nolock(stp, *bp->b_rptr, bp->b_band);
				mutex_exit(&stp->sd_lock);
				freemsg(bp);
				if (STREAM_NEEDSERVICE(stp))
					stream_runservice(stp);
				mutex_enter(&stp->sd_lock);
			}

			/*
			 * Finished when the request is satisfied, the last
			 * mark or a delimiter was reached, or a message
			 * read mode is in effect.
			 */
			if ((uiop->uio_resid == 0) || (mark & _LASTMARK) ||
			    delim ||
			    (stp->sd_read_opt & (RD_MSGDIS|RD_MSGNODIS))) {
				goto oops;
			}
			continue;

		case M_SIG:
			strsignal(stp, *bp->b_rptr, (int32_t)bp->b_band);
			freemsg(bp);
			/* Re-take sd_lock for the next pass of the loop. */
			mutex_enter(&stp->sd_lock);
			continue;

		case M_PROTO:
		case M_PCPROTO:
			/*
			 * Only data messages are readable.
			 * Any others generate an error, unless
			 * RD_PROTDIS or RD_PROTDAT is set.
			 */
			if (stp->sd_read_opt & RD_PROTDAT) {
				/*
				 * NOTE(review): this walks b_next, whereas
				 * the RD_PROTDIS branch below walks the
				 * message with unlinkb(); confirm b_next is
				 * the intended link here.
				 */
				for (nbp = bp; nbp; nbp = nbp->b_next) {
					if ((nbp->b_datap->db_type ==
					    M_PROTO) ||
					    (nbp->b_datap->db_type ==
					    M_PCPROTO)) {
						nbp->b_datap->db_type = M_DATA;
					} else {
						break;
					}
				}
				/*
				 * clear stream head hi pri flag based on
				 * first message
				 */
				if (type == M_PCPROTO) {
					mutex_enter(&stp->sd_lock);
					stp->sd_flag &= ~STRPRI;
					mutex_exit(&stp->sd_lock);
				}
				goto ismdata;
			} else if (stp->sd_read_opt & RD_PROTDIS) {
				/*
				 * discard non-data messages
				 */
				while (bp &&
				    ((bp->b_datap->db_type == M_PROTO) ||
				    (bp->b_datap->db_type == M_PCPROTO))) {
					nbp = unlinkb(bp);
					freeb(bp);
					bp = nbp;
				}
				/*
				 * clear stream head hi pri flag based on
				 * first message
				 */
				if (type == M_PCPROTO) {
					mutex_enter(&stp->sd_lock);
					stp->sd_flag &= ~STRPRI;
					mutex_exit(&stp->sd_lock);
				}
				if (bp) {
					bp->b_band = pri;
					goto ismdata;
				} else {
					/* Nothing but protos; next pass. */
					break;
				}
			}
			/* FALLTHRU */
		case M_PASSFP:
			if ((bp->b_datap->db_type == M_PASSFP) &&
			    (stp->sd_read_opt & RD_PROTDIS)) {
				freemsg(bp);
				break;
			}
			/*
			 * Not readable: leave the message on the queue.
			 * EBADMSG is only reported when no data has been
			 * returned by this call.
			 */
			mutex_enter(&stp->sd_lock);
			putback(stp, q, bp, pri);
			mutex_exit(&stp->sd_lock);
			if (rflg == 0)
				error = EBADMSG;
			goto oops1;

		default:
			/*
			 * Garbage on stream head read queue.
			 */
			cmn_err(CE_WARN, "bad %x found at stream head\n",
			    bp->b_datap->db_type);
			freemsg(bp);
			goto oops1;
		}
		/* A "break" out of the switch re-takes sd_lock and loops. */
		mutex_enter(&stp->sd_lock);
	}
oops:
	/* Exit path taken with sd_lock held. */
	mutex_exit(&stp->sd_lock);
oops1:
	/* Exit path taken with sd_lock already dropped. */
	qbackenable(q, pri);
	return (error);
#undef	_LASTMARK
}

/*
 * Default processing of M_PROTO/M_PCPROTO messages.
 * Determine which wakeups and signals are needed.
 * This can be replaced by a user-specified procedure for kernel users
 * of STREAMS.
1496 */ 1497 /* ARGSUSED */ 1498 mblk_t * 1499 strrput_proto(vnode_t *vp, mblk_t *mp, 1500 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 1501 strsigset_t *allmsgsigs, strpollset_t *pollwakeups) 1502 { 1503 *wakeups = RSLEEP; 1504 *allmsgsigs = 0; 1505 1506 switch (mp->b_datap->db_type) { 1507 case M_PROTO: 1508 if (mp->b_band == 0) { 1509 *firstmsgsigs = S_INPUT | S_RDNORM; 1510 *pollwakeups = POLLIN | POLLRDNORM; 1511 } else { 1512 *firstmsgsigs = S_INPUT | S_RDBAND; 1513 *pollwakeups = POLLIN | POLLRDBAND; 1514 } 1515 break; 1516 case M_PCPROTO: 1517 *firstmsgsigs = S_HIPRI; 1518 *pollwakeups = POLLPRI; 1519 break; 1520 } 1521 return (mp); 1522 } 1523 1524 /* 1525 * Default processing of everything but M_DATA, M_PROTO, M_PCPROTO and 1526 * M_PASSFP messages. 1527 * Determine which wakeups and signals are needed. 1528 * This can be replaced by a user-specified procedure for kernel users 1529 * of STREAMS. 1530 */ 1531 /* ARGSUSED */ 1532 mblk_t * 1533 strrput_misc(vnode_t *vp, mblk_t *mp, 1534 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 1535 strsigset_t *allmsgsigs, strpollset_t *pollwakeups) 1536 { 1537 *wakeups = 0; 1538 *firstmsgsigs = 0; 1539 *allmsgsigs = 0; 1540 *pollwakeups = 0; 1541 return (mp); 1542 } 1543 1544 /* 1545 * Stream read put procedure. Called from downstream driver/module 1546 * with messages for the stream head. Data, protocol, and in-stream 1547 * signal messages are placed on the queue, others are handled directly. 
*/
/*
 * Arguments:
 *	q	- stream head read queue; q->q_ptr is the struct stdata.
 *	bp	- the arriving message; bp->b_next must be NULL.
 *
 * Every return statement in this function returns 0.  sd_lock is taken
 * and released internally.
 */
int
strrput(queue_t *q, mblk_t *bp)
{
	struct stdata *stp;
	ulong_t rput_opt;
	strwakeup_t wakeups;
	strsigset_t firstmsgsigs;	/* Signals if first message on queue */
	strsigset_t allmsgsigs;		/* Signals for all messages */
	strsigset_t signals;		/* Signals events to generate */
	strpollset_t pollwakeups;
	mblk_t *nextbp;
	uchar_t band = 0;
	int hipri_sig;

	stp = (struct stdata *)q->q_ptr;
	/*
	 * Use rput_opt for optimized access to the SR_ flags except
	 * SR_POLLIN. That flag has to be checked under sd_lock since it
	 * is modified by strpoll().
	 */
	rput_opt = stp->sd_rput_opt;

	ASSERT(qclaimed(q));
	TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_ENTER,
	    "strrput called with message type:q %p bp %p", q, bp);

	/*
	 * Perform initial processing and pass to the parameterized functions.
	 * Every case of this switch that does not return leaves with sd_lock
	 * held.
	 */
	ASSERT(bp->b_next == NULL);

	switch (bp->b_datap->db_type) {
	case M_DATA:
		/*
		 * sockfs is the only consumer of STREOF and when it is set,
		 * it implies that the receiver is not interested in receiving
		 * any more data, hence the mblk is freed to prevent unnecessary
		 * message queueing at the stream head.
		 *
		 * NOTE(review): this compares the whole of sd_flag for
		 * equality with STREOF rather than testing the bit, and it
		 * reads sd_flag without sd_lock.  Confirm whether
		 * (sd_flag & STREOF) was intended.
		 */
		if (stp->sd_flag == STREOF) {
			freemsg(bp);
			return (0);
		}
		if ((rput_opt & SR_IGN_ZEROLEN) &&
		    bp->b_rptr == bp->b_wptr && msgnodata(bp)) {
			/*
			 * Ignore zero-length M_DATA messages. These might be
			 * generated by some transports.
			 * The zero-length M_DATA messages, even if they
			 * are ignored, should affect the atmark tracking and
			 * should wake up a thread sleeping in strwaitmark.
			 */
			mutex_enter(&stp->sd_lock);
			if (bp->b_flag & MSGMARKNEXT) {
				/*
				 * Record the position of the mark either
				 * in q_last or in STRATMARK.
				 */
				if (q->q_last != NULL) {
					q->q_last->b_flag &= ~MSGNOTMARKNEXT;
					q->q_last->b_flag |= MSGMARKNEXT;
				} else {
					stp->sd_flag &= ~STRNOTATMARK;
					stp->sd_flag |= STRATMARK;
				}
			} else if (bp->b_flag & MSGNOTMARKNEXT) {
				/*
				 * Record that this is not the position of
				 * the mark either in q_last or in
				 * STRNOTATMARK.
				 */
				if (q->q_last != NULL) {
					q->q_last->b_flag &= ~MSGMARKNEXT;
					q->q_last->b_flag |= MSGNOTMARKNEXT;
				} else {
					stp->sd_flag &= ~STRATMARK;
					stp->sd_flag |= STRNOTATMARK;
				}
			}
			if (stp->sd_flag & RSLEEP) {
				stp->sd_flag &= ~RSLEEP;
				cv_broadcast(&q->q_wait);
			}
			mutex_exit(&stp->sd_lock);
			freemsg(bp);
			return (0);
		}
		/* Same defaults as strrput_proto() computes for M_PROTO. */
		wakeups = RSLEEP;
		if (bp->b_band == 0) {
			firstmsgsigs = S_INPUT | S_RDNORM;
			pollwakeups = POLLIN | POLLRDNORM;
		} else {
			firstmsgsigs = S_INPUT | S_RDBAND;
			pollwakeups = POLLIN | POLLRDBAND;
		}
		if (rput_opt & SR_SIGALLDATA)
			allmsgsigs = firstmsgsigs;
		else
			allmsgsigs = 0;

		mutex_enter(&stp->sd_lock);
		if ((rput_opt & SR_CONSOL_DATA) &&
		    (q->q_last != NULL) &&
		    (bp->b_flag & (MSGMARK|MSGDELIM)) == 0) {
			/*
			 * Consolidate an M_DATA message onto an M_DATA,
			 * M_PROTO, or M_PCPROTO by merging it with q_last.
			 * The consolidation does not take place if
			 * the old message is marked with either of the
			 * marks or the delim flag or if the new
			 * message is marked with MSGMARK. The MSGMARK
			 * check is needed to handle the odd semantics of
			 * MSGMARK where essentially the whole message
			 * is to be treated as marked.
			 * Carry any MSGMARKNEXT  and MSGNOTMARKNEXT from the
			 * new message to the front of the b_cont chain.
			 */
			mblk_t *lbp = q->q_last;
			unsigned char db_type = lbp->b_datap->db_type;

			if ((db_type == M_DATA || db_type == M_PROTO ||
			    db_type == M_PCPROTO) &&
			    !(lbp->b_flag & (MSGDELIM|MSGMARK|MSGMARKNEXT))) {
				/*
				 * Take q_last off the queue; the merged
				 * message is queued again by the putq() in
				 * the one_more section below.
				 */
				rmvq_noenab(q, lbp);
				/*
				 * The first message in the b_cont list
				 * tracks MSGMARKNEXT and MSGNOTMARKNEXT.
				 * We need to handle the case where we
				 * are appending:
				 *
				 * 1) a MSGMARKNEXT to a MSGNOTMARKNEXT.
				 * 2) a MSGMARKNEXT to a plain message.
				 * 3) a MSGNOTMARKNEXT to a plain message
				 * 4) a MSGNOTMARKNEXT to a MSGNOTMARKNEXT
				 *    message.
				 *
				 * Thus we never append a MSGMARKNEXT or
				 * MSGNOTMARKNEXT to a MSGMARKNEXT message.
				 */
				if (bp->b_flag & MSGMARKNEXT) {
					lbp->b_flag |= MSGMARKNEXT;
					lbp->b_flag &= ~MSGNOTMARKNEXT;
					bp->b_flag &= ~MSGMARKNEXT;
				} else if (bp->b_flag & MSGNOTMARKNEXT) {
					lbp->b_flag |= MSGNOTMARKNEXT;
					bp->b_flag &= ~MSGNOTMARKNEXT;
				}

				linkb(lbp, bp);
				bp = lbp;
				/*
				 * The new message logically isn't the first
				 * even though the q_first check below thinks
				 * it is. Clear the firstmsgsigs to make it
				 * not appear to be first.
				 */
				firstmsgsigs = 0;
			}
		}
		break;

	case M_PASSFP:
		wakeups = RSLEEP;
		allmsgsigs = 0;
		if (bp->b_band == 0) {
			firstmsgsigs = S_INPUT | S_RDNORM;
			pollwakeups = POLLIN | POLLRDNORM;
		} else {
			firstmsgsigs = S_INPUT | S_RDBAND;
			pollwakeups = POLLIN | POLLRDBAND;
		}
		mutex_enter(&stp->sd_lock);
		break;

	case M_PROTO:
	case M_PCPROTO:
		/* The hook is called without sd_lock held. */
		ASSERT(stp->sd_rprotofunc != NULL);
		bp = (stp->sd_rprotofunc)(stp->sd_vnode, bp,
		    &wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups);
#define	ALLSIG	(S_INPUT|S_HIPRI|S_OUTPUT|S_MSG|S_ERROR|S_HANGUP|S_RDNORM|\
		S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)
#define	ALLPOLL	(POLLIN|POLLPRI|POLLOUT|POLLRDNORM|POLLWRNORM|POLLRDBAND|\
		POLLWRBAND)

		/* Sanity-check what the hook asked for. */
		ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0);
		ASSERT((firstmsgsigs & ~ALLSIG) == 0);
		ASSERT((allmsgsigs & ~ALLSIG) == 0);
		ASSERT((pollwakeups & ~ALLPOLL) == 0);

		mutex_enter(&stp->sd_lock);
		break;

	default:
		/* The hook is called without sd_lock held. */
		ASSERT(stp->sd_rmiscfunc != NULL);
		bp = (stp->sd_rmiscfunc)(stp->sd_vnode, bp,
		    &wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups);
		ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0);
		ASSERT((firstmsgsigs & ~ALLSIG) == 0);
		ASSERT((allmsgsigs & ~ALLSIG) == 0);
		ASSERT((pollwakeups & ~ALLPOLL) == 0);
#undef	ALLSIG
#undef	ALLPOLL
		mutex_enter(&stp->sd_lock);
		break;
	}
	ASSERT(MUTEX_HELD(&stp->sd_lock));

	/* By default generate superset of signals */
	signals = (firstmsgsigs | allmsgsigs);

	/*
	 * The  proto and misc functions can return multiple messages
	 * as a b_next chain. Such messages are processed separately.
	 */
one_more:
	hipri_sig = 0;
	if (bp == NULL) {
		nextbp = NULL;
	} else {
		/* Detach this message from the rest of the b_next chain. */
		nextbp = bp->b_next;
		bp->b_next = NULL;

		switch (bp->b_datap->db_type) {
		case M_PCPROTO:
			/*
			 * Only one priority protocol message is allowed at the
			 * stream head at a time.
			 */
			if (stp->sd_flag & STRPRI) {
				TRACE_0(TR_FAC_STREAMS_FR, TR_STRRPUT_PROTERR,
				    "M_PCPROTO already at head");
				freemsg(bp);
				mutex_exit(&stp->sd_lock);
				goto done;
			}
			stp->sd_flag |= STRPRI;
			hipri_sig = 1;
			/* FALLTHRU */
		case M_DATA:
		case M_PROTO:
		case M_PASSFP:
			band = bp->b_band;
			/*
			 * Marking doesn't work well when messages
			 * are marked in more than one band.  We only
			 * remember the last message received, even if
			 * it is placed on the queue ahead of other
			 * marked messages.
			 */
			if (bp->b_flag & MSGMARK)
				stp->sd_mark = bp;
			(void) putq(q, bp);

			/*
			 * If message is a PCPROTO message, always use
			 * firstmsgsigs to determine if a signal should be
			 * sent as strrput is the only place to send
			 * signals for PCPROTO. Other messages are based on
			 * the STRGETINPROG flag. The flag determines if
			 * strrput or (k)strgetmsg will be responsible for
			 * sending the signals, in the firstmsgsigs case.
			 */
			if ((hipri_sig == 1) ||
			    (((stp->sd_flag & STRGETINPROG) == 0) &&
			    (q->q_first == bp)))
				signals = (firstmsgsigs | allmsgsigs);
			else
				signals = allmsgsigs;
			break;

		default:
			/* strrput_nondata() is called without sd_lock. */
			mutex_exit(&stp->sd_lock);
			(void) strrput_nondata(q, bp);
			mutex_enter(&stp->sd_lock);
			break;
		}
	}
	ASSERT(MUTEX_HELD(&stp->sd_lock));
	/*
	 * Wake sleeping read/getmsg and cancel deferred wakeup
	 */
	if (wakeups & RSLEEP)
		stp->sd_wakeq &= ~RSLEEP;

	/* Only wake the sides that are actually flagged as sleeping. */
	wakeups &= stp->sd_flag;
	if (wakeups & RSLEEP) {
		stp->sd_flag &= ~RSLEEP;
		cv_broadcast(&q->q_wait);
	}
	if (wakeups & WSLEEP) {
		stp->sd_flag &= ~WSLEEP;
		cv_broadcast(&_WR(q)->q_wait);
	}

	if (pollwakeups != 0) {
		if (pollwakeups == (POLLIN | POLLRDNORM)) {
			/*
			 * Can't use rput_opt since it was not
			 * read when sd_lock was held and SR_POLLIN is changed
			 * by strpoll() under sd_lock.
			 */
			if (!(stp->sd_rput_opt & SR_POLLIN))
				goto no_pollwake;
			stp->sd_rput_opt &= ~SR_POLLIN;
		}
		/* pollwakeup() is called with sd_lock dropped. */
		mutex_exit(&stp->sd_lock);
		pollwakeup(&stp->sd_pollist, pollwakeups);
		mutex_enter(&stp->sd_lock);
	}
no_pollwake:

	/*
	 * strsendsig can handle multiple signals with a
	 * single call.
	 */
	if (stp->sd_sigflags & signals)
		strsendsig(stp->sd_siglist, signals, band, 0);
	mutex_exit(&stp->sd_lock);


done:
	/* sd_lock is not held here. */
	if (nextbp == NULL)
		return (0);

	/*
	 * Any signals were handled the first time.
	 * Wakeups and pollwakeups are redone to avoid any race
	 * conditions - all the messages are not queued until the
	 * last message has been processed by strrput.
1878 */ 1879 bp = nextbp; 1880 signals = firstmsgsigs = allmsgsigs = 0; 1881 mutex_enter(&stp->sd_lock); 1882 goto one_more; 1883 } 1884 1885 static void 1886 log_dupioc(queue_t *rq, mblk_t *bp) 1887 { 1888 queue_t *wq, *qp; 1889 char *modnames, *mnp, *dname; 1890 size_t maxmodstr; 1891 boolean_t islast; 1892 1893 /* 1894 * Allocate a buffer large enough to hold the names of nstrpush modules 1895 * and one driver, with spaces between and NUL terminator. If we can't 1896 * get memory, then we'll just log the driver name. 1897 */ 1898 maxmodstr = nstrpush * (FMNAMESZ + 1); 1899 mnp = modnames = kmem_alloc(maxmodstr, KM_NOSLEEP); 1900 1901 /* march down write side to print log message down to the driver */ 1902 wq = WR(rq); 1903 1904 /* make sure q_next doesn't shift around while we're grabbing data */ 1905 claimstr(wq); 1906 qp = wq->q_next; 1907 do { 1908 dname = Q2NAME(qp); 1909 islast = !SAMESTR(qp) || qp->q_next == NULL; 1910 if (modnames == NULL) { 1911 /* 1912 * If we don't have memory, then get the driver name in 1913 * the log where we can see it. Note that memory 1914 * pressure is a possible cause of these sorts of bugs. 1915 */ 1916 if (islast) { 1917 modnames = dname; 1918 maxmodstr = 0; 1919 } 1920 } else { 1921 mnp += snprintf(mnp, FMNAMESZ + 1, "%s", dname); 1922 if (!islast) 1923 *mnp++ = ' '; 1924 } 1925 qp = qp->q_next; 1926 } while (!islast); 1927 releasestr(wq); 1928 /* Cannot happen unless stream head is corrupt. */ 1929 ASSERT(modnames != NULL); 1930 (void) strlog(rq->q_qinfo->qi_minfo->mi_idnum, 0, 1, 1931 SL_CONSOLE|SL_TRACE|SL_ERROR, 1932 "Warning: stream %p received duplicate %X M_IOC%s; module list: %s", 1933 rq->q_ptr, ((struct iocblk *)bp->b_rptr)->ioc_cmd, 1934 (DB_TYPE(bp) == M_IOCACK ? 
"ACK" : "NAK"), modnames); 1935 if (maxmodstr != 0) 1936 kmem_free(modnames, maxmodstr); 1937 } 1938 1939 int 1940 strrput_nondata(queue_t *q, mblk_t *bp) 1941 { 1942 struct stdata *stp; 1943 struct iocblk *iocbp; 1944 struct stroptions *sop; 1945 struct copyreq *reqp; 1946 struct copyresp *resp; 1947 unsigned char bpri; 1948 unsigned char flushed_already = 0; 1949 1950 stp = (struct stdata *)q->q_ptr; 1951 1952 ASSERT(!(stp->sd_flag & STPLEX)); 1953 ASSERT(qclaimed(q)); 1954 1955 switch (bp->b_datap->db_type) { 1956 case M_ERROR: 1957 /* 1958 * An error has occurred downstream, the errno is in the first 1959 * bytes of the message. 1960 */ 1961 if ((bp->b_wptr - bp->b_rptr) == 2) { /* New flavor */ 1962 unsigned char rw = 0; 1963 1964 mutex_enter(&stp->sd_lock); 1965 if (*bp->b_rptr != NOERROR) { /* read error */ 1966 if (*bp->b_rptr != 0) { 1967 if (stp->sd_flag & STRDERR) 1968 flushed_already |= FLUSHR; 1969 stp->sd_flag |= STRDERR; 1970 rw |= FLUSHR; 1971 } else { 1972 stp->sd_flag &= ~STRDERR; 1973 } 1974 stp->sd_rerror = *bp->b_rptr; 1975 } 1976 bp->b_rptr++; 1977 if (*bp->b_rptr != NOERROR) { /* write error */ 1978 if (*bp->b_rptr != 0) { 1979 if (stp->sd_flag & STWRERR) 1980 flushed_already |= FLUSHW; 1981 stp->sd_flag |= STWRERR; 1982 rw |= FLUSHW; 1983 } else { 1984 stp->sd_flag &= ~STWRERR; 1985 } 1986 stp->sd_werror = *bp->b_rptr; 1987 } 1988 if (rw) { 1989 TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_WAKE, 1990 "strrput cv_broadcast:q %p, bp %p", 1991 q, bp); 1992 cv_broadcast(&q->q_wait); /* readers */ 1993 cv_broadcast(&_WR(q)->q_wait); /* writers */ 1994 cv_broadcast(&stp->sd_monitor); /* ioctllers */ 1995 1996 mutex_exit(&stp->sd_lock); 1997 pollwakeup(&stp->sd_pollist, POLLERR); 1998 mutex_enter(&stp->sd_lock); 1999 2000 if (stp->sd_sigflags & S_ERROR) 2001 strsendsig(stp->sd_siglist, S_ERROR, 0, 2002 ((rw & FLUSHR) ? 
stp->sd_rerror : 2003 stp->sd_werror)); 2004 mutex_exit(&stp->sd_lock); 2005 /* 2006 * Send the M_FLUSH only 2007 * for the first M_ERROR 2008 * message on the stream 2009 */ 2010 if (flushed_already == rw) { 2011 freemsg(bp); 2012 return (0); 2013 } 2014 2015 bp->b_datap->db_type = M_FLUSH; 2016 *bp->b_rptr = rw; 2017 bp->b_wptr = bp->b_rptr + 1; 2018 /* 2019 * Protect against the driver 2020 * passing up messages after 2021 * it has done a qprocsoff 2022 */ 2023 if (_OTHERQ(q)->q_next == NULL) 2024 freemsg(bp); 2025 else 2026 qreply(q, bp); 2027 return (0); 2028 } else 2029 mutex_exit(&stp->sd_lock); 2030 } else if (*bp->b_rptr != 0) { /* Old flavor */ 2031 if (stp->sd_flag & (STRDERR|STWRERR)) 2032 flushed_already = FLUSHRW; 2033 mutex_enter(&stp->sd_lock); 2034 stp->sd_flag |= (STRDERR|STWRERR); 2035 stp->sd_rerror = *bp->b_rptr; 2036 stp->sd_werror = *bp->b_rptr; 2037 TRACE_2(TR_FAC_STREAMS_FR, 2038 TR_STRRPUT_WAKE2, 2039 "strrput wakeup #2:q %p, bp %p", q, bp); 2040 cv_broadcast(&q->q_wait); /* the readers */ 2041 cv_broadcast(&_WR(q)->q_wait); /* the writers */ 2042 cv_broadcast(&stp->sd_monitor); /* ioctllers */ 2043 2044 mutex_exit(&stp->sd_lock); 2045 pollwakeup(&stp->sd_pollist, POLLERR); 2046 mutex_enter(&stp->sd_lock); 2047 2048 if (stp->sd_sigflags & S_ERROR) 2049 strsendsig(stp->sd_siglist, S_ERROR, 0, 2050 (stp->sd_werror ? stp->sd_werror : 2051 stp->sd_rerror)); 2052 mutex_exit(&stp->sd_lock); 2053 2054 /* 2055 * Send the M_FLUSH only 2056 * for the first M_ERROR 2057 * message on the stream 2058 */ 2059 if (flushed_already != FLUSHRW) { 2060 bp->b_datap->db_type = M_FLUSH; 2061 *bp->b_rptr = FLUSHRW; 2062 /* 2063 * Protect against the driver passing up 2064 * messages after it has done a 2065 * qprocsoff. 
2066 */ 2067 if (_OTHERQ(q)->q_next == NULL) 2068 freemsg(bp); 2069 else 2070 qreply(q, bp); 2071 return (0); 2072 } 2073 } 2074 freemsg(bp); 2075 return (0); 2076 2077 case M_HANGUP: 2078 2079 freemsg(bp); 2080 mutex_enter(&stp->sd_lock); 2081 stp->sd_werror = ENXIO; 2082 stp->sd_flag |= STRHUP; 2083 stp->sd_flag &= ~(WSLEEP|RSLEEP); 2084 2085 /* 2086 * send signal if controlling tty 2087 */ 2088 2089 if (stp->sd_sidp) { 2090 prsignal(stp->sd_sidp, SIGHUP); 2091 if (stp->sd_sidp != stp->sd_pgidp) 2092 pgsignal(stp->sd_pgidp, SIGTSTP); 2093 } 2094 2095 /* 2096 * wake up read, write, and exception pollers and 2097 * reset wakeup mechanism. 2098 */ 2099 cv_broadcast(&q->q_wait); /* the readers */ 2100 cv_broadcast(&_WR(q)->q_wait); /* the writers */ 2101 cv_broadcast(&stp->sd_monitor); /* the ioctllers */ 2102 strhup(stp); 2103 mutex_exit(&stp->sd_lock); 2104 return (0); 2105 2106 case M_UNHANGUP: 2107 freemsg(bp); 2108 mutex_enter(&stp->sd_lock); 2109 stp->sd_werror = 0; 2110 stp->sd_flag &= ~STRHUP; 2111 mutex_exit(&stp->sd_lock); 2112 return (0); 2113 2114 case M_SIG: 2115 /* 2116 * Someone downstream wants to post a signal. The 2117 * signal to post is contained in the first byte of the 2118 * message. If the message would go on the front of 2119 * the queue, send a signal to the process group 2120 * (if not SIGPOLL) or to the siglist processes 2121 * (SIGPOLL). If something is already on the queue, 2122 * OR if we are delivering a delayed suspend (*sigh* 2123 * another "tty" hack) and there's no one sleeping already, 2124 * just enqueue the message. 2125 */ 2126 mutex_enter(&stp->sd_lock); 2127 if (q->q_first || (*bp->b_rptr == SIGTSTP && 2128 !(stp->sd_flag & RSLEEP))) { 2129 (void) putq(q, bp); 2130 mutex_exit(&stp->sd_lock); 2131 return (0); 2132 } 2133 mutex_exit(&stp->sd_lock); 2134 /* FALLTHRU */ 2135 2136 case M_PCSIG: 2137 /* 2138 * Don't enqueue, just post the signal. 
2139 */ 2140 strsignal(stp, *bp->b_rptr, 0L); 2141 freemsg(bp); 2142 return (0); 2143 2144 case M_CMD: 2145 if (MBLKL(bp) != sizeof (cmdblk_t)) { 2146 freemsg(bp); 2147 return (0); 2148 } 2149 2150 mutex_enter(&stp->sd_lock); 2151 if (stp->sd_flag & STRCMDWAIT) { 2152 ASSERT(stp->sd_cmdblk == NULL); 2153 stp->sd_cmdblk = bp; 2154 cv_broadcast(&stp->sd_monitor); 2155 mutex_exit(&stp->sd_lock); 2156 } else { 2157 mutex_exit(&stp->sd_lock); 2158 freemsg(bp); 2159 } 2160 return (0); 2161 2162 case M_FLUSH: 2163 /* 2164 * Flush queues. The indication of which queues to flush 2165 * is in the first byte of the message. If the read queue 2166 * is specified, then flush it. If FLUSHBAND is set, just 2167 * flush the band specified by the second byte of the message. 2168 * 2169 * If a module has issued a M_SETOPT to not flush hi 2170 * priority messages off of the stream head, then pass this 2171 * flag into the flushq code to preserve such messages. 2172 */ 2173 2174 if (*bp->b_rptr & FLUSHR) { 2175 mutex_enter(&stp->sd_lock); 2176 if (*bp->b_rptr & FLUSHBAND) { 2177 ASSERT((bp->b_wptr - bp->b_rptr) >= 2); 2178 flushband(q, *(bp->b_rptr + 1), FLUSHALL); 2179 } else 2180 flushq_common(q, FLUSHALL, 2181 stp->sd_read_opt & RFLUSHPCPROT); 2182 if ((q->q_first == NULL) || 2183 (q->q_first->b_datap->db_type < QPCTL)) 2184 stp->sd_flag &= ~STRPRI; 2185 else { 2186 ASSERT(stp->sd_flag & STRPRI); 2187 } 2188 mutex_exit(&stp->sd_lock); 2189 } 2190 if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) { 2191 *bp->b_rptr &= ~FLUSHR; 2192 bp->b_flag |= MSGNOLOOP; 2193 /* 2194 * Protect against the driver passing up 2195 * messages after it has done a qprocsoff. 2196 */ 2197 if (_OTHERQ(q)->q_next == NULL) 2198 freemsg(bp); 2199 else 2200 qreply(q, bp); 2201 return (0); 2202 } 2203 freemsg(bp); 2204 return (0); 2205 2206 case M_IOCACK: 2207 case M_IOCNAK: 2208 iocbp = (struct iocblk *)bp->b_rptr; 2209 /* 2210 * If not waiting for ACK or NAK then just free msg. 
2211 * If incorrect id sequence number then just free msg. 2212 * If already have ACK or NAK for user then this is a 2213 * duplicate, display a warning and free the msg. 2214 */ 2215 mutex_enter(&stp->sd_lock); 2216 if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk || 2217 (stp->sd_iocid != iocbp->ioc_id)) { 2218 /* 2219 * If the ACK/NAK is a dup, display a message 2220 * Dup is when sd_iocid == ioc_id, and 2221 * sd_iocblk == <valid ptr> or -1 (the former 2222 * is when an ioctl has been put on the stream 2223 * head, but has not yet been consumed, the 2224 * later is when it has been consumed). 2225 */ 2226 if ((stp->sd_iocid == iocbp->ioc_id) && 2227 (stp->sd_iocblk != NULL)) { 2228 log_dupioc(q, bp); 2229 } 2230 freemsg(bp); 2231 mutex_exit(&stp->sd_lock); 2232 return (0); 2233 } 2234 2235 /* 2236 * Assign ACK or NAK to user and wake up. 2237 */ 2238 stp->sd_iocblk = bp; 2239 cv_broadcast(&stp->sd_monitor); 2240 mutex_exit(&stp->sd_lock); 2241 return (0); 2242 2243 case M_COPYIN: 2244 case M_COPYOUT: 2245 reqp = (struct copyreq *)bp->b_rptr; 2246 2247 /* 2248 * If not waiting for ACK or NAK then just fail request. 2249 * If already have ACK, NAK, or copy request, then just 2250 * fail request. 2251 * If incorrect id sequence number then just fail request. 2252 */ 2253 mutex_enter(&stp->sd_lock); 2254 if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk || 2255 (stp->sd_iocid != reqp->cq_id)) { 2256 if (bp->b_cont) { 2257 freemsg(bp->b_cont); 2258 bp->b_cont = NULL; 2259 } 2260 bp->b_datap->db_type = M_IOCDATA; 2261 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp); 2262 resp = (struct copyresp *)bp->b_rptr; 2263 resp->cp_rval = (caddr_t)1; /* failure */ 2264 mutex_exit(&stp->sd_lock); 2265 putnext(stp->sd_wrq, bp); 2266 return (0); 2267 } 2268 2269 /* 2270 * Assign copy request to user and wake up. 
2271 */ 2272 stp->sd_iocblk = bp; 2273 cv_broadcast(&stp->sd_monitor); 2274 mutex_exit(&stp->sd_lock); 2275 return (0); 2276 2277 case M_SETOPTS: 2278 /* 2279 * Set stream head options (read option, write offset, 2280 * min/max packet size, and/or high/low water marks for 2281 * the read side only). 2282 */ 2283 2284 bpri = 0; 2285 sop = (struct stroptions *)bp->b_rptr; 2286 mutex_enter(&stp->sd_lock); 2287 if (sop->so_flags & SO_READOPT) { 2288 switch (sop->so_readopt & RMODEMASK) { 2289 case RNORM: 2290 stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS); 2291 break; 2292 2293 case RMSGD: 2294 stp->sd_read_opt = 2295 ((stp->sd_read_opt & ~RD_MSGNODIS) | 2296 RD_MSGDIS); 2297 break; 2298 2299 case RMSGN: 2300 stp->sd_read_opt = 2301 ((stp->sd_read_opt & ~RD_MSGDIS) | 2302 RD_MSGNODIS); 2303 break; 2304 } 2305 switch (sop->so_readopt & RPROTMASK) { 2306 case RPROTNORM: 2307 stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS); 2308 break; 2309 2310 case RPROTDAT: 2311 stp->sd_read_opt = 2312 ((stp->sd_read_opt & ~RD_PROTDIS) | 2313 RD_PROTDAT); 2314 break; 2315 2316 case RPROTDIS: 2317 stp->sd_read_opt = 2318 ((stp->sd_read_opt & ~RD_PROTDAT) | 2319 RD_PROTDIS); 2320 break; 2321 } 2322 switch (sop->so_readopt & RFLUSHMASK) { 2323 case RFLUSHPCPROT: 2324 /* 2325 * This sets the stream head to NOT flush 2326 * M_PCPROTO messages. 
2327 */ 2328 stp->sd_read_opt |= RFLUSHPCPROT; 2329 break; 2330 } 2331 } 2332 if (sop->so_flags & SO_ERROPT) { 2333 switch (sop->so_erropt & RERRMASK) { 2334 case RERRNORM: 2335 stp->sd_flag &= ~STRDERRNONPERSIST; 2336 break; 2337 case RERRNONPERSIST: 2338 stp->sd_flag |= STRDERRNONPERSIST; 2339 break; 2340 } 2341 switch (sop->so_erropt & WERRMASK) { 2342 case WERRNORM: 2343 stp->sd_flag &= ~STWRERRNONPERSIST; 2344 break; 2345 case WERRNONPERSIST: 2346 stp->sd_flag |= STWRERRNONPERSIST; 2347 break; 2348 } 2349 } 2350 if (sop->so_flags & SO_COPYOPT) { 2351 if (sop->so_copyopt & ZCVMSAFE) { 2352 stp->sd_copyflag |= STZCVMSAFE; 2353 stp->sd_copyflag &= ~STZCVMUNSAFE; 2354 } else if (sop->so_copyopt & ZCVMUNSAFE) { 2355 stp->sd_copyflag |= STZCVMUNSAFE; 2356 stp->sd_copyflag &= ~STZCVMSAFE; 2357 } 2358 2359 if (sop->so_copyopt & COPYCACHED) { 2360 stp->sd_copyflag |= STRCOPYCACHED; 2361 } 2362 } 2363 if (sop->so_flags & SO_WROFF) 2364 stp->sd_wroff = sop->so_wroff; 2365 if (sop->so_flags & SO_TAIL) 2366 stp->sd_tail = sop->so_tail; 2367 if (sop->so_flags & SO_MINPSZ) 2368 q->q_minpsz = sop->so_minpsz; 2369 if (sop->so_flags & SO_MAXPSZ) 2370 q->q_maxpsz = sop->so_maxpsz; 2371 if (sop->so_flags & SO_MAXBLK) 2372 stp->sd_maxblk = sop->so_maxblk; 2373 if (sop->so_flags & SO_HIWAT) { 2374 if (sop->so_flags & SO_BAND) { 2375 if (strqset(q, QHIWAT, 2376 sop->so_band, sop->so_hiwat)) { 2377 cmn_err(CE_WARN, "strrput: could not " 2378 "allocate qband\n"); 2379 } else { 2380 bpri = sop->so_band; 2381 } 2382 } else { 2383 q->q_hiwat = sop->so_hiwat; 2384 } 2385 } 2386 if (sop->so_flags & SO_LOWAT) { 2387 if (sop->so_flags & SO_BAND) { 2388 if (strqset(q, QLOWAT, 2389 sop->so_band, sop->so_lowat)) { 2390 cmn_err(CE_WARN, "strrput: could not " 2391 "allocate qband\n"); 2392 } else { 2393 bpri = sop->so_band; 2394 } 2395 } else { 2396 q->q_lowat = sop->so_lowat; 2397 } 2398 } 2399 if (sop->so_flags & SO_MREADON) 2400 stp->sd_flag |= SNDMREAD; 2401 if (sop->so_flags & SO_MREADOFF) 
2402 stp->sd_flag &= ~SNDMREAD; 2403 if (sop->so_flags & SO_NDELON) 2404 stp->sd_flag |= OLDNDELAY; 2405 if (sop->so_flags & SO_NDELOFF) 2406 stp->sd_flag &= ~OLDNDELAY; 2407 if (sop->so_flags & SO_ISTTY) 2408 stp->sd_flag |= STRISTTY; 2409 if (sop->so_flags & SO_ISNTTY) 2410 stp->sd_flag &= ~STRISTTY; 2411 if (sop->so_flags & SO_TOSTOP) 2412 stp->sd_flag |= STRTOSTOP; 2413 if (sop->so_flags & SO_TONSTOP) 2414 stp->sd_flag &= ~STRTOSTOP; 2415 if (sop->so_flags & SO_DELIM) 2416 stp->sd_flag |= STRDELIM; 2417 if (sop->so_flags & SO_NODELIM) 2418 stp->sd_flag &= ~STRDELIM; 2419 2420 mutex_exit(&stp->sd_lock); 2421 freemsg(bp); 2422 2423 /* Check backenable in case the water marks changed */ 2424 qbackenable(q, bpri); 2425 return (0); 2426 2427 /* 2428 * The following set of cases deal with situations where two stream 2429 * heads are connected to each other (twisted streams). These messages 2430 * have no meaning at the stream head. 2431 */ 2432 case M_BREAK: 2433 case M_CTL: 2434 case M_DELAY: 2435 case M_START: 2436 case M_STOP: 2437 case M_IOCDATA: 2438 case M_STARTI: 2439 case M_STOPI: 2440 freemsg(bp); 2441 return (0); 2442 2443 case M_IOCTL: 2444 /* 2445 * Always NAK this condition 2446 * (makes no sense) 2447 * If there is one or more threads in the read side 2448 * rwnext we have to defer the nacking until that thread 2449 * returns (in strget). 2450 */ 2451 mutex_enter(&stp->sd_lock); 2452 if (stp->sd_struiodnak != 0) { 2453 /* 2454 * Defer NAK to the streamhead. Queue at the end 2455 * the list. 2456 */ 2457 mblk_t *mp = stp->sd_struionak; 2458 2459 while (mp && mp->b_next) 2460 mp = mp->b_next; 2461 if (mp) 2462 mp->b_next = bp; 2463 else 2464 stp->sd_struionak = bp; 2465 bp->b_next = NULL; 2466 mutex_exit(&stp->sd_lock); 2467 return (0); 2468 } 2469 mutex_exit(&stp->sd_lock); 2470 2471 bp->b_datap->db_type = M_IOCNAK; 2472 /* 2473 * Protect against the driver passing up 2474 * messages after it has done a qprocsoff. 
2475 */ 2476 if (_OTHERQ(q)->q_next == NULL) 2477 freemsg(bp); 2478 else 2479 qreply(q, bp); 2480 return (0); 2481 2482 default: 2483 #ifdef DEBUG 2484 cmn_err(CE_WARN, 2485 "bad message type %x received at stream head\n", 2486 bp->b_datap->db_type); 2487 #endif 2488 freemsg(bp); 2489 return (0); 2490 } 2491 2492 /* NOTREACHED */ 2493 } 2494 2495 /* 2496 * Check if the stream pointed to by `stp' can be written to, and return an 2497 * error code if not. If `eiohup' is set, then return EIO if STRHUP is set. 2498 * If `sigpipeok' is set and the SW_SIGPIPE option is enabled on the stream, 2499 * then always return EPIPE and send a SIGPIPE to the invoking thread. 2500 */ 2501 static int 2502 strwriteable(struct stdata *stp, boolean_t eiohup, boolean_t sigpipeok) 2503 { 2504 int error; 2505 2506 ASSERT(MUTEX_HELD(&stp->sd_lock)); 2507 2508 /* 2509 * For modem support, POSIX states that on writes, EIO should 2510 * be returned if the stream has been hung up. 2511 */ 2512 if (eiohup && (stp->sd_flag & (STPLEX|STRHUP)) == STRHUP) 2513 error = EIO; 2514 else 2515 error = strgeterr(stp, STRHUP|STPLEX|STWRERR, 0); 2516 2517 if (error != 0) { 2518 if (!(stp->sd_flag & STPLEX) && 2519 (stp->sd_wput_opt & SW_SIGPIPE) && sigpipeok) { 2520 tsignal(curthread, SIGPIPE); 2521 error = EPIPE; 2522 } 2523 } 2524 2525 return (error); 2526 } 2527 2528 /* 2529 * Copyin and send data down a stream. 2530 * The caller will allocate and copyin any control part that precedes the 2531 * message and pass that in as mctl. 2532 * 2533 * Caller should *not* hold sd_lock. 2534 * When EWOULDBLOCK is returned the caller has to redo the canputnext 2535 * under sd_lock in order to avoid missing a backenabling wakeup. 2536 * 2537 * Use iosize = -1 to not send any M_DATA. iosize = 0 sends zero-length M_DATA. 2538 * 2539 * Set MSG_IGNFLOW in flags to ignore flow control for hipri messages. 2540 * For sync streams we can only ignore flow control by reverting to using 2541 * putnext. 
2542 * 2543 * If sd_maxblk is less than *iosize this routine might return without 2544 * transferring all of *iosize. In all cases, on return *iosize will contain 2545 * the amount of data that was transferred. 2546 */ 2547 static int 2548 strput(struct stdata *stp, mblk_t *mctl, struct uio *uiop, ssize_t *iosize, 2549 int b_flag, int pri, int flags) 2550 { 2551 struiod_t uiod; 2552 mblk_t *mp; 2553 queue_t *wqp = stp->sd_wrq; 2554 int error = 0; 2555 ssize_t count = *iosize; 2556 2557 ASSERT(MUTEX_NOT_HELD(&stp->sd_lock)); 2558 2559 if (uiop != NULL && count >= 0) 2560 flags |= stp->sd_struiowrq ? STRUIO_POSTPONE : 0; 2561 2562 if (!(flags & STRUIO_POSTPONE)) { 2563 /* 2564 * Use regular canputnext, strmakedata, putnext sequence. 2565 */ 2566 if (pri == 0) { 2567 if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) { 2568 freemsg(mctl); 2569 return (EWOULDBLOCK); 2570 } 2571 } else { 2572 if (!(flags & MSG_IGNFLOW) && !bcanputnext(wqp, pri)) { 2573 freemsg(mctl); 2574 return (EWOULDBLOCK); 2575 } 2576 } 2577 2578 if ((error = strmakedata(iosize, uiop, stp, flags, 2579 &mp)) != 0) { 2580 freemsg(mctl); 2581 /* 2582 * need to change return code to ENOMEM 2583 * so that this is not confused with 2584 * flow control, EAGAIN. 2585 */ 2586 2587 if (error == EAGAIN) 2588 return (ENOMEM); 2589 else 2590 return (error); 2591 } 2592 if (mctl != NULL) { 2593 if (mctl->b_cont == NULL) 2594 mctl->b_cont = mp; 2595 else if (mp != NULL) 2596 linkb(mctl, mp); 2597 mp = mctl; 2598 } else if (mp == NULL) 2599 return (0); 2600 2601 mp->b_flag |= b_flag; 2602 mp->b_band = (uchar_t)pri; 2603 2604 if (flags & MSG_IGNFLOW) { 2605 /* 2606 * XXX Hack: Don't get stuck running service 2607 * procedures. This is needed for sockfs when 2608 * sending the unbind message out of the rput 2609 * procedure - we don't want a put procedure 2610 * to run service procedures. 
2611 */ 2612 putnext(wqp, mp); 2613 } else { 2614 stream_willservice(stp); 2615 putnext(wqp, mp); 2616 stream_runservice(stp); 2617 } 2618 return (0); 2619 } 2620 /* 2621 * Stream supports rwnext() for the write side. 2622 */ 2623 if ((error = strmakedata(iosize, uiop, stp, flags, &mp)) != 0) { 2624 freemsg(mctl); 2625 /* 2626 * map EAGAIN to ENOMEM since EAGAIN means "flow controlled". 2627 */ 2628 return (error == EAGAIN ? ENOMEM : error); 2629 } 2630 if (mctl != NULL) { 2631 if (mctl->b_cont == NULL) 2632 mctl->b_cont = mp; 2633 else if (mp != NULL) 2634 linkb(mctl, mp); 2635 mp = mctl; 2636 } else if (mp == NULL) { 2637 return (0); 2638 } 2639 2640 mp->b_flag |= b_flag; 2641 mp->b_band = (uchar_t)pri; 2642 2643 (void) uiodup(uiop, &uiod.d_uio, uiod.d_iov, 2644 sizeof (uiod.d_iov) / sizeof (*uiod.d_iov)); 2645 uiod.d_uio.uio_offset = 0; 2646 uiod.d_mp = mp; 2647 error = rwnext(wqp, &uiod); 2648 if (! uiod.d_mp) { 2649 uioskip(uiop, *iosize); 2650 return (error); 2651 } 2652 ASSERT(mp == uiod.d_mp); 2653 if (error == EINVAL) { 2654 /* 2655 * The stream plumbing must have changed while 2656 * we were away, so just turn off rwnext()s. 2657 */ 2658 error = 0; 2659 } else if (error == EBUSY || error == EWOULDBLOCK) { 2660 /* 2661 * Couldn't enter a perimeter or took a page fault, 2662 * so fall-back to putnext(). 
2663 */ 2664 error = 0; 2665 } else { 2666 freemsg(mp); 2667 return (error); 2668 } 2669 /* Have to check canput before consuming data from the uio */ 2670 if (pri == 0) { 2671 if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) { 2672 freemsg(mp); 2673 return (EWOULDBLOCK); 2674 } 2675 } else { 2676 if (!bcanputnext(wqp, pri) && !(flags & MSG_IGNFLOW)) { 2677 freemsg(mp); 2678 return (EWOULDBLOCK); 2679 } 2680 } 2681 ASSERT(mp == uiod.d_mp); 2682 /* Copyin data from the uio */ 2683 if ((error = struioget(wqp, mp, &uiod, 0)) != 0) { 2684 freemsg(mp); 2685 return (error); 2686 } 2687 uioskip(uiop, *iosize); 2688 if (flags & MSG_IGNFLOW) { 2689 /* 2690 * XXX Hack: Don't get stuck running service procedures. 2691 * This is needed for sockfs when sending the unbind message 2692 * out of the rput procedure - we don't want a put procedure 2693 * to run service procedures. 2694 */ 2695 putnext(wqp, mp); 2696 } else { 2697 stream_willservice(stp); 2698 putnext(wqp, mp); 2699 stream_runservice(stp); 2700 } 2701 return (0); 2702 } 2703 2704 /* 2705 * Write attempts to break the write request into messages conforming 2706 * with the minimum and maximum packet sizes set downstream. 2707 * 2708 * Write will not block if downstream queue is full and 2709 * O_NDELAY is set, otherwise it will block waiting for the queue to get room. 2710 * 2711 * A write of zero bytes gets packaged into a zero length message and sent 2712 * downstream like any other message. 2713 * 2714 * If buffers of the requested sizes are not available, the write will 2715 * sleep until the buffers become available. 2716 * 2717 * Write (if specified) will supply a write offset in a message if it 2718 * makes sense. This can be specified by downstream modules as part of 2719 * a M_SETOPTS message. Write will not supply the write offset if it 2720 * cannot supply any data in a buffer. In other words, write will never 2721 * send down an empty packet due to a write offset. 
2722 */ 2723 /* ARGSUSED2 */ 2724 int 2725 strwrite(struct vnode *vp, struct uio *uiop, cred_t *crp) 2726 { 2727 return (strwrite_common(vp, uiop, crp, 0)); 2728 } 2729 2730 /* ARGSUSED2 */ 2731 int 2732 strwrite_common(struct vnode *vp, struct uio *uiop, cred_t *crp, int wflag) 2733 { 2734 struct stdata *stp; 2735 struct queue *wqp; 2736 ssize_t rmin, rmax; 2737 ssize_t iosize; 2738 int waitflag; 2739 int tempmode; 2740 int error = 0; 2741 int b_flag; 2742 2743 ASSERT(vp->v_stream); 2744 stp = vp->v_stream; 2745 2746 mutex_enter(&stp->sd_lock); 2747 2748 if ((error = i_straccess(stp, JCWRITE)) != 0) { 2749 mutex_exit(&stp->sd_lock); 2750 return (error); 2751 } 2752 2753 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { 2754 error = strwriteable(stp, B_TRUE, B_TRUE); 2755 if (error != 0) { 2756 mutex_exit(&stp->sd_lock); 2757 return (error); 2758 } 2759 } 2760 2761 mutex_exit(&stp->sd_lock); 2762 2763 wqp = stp->sd_wrq; 2764 2765 /* get these values from them cached in the stream head */ 2766 rmin = stp->sd_qn_minpsz; 2767 rmax = stp->sd_qn_maxpsz; 2768 2769 /* 2770 * Check the min/max packet size constraints. If min packet size 2771 * is non-zero, the write cannot be split into multiple messages 2772 * and still guarantee the size constraints. 2773 */ 2774 TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_IN, "strwrite in:q %p", wqp); 2775 2776 ASSERT((rmax >= 0) || (rmax == INFPSZ)); 2777 if (rmax == 0) { 2778 return (0); 2779 } 2780 if (rmin > 0) { 2781 if (uiop->uio_resid < rmin) { 2782 TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT, 2783 "strwrite out:q %p out %d error %d", 2784 wqp, 0, ERANGE); 2785 return (ERANGE); 2786 } 2787 if ((rmax != INFPSZ) && (uiop->uio_resid > rmax)) { 2788 TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT, 2789 "strwrite out:q %p out %d error %d", 2790 wqp, 1, ERANGE); 2791 return (ERANGE); 2792 } 2793 } 2794 2795 /* 2796 * Do until count satisfied or error. 
2797 */ 2798 waitflag = WRITEWAIT | wflag; 2799 if (stp->sd_flag & OLDNDELAY) 2800 tempmode = uiop->uio_fmode & ~FNDELAY; 2801 else 2802 tempmode = uiop->uio_fmode; 2803 2804 if (rmax == INFPSZ) 2805 rmax = uiop->uio_resid; 2806 2807 /* 2808 * Note that tempmode does not get used in strput/strmakedata 2809 * but only in strwaitq. The other routines use uio_fmode 2810 * unmodified. 2811 */ 2812 2813 /* LINTED: constant in conditional context */ 2814 while (1) { /* breaks when uio_resid reaches zero */ 2815 /* 2816 * Determine the size of the next message to be 2817 * packaged. May have to break write into several 2818 * messages based on max packet size. 2819 */ 2820 iosize = MIN(uiop->uio_resid, rmax); 2821 2822 /* 2823 * Put block downstream when flow control allows it. 2824 */ 2825 if ((stp->sd_flag & STRDELIM) && (uiop->uio_resid == iosize)) 2826 b_flag = MSGDELIM; 2827 else 2828 b_flag = 0; 2829 2830 for (;;) { 2831 int done = 0; 2832 2833 error = strput(stp, NULL, uiop, &iosize, b_flag, 0, 0); 2834 if (error == 0) 2835 break; 2836 if (error != EWOULDBLOCK) 2837 goto out; 2838 2839 mutex_enter(&stp->sd_lock); 2840 /* 2841 * Check for a missed wakeup. 2842 * Needed since strput did not hold sd_lock across 2843 * the canputnext. 
2844 */ 2845 if (canputnext(wqp)) { 2846 /* Try again */ 2847 mutex_exit(&stp->sd_lock); 2848 continue; 2849 } 2850 TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAIT, 2851 "strwrite wait:q %p wait", wqp); 2852 if ((error = strwaitq(stp, waitflag, (ssize_t)0, 2853 tempmode, -1, &done)) != 0 || done) { 2854 mutex_exit(&stp->sd_lock); 2855 if ((vp->v_type == VFIFO) && 2856 (uiop->uio_fmode & FNDELAY) && 2857 (error == EAGAIN)) 2858 error = 0; 2859 goto out; 2860 } 2861 TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAKE, 2862 "strwrite wake:q %p awakes", wqp); 2863 if ((error = i_straccess(stp, JCWRITE)) != 0) { 2864 mutex_exit(&stp->sd_lock); 2865 goto out; 2866 } 2867 mutex_exit(&stp->sd_lock); 2868 } 2869 waitflag |= NOINTR; 2870 TRACE_2(TR_FAC_STREAMS_FR, TR_STRWRITE_RESID, 2871 "strwrite resid:q %p uiop %p", wqp, uiop); 2872 if (uiop->uio_resid) { 2873 /* Recheck for errors - needed for sockets */ 2874 if ((stp->sd_wput_opt & SW_RECHECK_ERR) && 2875 (stp->sd_flag & (STWRERR|STRHUP|STPLEX))) { 2876 mutex_enter(&stp->sd_lock); 2877 error = strwriteable(stp, B_FALSE, B_TRUE); 2878 mutex_exit(&stp->sd_lock); 2879 if (error != 0) 2880 return (error); 2881 } 2882 continue; 2883 } 2884 break; 2885 } 2886 out: 2887 /* 2888 * For historical reasons, applications expect EAGAIN when a data 2889 * mblk_t cannot be allocated, so change ENOMEM back to EAGAIN. 2890 */ 2891 if (error == ENOMEM) 2892 error = EAGAIN; 2893 TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT, 2894 "strwrite out:q %p out %d error %d", wqp, 2, error); 2895 return (error); 2896 } 2897 2898 /* 2899 * Stream head write service routine. 2900 * Its job is to wake up any sleeping writers when a queue 2901 * downstream needs data (part of the flow control in putq and getq). 2902 * It also must wake anyone sleeping on a poll(). 2903 * For stream head right below mux module, it must also invoke put procedure 2904 * of next downstream module. 
2905 */ 2906 int 2907 strwsrv(queue_t *q) 2908 { 2909 struct stdata *stp; 2910 queue_t *tq; 2911 qband_t *qbp; 2912 int i; 2913 qband_t *myqbp; 2914 int isevent; 2915 unsigned char qbf[NBAND]; /* band flushing backenable flags */ 2916 2917 TRACE_1(TR_FAC_STREAMS_FR, 2918 TR_STRWSRV, "strwsrv:q %p", q); 2919 stp = (struct stdata *)q->q_ptr; 2920 ASSERT(qclaimed(q)); 2921 mutex_enter(&stp->sd_lock); 2922 ASSERT(!(stp->sd_flag & STPLEX)); 2923 2924 if (stp->sd_flag & WSLEEP) { 2925 stp->sd_flag &= ~WSLEEP; 2926 cv_broadcast(&q->q_wait); 2927 } 2928 mutex_exit(&stp->sd_lock); 2929 2930 /* The other end of a stream pipe went away. */ 2931 if ((tq = q->q_next) == NULL) { 2932 return (0); 2933 } 2934 2935 /* Find the next module forward that has a service procedure */ 2936 claimstr(q); 2937 tq = q->q_nfsrv; 2938 ASSERT(tq != NULL); 2939 2940 if ((q->q_flag & QBACK)) { 2941 if ((tq->q_flag & QFULL)) { 2942 mutex_enter(QLOCK(tq)); 2943 if (!(tq->q_flag & QFULL)) { 2944 mutex_exit(QLOCK(tq)); 2945 goto wakeup; 2946 } 2947 /* 2948 * The queue must have become full again. Set QWANTW 2949 * again so strwsrv will be back enabled when 2950 * the queue becomes non-full next time. 2951 */ 2952 tq->q_flag |= QWANTW; 2953 mutex_exit(QLOCK(tq)); 2954 } else { 2955 wakeup: 2956 pollwakeup(&stp->sd_pollist, POLLWRNORM); 2957 mutex_enter(&stp->sd_lock); 2958 if (stp->sd_sigflags & S_WRNORM) 2959 strsendsig(stp->sd_siglist, S_WRNORM, 0, 0); 2960 mutex_exit(&stp->sd_lock); 2961 } 2962 } 2963 2964 isevent = 0; 2965 i = 1; 2966 bzero((caddr_t)qbf, NBAND); 2967 mutex_enter(QLOCK(tq)); 2968 if ((myqbp = q->q_bandp) != NULL) 2969 for (qbp = tq->q_bandp; qbp && myqbp; qbp = qbp->qb_next) { 2970 ASSERT(myqbp); 2971 if ((myqbp->qb_flag & QB_BACK)) { 2972 if (qbp->qb_flag & QB_FULL) { 2973 /* 2974 * The band must have become full again. 2975 * Set QB_WANTW again so strwsrv will 2976 * be back enabled when the band becomes 2977 * non-full next time. 
2978 */ 2979 qbp->qb_flag |= QB_WANTW; 2980 } else { 2981 isevent = 1; 2982 qbf[i] = 1; 2983 } 2984 } 2985 myqbp = myqbp->qb_next; 2986 i++; 2987 } 2988 mutex_exit(QLOCK(tq)); 2989 2990 if (isevent) { 2991 for (i = tq->q_nband; i; i--) { 2992 if (qbf[i]) { 2993 pollwakeup(&stp->sd_pollist, POLLWRBAND); 2994 mutex_enter(&stp->sd_lock); 2995 if (stp->sd_sigflags & S_WRBAND) 2996 strsendsig(stp->sd_siglist, S_WRBAND, 2997 (uchar_t)i, 0); 2998 mutex_exit(&stp->sd_lock); 2999 } 3000 } 3001 } 3002 3003 releasestr(q); 3004 return (0); 3005 } 3006 3007 /* 3008 * Special case of strcopyin/strcopyout for copying 3009 * struct strioctl that can deal with both data 3010 * models. 3011 */ 3012 3013 #ifdef _LP64 3014 3015 static int 3016 strcopyin_strioctl(void *from, void *to, int flag, int copyflag) 3017 { 3018 struct strioctl32 strioc32; 3019 struct strioctl *striocp; 3020 3021 if (copyflag & U_TO_K) { 3022 ASSERT((copyflag & K_TO_K) == 0); 3023 3024 if ((flag & FMODELS) == DATAMODEL_ILP32) { 3025 if (copyin(from, &strioc32, sizeof (strioc32))) 3026 return (EFAULT); 3027 3028 striocp = (struct strioctl *)to; 3029 striocp->ic_cmd = strioc32.ic_cmd; 3030 striocp->ic_timout = strioc32.ic_timout; 3031 striocp->ic_len = strioc32.ic_len; 3032 striocp->ic_dp = (char *)(uintptr_t)strioc32.ic_dp; 3033 3034 } else { /* NATIVE data model */ 3035 if (copyin(from, to, sizeof (struct strioctl))) { 3036 return (EFAULT); 3037 } else { 3038 return (0); 3039 } 3040 } 3041 } else { 3042 ASSERT(copyflag & K_TO_K); 3043 bcopy(from, to, sizeof (struct strioctl)); 3044 } 3045 return (0); 3046 } 3047 3048 static int 3049 strcopyout_strioctl(void *from, void *to, int flag, int copyflag) 3050 { 3051 struct strioctl32 strioc32; 3052 struct strioctl *striocp; 3053 3054 if (copyflag & U_TO_K) { 3055 ASSERT((copyflag & K_TO_K) == 0); 3056 3057 if ((flag & FMODELS) == DATAMODEL_ILP32) { 3058 striocp = (struct strioctl *)from; 3059 strioc32.ic_cmd = striocp->ic_cmd; 3060 strioc32.ic_timout = 
striocp->ic_timout; 3061 strioc32.ic_len = striocp->ic_len; 3062 strioc32.ic_dp = (caddr32_t)(uintptr_t)striocp->ic_dp; 3063 ASSERT((char *)(uintptr_t)strioc32.ic_dp == 3064 striocp->ic_dp); 3065 3066 if (copyout(&strioc32, to, sizeof (strioc32))) 3067 return (EFAULT); 3068 3069 } else { /* NATIVE data model */ 3070 if (copyout(from, to, sizeof (struct strioctl))) { 3071 return (EFAULT); 3072 } else { 3073 return (0); 3074 } 3075 } 3076 } else { 3077 ASSERT(copyflag & K_TO_K); 3078 bcopy(from, to, sizeof (struct strioctl)); 3079 } 3080 return (0); 3081 } 3082 3083 #else /* ! _LP64 */ 3084 3085 /* ARGSUSED2 */ 3086 static int 3087 strcopyin_strioctl(void *from, void *to, int flag, int copyflag) 3088 { 3089 return (strcopyin(from, to, sizeof (struct strioctl), copyflag)); 3090 } 3091 3092 /* ARGSUSED2 */ 3093 static int 3094 strcopyout_strioctl(void *from, void *to, int flag, int copyflag) 3095 { 3096 return (strcopyout(from, to, sizeof (struct strioctl), copyflag)); 3097 } 3098 3099 #endif /* _LP64 */ 3100 3101 /* 3102 * Determine type of job control semantics expected by user. The 3103 * possibilities are: 3104 * JCREAD - Behaves like read() on fd; send SIGTTIN 3105 * JCWRITE - Behaves like write() on fd; send SIGTTOU if TOSTOP set 3106 * JCSETP - Sets a value in the stream; send SIGTTOU, ignore TOSTOP 3107 * JCGETP - Gets a value in the stream; no signals. 3108 * See straccess in strsubr.c for usage of these values. 3109 * 3110 * This routine also returns -1 for I_STR as a special case; the 3111 * caller must call again with the real ioctl number for 3112 * classification. 
3113 */ 3114 static int 3115 job_control_type(int cmd) 3116 { 3117 switch (cmd) { 3118 case I_STR: 3119 return (-1); 3120 3121 case I_RECVFD: 3122 case I_E_RECVFD: 3123 return (JCREAD); 3124 3125 case I_FDINSERT: 3126 case I_SENDFD: 3127 return (JCWRITE); 3128 3129 case TCSETA: 3130 case TCSETAW: 3131 case TCSETAF: 3132 case TCSBRK: 3133 case TCXONC: 3134 case TCFLSH: 3135 case TCDSET: /* Obsolete */ 3136 case TIOCSWINSZ: 3137 case TCSETS: 3138 case TCSETSW: 3139 case TCSETSF: 3140 case TIOCSETD: 3141 case TIOCHPCL: 3142 case TIOCSETP: 3143 case TIOCSETN: 3144 case TIOCEXCL: 3145 case TIOCNXCL: 3146 case TIOCFLUSH: 3147 case TIOCSETC: 3148 case TIOCLBIS: 3149 case TIOCLBIC: 3150 case TIOCLSET: 3151 case TIOCSBRK: 3152 case TIOCCBRK: 3153 case TIOCSDTR: 3154 case TIOCCDTR: 3155 case TIOCSLTC: 3156 case TIOCSTOP: 3157 case TIOCSTART: 3158 case TIOCSTI: 3159 case TIOCSPGRP: 3160 case TIOCMSET: 3161 case TIOCMBIS: 3162 case TIOCMBIC: 3163 case TIOCREMOTE: 3164 case TIOCSIGNAL: 3165 case LDSETT: 3166 case LDSMAP: /* Obsolete */ 3167 case DIOCSETP: 3168 case I_FLUSH: 3169 case I_SRDOPT: 3170 case I_SETSIG: 3171 case I_SWROPT: 3172 case I_FLUSHBAND: 3173 case I_SETCLTIME: 3174 case I_SERROPT: 3175 case I_ESETSIG: 3176 case FIONBIO: 3177 case FIOASYNC: 3178 case FIOSETOWN: 3179 case JBOOT: /* Obsolete */ 3180 case JTERM: /* Obsolete */ 3181 case JTIMOM: /* Obsolete */ 3182 case JZOMBOOT: /* Obsolete */ 3183 case JAGENT: /* Obsolete */ 3184 case JTRUN: /* Obsolete */ 3185 case JXTPROTO: /* Obsolete */ 3186 return (JCSETP); 3187 } 3188 3189 return (JCGETP); 3190 } 3191 3192 /* 3193 * ioctl for streams 3194 */ 3195 int 3196 strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, 3197 cred_t *crp, int *rvalp) 3198 { 3199 struct stdata *stp; 3200 struct strcmd *scp; 3201 struct strioctl strioc; 3202 struct uio uio; 3203 struct iovec iov; 3204 int access; 3205 mblk_t *mp; 3206 int error = 0; 3207 int done = 0; 3208 ssize_t rmin, rmax; 3209 queue_t *wrq; 3210 
queue_t *rdq; 3211 boolean_t kioctl = B_FALSE; 3212 uint32_t auditing = AU_AUDITING(); 3213 3214 if (flag & FKIOCTL) { 3215 copyflag = K_TO_K; 3216 kioctl = B_TRUE; 3217 } 3218 ASSERT(vp->v_stream); 3219 ASSERT(copyflag == U_TO_K || copyflag == K_TO_K); 3220 stp = vp->v_stream; 3221 3222 TRACE_3(TR_FAC_STREAMS_FR, TR_IOCTL_ENTER, 3223 "strioctl:stp %p cmd %X arg %lX", stp, cmd, arg); 3224 3225 if (auditing) 3226 audit_strioctl(vp, cmd, arg, flag, copyflag, crp, rvalp); 3227 3228 /* 3229 * If the copy is kernel to kernel, make sure that the FNATIVE 3230 * flag is set. After this it would be a serious error to have 3231 * no model flag. 3232 */ 3233 if (copyflag == K_TO_K) 3234 flag = (flag & ~FMODELS) | FNATIVE; 3235 3236 ASSERT((flag & FMODELS) != 0); 3237 3238 wrq = stp->sd_wrq; 3239 rdq = _RD(wrq); 3240 3241 access = job_control_type(cmd); 3242 3243 /* We should never see these here, should be handled by iwscn */ 3244 if (cmd == SRIOCSREDIR || cmd == SRIOCISREDIR) 3245 return (EINVAL); 3246 3247 mutex_enter(&stp->sd_lock); 3248 if ((access != -1) && ((error = i_straccess(stp, access)) != 0)) { 3249 mutex_exit(&stp->sd_lock); 3250 return (error); 3251 } 3252 mutex_exit(&stp->sd_lock); 3253 3254 /* 3255 * Check for sgttyb-related ioctls first, and complain as 3256 * necessary. 
3257 */ 3258 switch (cmd) { 3259 case TIOCGETP: 3260 case TIOCSETP: 3261 case TIOCSETN: 3262 if (sgttyb_handling >= 2 && !sgttyb_complaint) { 3263 sgttyb_complaint = B_TRUE; 3264 cmn_err(CE_NOTE, 3265 "application used obsolete TIOC[GS]ET"); 3266 } 3267 if (sgttyb_handling >= 3) { 3268 tsignal(curthread, SIGSYS); 3269 return (EIO); 3270 } 3271 break; 3272 } 3273 3274 mutex_enter(&stp->sd_lock); 3275 3276 switch (cmd) { 3277 case I_RECVFD: 3278 case I_E_RECVFD: 3279 case I_PEEK: 3280 case I_NREAD: 3281 case FIONREAD: 3282 case FIORDCHK: 3283 case I_ATMARK: 3284 case FIONBIO: 3285 case FIOASYNC: 3286 if (stp->sd_flag & (STRDERR|STPLEX)) { 3287 error = strgeterr(stp, STRDERR|STPLEX, 0); 3288 if (error != 0) { 3289 mutex_exit(&stp->sd_lock); 3290 return (error); 3291 } 3292 } 3293 break; 3294 3295 default: 3296 if (stp->sd_flag & (STRDERR|STWRERR|STPLEX)) { 3297 error = strgeterr(stp, STRDERR|STWRERR|STPLEX, 0); 3298 if (error != 0) { 3299 mutex_exit(&stp->sd_lock); 3300 return (error); 3301 } 3302 } 3303 } 3304 3305 mutex_exit(&stp->sd_lock); 3306 3307 switch (cmd) { 3308 default: 3309 /* 3310 * The stream head has hardcoded knowledge of a 3311 * miscellaneous collection of terminal-, keyboard- and 3312 * mouse-related ioctls, enumerated below. This hardcoded 3313 * knowledge allows the stream head to automatically 3314 * convert transparent ioctl requests made by userland 3315 * programs into I_STR ioctls which many old STREAMS 3316 * modules and drivers require. 3317 * 3318 * No new ioctls should ever be added to this list. 3319 * Instead, the STREAMS module or driver should be written 3320 * to either handle transparent ioctls or require any 3321 * userland programs to use I_STR ioctls (by returning 3322 * EINVAL to any transparent ioctl requests). 
3323 * 3324 * More importantly, removing ioctls from this list should 3325 * be done with the utmost care, since our STREAMS modules 3326 * and drivers *count* on the stream head performing this 3327 * conversion, and thus may panic while processing 3328 * transparent ioctl request for one of these ioctls (keep 3329 * in mind that third party modules and drivers may have 3330 * similar problems). 3331 */ 3332 if (((cmd & IOCTYPE) == LDIOC) || 3333 ((cmd & IOCTYPE) == tIOC) || 3334 ((cmd & IOCTYPE) == TIOC) || 3335 ((cmd & IOCTYPE) == KIOC) || 3336 ((cmd & IOCTYPE) == MSIOC) || 3337 ((cmd & IOCTYPE) == VUIOC)) { 3338 /* 3339 * The ioctl is a tty ioctl - set up strioc buffer 3340 * and call strdoioctl() to do the work. 3341 */ 3342 if (stp->sd_flag & STRHUP) 3343 return (ENXIO); 3344 strioc.ic_cmd = cmd; 3345 strioc.ic_timout = INFTIM; 3346 3347 switch (cmd) { 3348 3349 case TCXONC: 3350 case TCSBRK: 3351 case TCFLSH: 3352 case TCDSET: 3353 { 3354 int native_arg = (int)arg; 3355 strioc.ic_len = sizeof (int); 3356 strioc.ic_dp = (char *)&native_arg; 3357 return (strdoioctl(stp, &strioc, flag, 3358 K_TO_K, crp, rvalp)); 3359 } 3360 3361 case TCSETA: 3362 case TCSETAW: 3363 case TCSETAF: 3364 strioc.ic_len = sizeof (struct termio); 3365 strioc.ic_dp = (char *)arg; 3366 return (strdoioctl(stp, &strioc, flag, 3367 copyflag, crp, rvalp)); 3368 3369 case TCSETS: 3370 case TCSETSW: 3371 case TCSETSF: 3372 strioc.ic_len = sizeof (struct termios); 3373 strioc.ic_dp = (char *)arg; 3374 return (strdoioctl(stp, &strioc, flag, 3375 copyflag, crp, rvalp)); 3376 3377 case LDSETT: 3378 strioc.ic_len = sizeof (struct termcb); 3379 strioc.ic_dp = (char *)arg; 3380 return (strdoioctl(stp, &strioc, flag, 3381 copyflag, crp, rvalp)); 3382 3383 case TIOCSETP: 3384 strioc.ic_len = sizeof (struct sgttyb); 3385 strioc.ic_dp = (char *)arg; 3386 return (strdoioctl(stp, &strioc, flag, 3387 copyflag, crp, rvalp)); 3388 3389 case TIOCSTI: 3390 if ((flag & FREAD) == 0 && 3391 secpolicy_sti(crp) != 
0) { 3392 return (EPERM); 3393 } 3394 mutex_enter(&stp->sd_lock); 3395 mutex_enter(&curproc->p_splock); 3396 if (stp->sd_sidp != curproc->p_sessp->s_sidp && 3397 secpolicy_sti(crp) != 0) { 3398 mutex_exit(&curproc->p_splock); 3399 mutex_exit(&stp->sd_lock); 3400 return (EACCES); 3401 } 3402 mutex_exit(&curproc->p_splock); 3403 mutex_exit(&stp->sd_lock); 3404 3405 strioc.ic_len = sizeof (char); 3406 strioc.ic_dp = (char *)arg; 3407 return (strdoioctl(stp, &strioc, flag, 3408 copyflag, crp, rvalp)); 3409 3410 case TIOCSWINSZ: 3411 strioc.ic_len = sizeof (struct winsize); 3412 strioc.ic_dp = (char *)arg; 3413 return (strdoioctl(stp, &strioc, flag, 3414 copyflag, crp, rvalp)); 3415 3416 case TIOCSSIZE: 3417 strioc.ic_len = sizeof (struct ttysize); 3418 strioc.ic_dp = (char *)arg; 3419 return (strdoioctl(stp, &strioc, flag, 3420 copyflag, crp, rvalp)); 3421 3422 case TIOCSSOFTCAR: 3423 case KIOCTRANS: 3424 case KIOCTRANSABLE: 3425 case KIOCCMD: 3426 case KIOCSDIRECT: 3427 case KIOCSCOMPAT: 3428 case KIOCSKABORTEN: 3429 case KIOCSRPTDELAY: 3430 case KIOCSRPTRATE: 3431 case VUIDSFORMAT: 3432 case TIOCSPPS: 3433 strioc.ic_len = sizeof (int); 3434 strioc.ic_dp = (char *)arg; 3435 return (strdoioctl(stp, &strioc, flag, 3436 copyflag, crp, rvalp)); 3437 3438 case KIOCSETKEY: 3439 case KIOCGETKEY: 3440 strioc.ic_len = sizeof (struct kiockey); 3441 strioc.ic_dp = (char *)arg; 3442 return (strdoioctl(stp, &strioc, flag, 3443 copyflag, crp, rvalp)); 3444 3445 case KIOCSKEY: 3446 case KIOCGKEY: 3447 strioc.ic_len = sizeof (struct kiockeymap); 3448 strioc.ic_dp = (char *)arg; 3449 return (strdoioctl(stp, &strioc, flag, 3450 copyflag, crp, rvalp)); 3451 3452 case KIOCSLED: 3453 /* arg is a pointer to char */ 3454 strioc.ic_len = sizeof (char); 3455 strioc.ic_dp = (char *)arg; 3456 return (strdoioctl(stp, &strioc, flag, 3457 copyflag, crp, rvalp)); 3458 3459 case MSIOSETPARMS: 3460 strioc.ic_len = sizeof (Ms_parms); 3461 strioc.ic_dp = (char *)arg; 3462 return (strdoioctl(stp, 
&strioc, flag, 3463 copyflag, crp, rvalp)); 3464 3465 case VUIDSADDR: 3466 case VUIDGADDR: 3467 strioc.ic_len = sizeof (struct vuid_addr_probe); 3468 strioc.ic_dp = (char *)arg; 3469 return (strdoioctl(stp, &strioc, flag, 3470 copyflag, crp, rvalp)); 3471 3472 /* 3473 * These M_IOCTL's don't require any data to be sent 3474 * downstream, and the driver will allocate and link 3475 * on its own mblk_t upon M_IOCACK -- thus we set 3476 * ic_len to zero and set ic_dp to arg so we know 3477 * where to copyout to later. 3478 */ 3479 case TIOCGSOFTCAR: 3480 case TIOCGWINSZ: 3481 case TIOCGSIZE: 3482 case KIOCGTRANS: 3483 case KIOCGTRANSABLE: 3484 case KIOCTYPE: 3485 case KIOCGDIRECT: 3486 case KIOCGCOMPAT: 3487 case KIOCLAYOUT: 3488 case KIOCGLED: 3489 case MSIOGETPARMS: 3490 case MSIOBUTTONS: 3491 case VUIDGFORMAT: 3492 case TIOCGPPS: 3493 case TIOCGPPSEV: 3494 case TCGETA: 3495 case TCGETS: 3496 case LDGETT: 3497 case TIOCGETP: 3498 case KIOCGRPTDELAY: 3499 case KIOCGRPTRATE: 3500 strioc.ic_len = 0; 3501 strioc.ic_dp = (char *)arg; 3502 return (strdoioctl(stp, &strioc, flag, 3503 copyflag, crp, rvalp)); 3504 } 3505 } 3506 3507 /* 3508 * Unknown cmd - send it down as a transparent ioctl. 3509 */ 3510 strioc.ic_cmd = cmd; 3511 strioc.ic_timout = INFTIM; 3512 strioc.ic_len = TRANSPARENT; 3513 strioc.ic_dp = (char *)&arg; 3514 3515 return (strdoioctl(stp, &strioc, flag, copyflag, crp, rvalp)); 3516 3517 case I_STR: 3518 /* 3519 * Stream ioctl. Read in an strioctl buffer from the user 3520 * along with any data specified and send it downstream. 3521 * Strdoioctl will wait allow only one ioctl message at 3522 * a time, and waits for the acknowledgement. 
3523 */ 3524 3525 if (stp->sd_flag & STRHUP) 3526 return (ENXIO); 3527 3528 error = strcopyin_strioctl((void *)arg, &strioc, flag, 3529 copyflag); 3530 if (error != 0) 3531 return (error); 3532 3533 if ((strioc.ic_len < 0) || (strioc.ic_timout < -1)) 3534 return (EINVAL); 3535 3536 access = job_control_type(strioc.ic_cmd); 3537 mutex_enter(&stp->sd_lock); 3538 if ((access != -1) && 3539 ((error = i_straccess(stp, access)) != 0)) { 3540 mutex_exit(&stp->sd_lock); 3541 return (error); 3542 } 3543 mutex_exit(&stp->sd_lock); 3544 3545 /* 3546 * The I_STR facility provides a trap door for malicious 3547 * code to send down bogus streamio(7I) ioctl commands to 3548 * unsuspecting STREAMS modules and drivers which expect to 3549 * only get these messages from the stream head. 3550 * Explicitly prohibit any streamio ioctls which can be 3551 * passed downstream by the stream head. Note that we do 3552 * not block all streamio ioctls because the ioctl 3553 * numberspace is not well managed and thus it's possible 3554 * that a module or driver's ioctl numbers may accidentally 3555 * collide with them. 3556 */ 3557 switch (strioc.ic_cmd) { 3558 case I_LINK: 3559 case I_PLINK: 3560 case I_UNLINK: 3561 case I_PUNLINK: 3562 case _I_GETPEERCRED: 3563 case _I_PLINK_LH: 3564 return (EINVAL); 3565 } 3566 3567 error = strdoioctl(stp, &strioc, flag, copyflag, crp, rvalp); 3568 if (error == 0) { 3569 error = strcopyout_strioctl(&strioc, (void *)arg, 3570 flag, copyflag); 3571 } 3572 return (error); 3573 3574 case _I_CMD: 3575 /* 3576 * Like I_STR, but without using M_IOC* messages and without 3577 * copyins/copyouts beyond the passed-in argument. 
3578 */ 3579 if (stp->sd_flag & STRHUP) 3580 return (ENXIO); 3581 3582 if ((scp = kmem_alloc(sizeof (strcmd_t), KM_NOSLEEP)) == NULL) 3583 return (ENOMEM); 3584 3585 if (copyin((void *)arg, scp, sizeof (strcmd_t))) { 3586 kmem_free(scp, sizeof (strcmd_t)); 3587 return (EFAULT); 3588 } 3589 3590 access = job_control_type(scp->sc_cmd); 3591 mutex_enter(&stp->sd_lock); 3592 if (access != -1 && (error = i_straccess(stp, access)) != 0) { 3593 mutex_exit(&stp->sd_lock); 3594 kmem_free(scp, sizeof (strcmd_t)); 3595 return (error); 3596 } 3597 mutex_exit(&stp->sd_lock); 3598 3599 *rvalp = 0; 3600 if ((error = strdocmd(stp, scp, crp)) == 0) { 3601 if (copyout(scp, (void *)arg, sizeof (strcmd_t))) 3602 error = EFAULT; 3603 } 3604 kmem_free(scp, sizeof (strcmd_t)); 3605 return (error); 3606 3607 case I_NREAD: 3608 /* 3609 * Return number of bytes of data in first message 3610 * in queue in "arg" and return the number of messages 3611 * in queue in return value. 3612 */ 3613 { 3614 size_t size; 3615 int retval; 3616 int count = 0; 3617 3618 mutex_enter(QLOCK(rdq)); 3619 3620 size = msgdsize(rdq->q_first); 3621 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next) 3622 count++; 3623 3624 mutex_exit(QLOCK(rdq)); 3625 if (stp->sd_struiordq) { 3626 infod_t infod; 3627 3628 infod.d_cmd = INFOD_COUNT; 3629 infod.d_count = 0; 3630 if (count == 0) { 3631 infod.d_cmd |= INFOD_FIRSTBYTES; 3632 infod.d_bytes = 0; 3633 } 3634 infod.d_res = 0; 3635 (void) infonext(rdq, &infod); 3636 count += infod.d_count; 3637 if (infod.d_res & INFOD_FIRSTBYTES) 3638 size = infod.d_bytes; 3639 } 3640 3641 /* 3642 * Drop down from size_t to the "int" required by the 3643 * interface. Cap at INT_MAX. 3644 */ 3645 retval = MIN(size, INT_MAX); 3646 error = strcopyout(&retval, (void *)arg, sizeof (retval), 3647 copyflag); 3648 if (!error) 3649 *rvalp = count; 3650 return (error); 3651 } 3652 3653 case FIONREAD: 3654 /* 3655 * Return number of bytes of data in all data messages 3656 * in queue in "arg". 
3657 */ 3658 { 3659 size_t size = 0; 3660 int retval; 3661 3662 mutex_enter(QLOCK(rdq)); 3663 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next) 3664 size += msgdsize(mp); 3665 mutex_exit(QLOCK(rdq)); 3666 3667 if (stp->sd_struiordq) { 3668 infod_t infod; 3669 3670 infod.d_cmd = INFOD_BYTES; 3671 infod.d_res = 0; 3672 infod.d_bytes = 0; 3673 (void) infonext(rdq, &infod); 3674 size += infod.d_bytes; 3675 } 3676 3677 /* 3678 * Drop down from size_t to the "int" required by the 3679 * interface. Cap at INT_MAX. 3680 */ 3681 retval = MIN(size, INT_MAX); 3682 error = strcopyout(&retval, (void *)arg, sizeof (retval), 3683 copyflag); 3684 3685 *rvalp = 0; 3686 return (error); 3687 } 3688 case FIORDCHK: 3689 /* 3690 * FIORDCHK does not use arg value (like FIONREAD), 3691 * instead a count is returned. I_NREAD value may 3692 * not be accurate but safe. The real thing to do is 3693 * to add the msgdsizes of all data messages until 3694 * a non-data message. 3695 */ 3696 { 3697 size_t size = 0; 3698 3699 mutex_enter(QLOCK(rdq)); 3700 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next) 3701 size += msgdsize(mp); 3702 mutex_exit(QLOCK(rdq)); 3703 3704 if (stp->sd_struiordq) { 3705 infod_t infod; 3706 3707 infod.d_cmd = INFOD_BYTES; 3708 infod.d_res = 0; 3709 infod.d_bytes = 0; 3710 (void) infonext(rdq, &infod); 3711 size += infod.d_bytes; 3712 } 3713 3714 /* 3715 * Since ioctl returns an int, and memory sizes under 3716 * LP64 may not fit, we return INT_MAX if the count was 3717 * actually greater. 3718 */ 3719 *rvalp = MIN(size, INT_MAX); 3720 return (0); 3721 } 3722 3723 case I_FIND: 3724 /* 3725 * Get module name. 3726 */ 3727 { 3728 char mname[FMNAMESZ + 1]; 3729 queue_t *q; 3730 3731 error = (copyflag & U_TO_K ? copyinstr : copystr)((void *)arg, 3732 mname, FMNAMESZ + 1, NULL); 3733 if (error) 3734 return ((error == ENAMETOOLONG) ? EINVAL : EFAULT); 3735 3736 /* 3737 * Return EINVAL if we're handed a bogus module name. 
3738 */ 3739 if (fmodsw_find(mname, FMODSW_LOAD) == NULL) { 3740 TRACE_0(TR_FAC_STREAMS_FR, 3741 TR_I_CANT_FIND, "couldn't I_FIND"); 3742 return (EINVAL); 3743 } 3744 3745 *rvalp = 0; 3746 3747 /* Look downstream to see if module is there. */ 3748 claimstr(stp->sd_wrq); 3749 for (q = stp->sd_wrq->q_next; q; q = q->q_next) { 3750 if (q->q_flag & QREADR) { 3751 q = NULL; 3752 break; 3753 } 3754 if (strcmp(mname, Q2NAME(q)) == 0) 3755 break; 3756 } 3757 releasestr(stp->sd_wrq); 3758 3759 *rvalp = (q ? 1 : 0); 3760 return (error); 3761 } 3762 3763 case I_PUSH: 3764 case __I_PUSH_NOCTTY: 3765 /* 3766 * Push a module. 3767 * For the case __I_PUSH_NOCTTY push a module but 3768 * do not allocate controlling tty. See bugid 4025044 3769 */ 3770 3771 { 3772 char mname[FMNAMESZ + 1]; 3773 fmodsw_impl_t *fp; 3774 dev_t dummydev; 3775 3776 if (stp->sd_flag & STRHUP) 3777 return (ENXIO); 3778 3779 /* 3780 * Get module name and look up in fmodsw. 3781 */ 3782 error = (copyflag & U_TO_K ? copyinstr : copystr)((void *)arg, 3783 mname, FMNAMESZ + 1, NULL); 3784 if (error) 3785 return ((error == ENAMETOOLONG) ? EINVAL : EFAULT); 3786 3787 if ((fp = fmodsw_find(mname, FMODSW_HOLD | FMODSW_LOAD)) == 3788 NULL) 3789 return (EINVAL); 3790 3791 TRACE_2(TR_FAC_STREAMS_FR, TR_I_PUSH, 3792 "I_PUSH:fp %p stp %p", fp, stp); 3793 3794 if (error = strstartplumb(stp, flag, cmd)) { 3795 fmodsw_rele(fp); 3796 return (error); 3797 } 3798 3799 /* 3800 * See if any more modules can be pushed on this stream. 3801 * Note that this check must be done after strstartplumb() 3802 * since otherwise multiple threads issuing I_PUSHes on 3803 * the same stream will be able to exceed nstrpush. 3804 */ 3805 mutex_enter(&stp->sd_lock); 3806 if (stp->sd_pushcnt >= nstrpush) { 3807 fmodsw_rele(fp); 3808 strendplumb(stp); 3809 mutex_exit(&stp->sd_lock); 3810 return (EINVAL); 3811 } 3812 mutex_exit(&stp->sd_lock); 3813 3814 /* 3815 * Push new module and call its open routine 3816 * via qattach(). 
Modules don't change device 3817 * numbers, so just ignore dummydev here. 3818 */ 3819 dummydev = vp->v_rdev; 3820 if ((error = qattach(rdq, &dummydev, 0, crp, fp, 3821 B_FALSE)) == 0) { 3822 if (vp->v_type == VCHR && /* sorry, no pipes allowed */ 3823 (cmd == I_PUSH) && (stp->sd_flag & STRISTTY)) { 3824 /* 3825 * try to allocate it as a controlling terminal 3826 */ 3827 (void) strctty(stp); 3828 } 3829 } 3830 3831 mutex_enter(&stp->sd_lock); 3832 3833 /* 3834 * As a performance concern we are caching the values of 3835 * q_minpsz and q_maxpsz of the module below the stream 3836 * head in the stream head. 3837 */ 3838 mutex_enter(QLOCK(stp->sd_wrq->q_next)); 3839 rmin = stp->sd_wrq->q_next->q_minpsz; 3840 rmax = stp->sd_wrq->q_next->q_maxpsz; 3841 mutex_exit(QLOCK(stp->sd_wrq->q_next)); 3842 3843 /* Do this processing here as a performance concern */ 3844 if (strmsgsz != 0) { 3845 if (rmax == INFPSZ) 3846 rmax = strmsgsz; 3847 else { 3848 if (vp->v_type == VFIFO) 3849 rmax = MIN(PIPE_BUF, rmax); 3850 else rmax = MIN(strmsgsz, rmax); 3851 } 3852 } 3853 3854 mutex_enter(QLOCK(wrq)); 3855 stp->sd_qn_minpsz = rmin; 3856 stp->sd_qn_maxpsz = rmax; 3857 mutex_exit(QLOCK(wrq)); 3858 3859 strendplumb(stp); 3860 mutex_exit(&stp->sd_lock); 3861 return (error); 3862 } 3863 3864 case I_POP: 3865 { 3866 queue_t *q; 3867 3868 if (stp->sd_flag & STRHUP) 3869 return (ENXIO); 3870 if (!wrq->q_next) /* for broken pipes */ 3871 return (EINVAL); 3872 3873 if (error = strstartplumb(stp, flag, cmd)) 3874 return (error); 3875 3876 /* 3877 * If there is an anchor on this stream and popping 3878 * the current module would attempt to pop through the 3879 * anchor, then disallow the pop unless we have sufficient 3880 * privileges; take the cheapest (non-locking) check 3881 * first. 
3882 */ 3883 if (secpolicy_ip_config(crp, B_TRUE) != 0 || 3884 (stp->sd_anchorzone != crgetzoneid(crp))) { 3885 mutex_enter(&stp->sd_lock); 3886 /* 3887 * Anchors only apply if there's at least one 3888 * module on the stream (sd_pushcnt > 0). 3889 */ 3890 if (stp->sd_pushcnt > 0 && 3891 stp->sd_pushcnt == stp->sd_anchor && 3892 stp->sd_vnode->v_type != VFIFO) { 3893 strendplumb(stp); 3894 mutex_exit(&stp->sd_lock); 3895 if (stp->sd_anchorzone != crgetzoneid(crp)) 3896 return (EINVAL); 3897 /* Audit and report error */ 3898 return (secpolicy_ip_config(crp, B_FALSE)); 3899 } 3900 mutex_exit(&stp->sd_lock); 3901 } 3902 3903 q = wrq->q_next; 3904 TRACE_2(TR_FAC_STREAMS_FR, TR_I_POP, 3905 "I_POP:%p from %p", q, stp); 3906 if (q->q_next == NULL || (q->q_flag & (QREADR|QISDRV))) { 3907 error = EINVAL; 3908 } else { 3909 qdetach(_RD(q), 1, flag, crp, B_FALSE); 3910 error = 0; 3911 } 3912 mutex_enter(&stp->sd_lock); 3913 3914 /* 3915 * As a performance concern we are caching the values of 3916 * q_minpsz and q_maxpsz of the module below the stream 3917 * head in the stream head. 3918 */ 3919 mutex_enter(QLOCK(wrq->q_next)); 3920 rmin = wrq->q_next->q_minpsz; 3921 rmax = wrq->q_next->q_maxpsz; 3922 mutex_exit(QLOCK(wrq->q_next)); 3923 3924 /* Do this processing here as a performance concern */ 3925 if (strmsgsz != 0) { 3926 if (rmax == INFPSZ) 3927 rmax = strmsgsz; 3928 else { 3929 if (vp->v_type == VFIFO) 3930 rmax = MIN(PIPE_BUF, rmax); 3931 else rmax = MIN(strmsgsz, rmax); 3932 } 3933 } 3934 3935 mutex_enter(QLOCK(wrq)); 3936 stp->sd_qn_minpsz = rmin; 3937 stp->sd_qn_maxpsz = rmax; 3938 mutex_exit(QLOCK(wrq)); 3939 3940 /* If we popped through the anchor, then reset the anchor. 
*/ 3941 if (stp->sd_pushcnt < stp->sd_anchor) { 3942 stp->sd_anchor = 0; 3943 stp->sd_anchorzone = 0; 3944 } 3945 strendplumb(stp); 3946 mutex_exit(&stp->sd_lock); 3947 return (error); 3948 } 3949 3950 case _I_MUXID2FD: 3951 { 3952 /* 3953 * Create a fd for a I_PLINK'ed lower stream with a given 3954 * muxid. With the fd, application can send down ioctls, 3955 * like I_LIST, to the previously I_PLINK'ed stream. Note 3956 * that after getting the fd, the application has to do an 3957 * I_PUNLINK on the muxid before it can do any operation 3958 * on the lower stream. This is required by spec1170. 3959 * 3960 * The fd used to do this ioctl should point to the same 3961 * controlling device used to do the I_PLINK. If it uses 3962 * a different stream or an invalid muxid, I_MUXID2FD will 3963 * fail. The error code is set to EINVAL. 3964 * 3965 * The intended use of this interface is the following. 3966 * An application I_PLINK'ed a stream and exits. The fd 3967 * to the lower stream is gone. Another application 3968 * wants to get a fd to the lower stream, it uses I_MUXID2FD. 3969 */ 3970 int muxid = (int)arg; 3971 int fd; 3972 linkinfo_t *linkp; 3973 struct file *fp; 3974 netstack_t *ns; 3975 str_stack_t *ss; 3976 3977 /* 3978 * Do not allow the wildcard muxid. This ioctl is not 3979 * intended to find arbitrary link. 
3980 */ 3981 if (muxid == 0) { 3982 return (EINVAL); 3983 } 3984 3985 ns = netstack_find_by_cred(crp); 3986 ASSERT(ns != NULL); 3987 ss = ns->netstack_str; 3988 ASSERT(ss != NULL); 3989 3990 mutex_enter(&muxifier); 3991 linkp = findlinks(vp->v_stream, muxid, LINKPERSIST, ss); 3992 if (linkp == NULL) { 3993 mutex_exit(&muxifier); 3994 netstack_rele(ss->ss_netstack); 3995 return (EINVAL); 3996 } 3997 3998 if ((fd = ufalloc(0)) == -1) { 3999 mutex_exit(&muxifier); 4000 netstack_rele(ss->ss_netstack); 4001 return (EMFILE); 4002 } 4003 fp = linkp->li_fpdown; 4004 mutex_enter(&fp->f_tlock); 4005 fp->f_count++; 4006 mutex_exit(&fp->f_tlock); 4007 mutex_exit(&muxifier); 4008 setf(fd, fp); 4009 *rvalp = fd; 4010 netstack_rele(ss->ss_netstack); 4011 return (0); 4012 } 4013 4014 case _I_INSERT: 4015 { 4016 /* 4017 * To insert a module to a given position in a stream. 4018 * In the first release, only allow privileged user 4019 * to use this ioctl. Furthermore, the insert is only allowed 4020 * below an anchor if the zoneid is the same as the zoneid 4021 * which created the anchor. 4022 * 4023 * Note that we do not plan to support this ioctl 4024 * on pipes in the first release. We want to learn more 4025 * about the implications of these ioctls before extending 4026 * their support. And we do not think these features are 4027 * valuable for pipes. 
4028 */ 4029 STRUCT_DECL(strmodconf, strmodinsert); 4030 char mod_name[FMNAMESZ + 1]; 4031 fmodsw_impl_t *fp; 4032 dev_t dummydev; 4033 queue_t *tmp_wrq; 4034 int pos; 4035 boolean_t is_insert; 4036 4037 STRUCT_INIT(strmodinsert, flag); 4038 if (stp->sd_flag & STRHUP) 4039 return (ENXIO); 4040 if (STRMATED(stp)) 4041 return (EINVAL); 4042 if ((error = secpolicy_net_config(crp, B_FALSE)) != 0) 4043 return (error); 4044 if (stp->sd_anchor != 0 && 4045 stp->sd_anchorzone != crgetzoneid(crp)) 4046 return (EINVAL); 4047 4048 error = strcopyin((void *)arg, STRUCT_BUF(strmodinsert), 4049 STRUCT_SIZE(strmodinsert), copyflag); 4050 if (error) 4051 return (error); 4052 4053 /* 4054 * Get module name and look up in fmodsw. 4055 */ 4056 error = (copyflag & U_TO_K ? copyinstr : 4057 copystr)(STRUCT_FGETP(strmodinsert, mod_name), 4058 mod_name, FMNAMESZ + 1, NULL); 4059 if (error) 4060 return ((error == ENAMETOOLONG) ? EINVAL : EFAULT); 4061 4062 if ((fp = fmodsw_find(mod_name, FMODSW_HOLD | FMODSW_LOAD)) == 4063 NULL) 4064 return (EINVAL); 4065 4066 if (error = strstartplumb(stp, flag, cmd)) { 4067 fmodsw_rele(fp); 4068 return (error); 4069 } 4070 4071 /* 4072 * Is this _I_INSERT just like an I_PUSH? We need to know 4073 * this because we do some optimizations if this is a 4074 * module being pushed. 4075 */ 4076 pos = STRUCT_FGET(strmodinsert, pos); 4077 is_insert = (pos != 0); 4078 4079 /* 4080 * Make sure pos is valid. Even though it is not an I_PUSH, 4081 * we impose the same limit on the number of modules in a 4082 * stream. 4083 */ 4084 mutex_enter(&stp->sd_lock); 4085 if (stp->sd_pushcnt >= nstrpush || pos < 0 || 4086 pos > stp->sd_pushcnt) { 4087 fmodsw_rele(fp); 4088 strendplumb(stp); 4089 mutex_exit(&stp->sd_lock); 4090 return (EINVAL); 4091 } 4092 if (stp->sd_anchor != 0) { 4093 /* 4094 * Is this insert below the anchor? 4095 * Pushcnt hasn't been increased yet hence 4096 * we test for greater than here, and greater or 4097 * equal after qattach. 
4098 */ 4099 if (pos > (stp->sd_pushcnt - stp->sd_anchor) && 4100 stp->sd_anchorzone != crgetzoneid(crp)) { 4101 fmodsw_rele(fp); 4102 strendplumb(stp); 4103 mutex_exit(&stp->sd_lock); 4104 return (EPERM); 4105 } 4106 } 4107 4108 mutex_exit(&stp->sd_lock); 4109 4110 /* 4111 * First find the correct position this module to 4112 * be inserted. We don't need to call claimstr() 4113 * as the stream should not be changing at this point. 4114 * 4115 * Insert new module and call its open routine 4116 * via qattach(). Modules don't change device 4117 * numbers, so just ignore dummydev here. 4118 */ 4119 for (tmp_wrq = stp->sd_wrq; pos > 0; 4120 tmp_wrq = tmp_wrq->q_next, pos--) { 4121 ASSERT(SAMESTR(tmp_wrq)); 4122 } 4123 dummydev = vp->v_rdev; 4124 if ((error = qattach(_RD(tmp_wrq), &dummydev, 0, crp, 4125 fp, is_insert)) != 0) { 4126 mutex_enter(&stp->sd_lock); 4127 strendplumb(stp); 4128 mutex_exit(&stp->sd_lock); 4129 return (error); 4130 } 4131 4132 mutex_enter(&stp->sd_lock); 4133 4134 /* 4135 * As a performance concern we are caching the values of 4136 * q_minpsz and q_maxpsz of the module below the stream 4137 * head in the stream head. 4138 */ 4139 if (!is_insert) { 4140 mutex_enter(QLOCK(stp->sd_wrq->q_next)); 4141 rmin = stp->sd_wrq->q_next->q_minpsz; 4142 rmax = stp->sd_wrq->q_next->q_maxpsz; 4143 mutex_exit(QLOCK(stp->sd_wrq->q_next)); 4144 4145 /* Do this processing here as a performance concern */ 4146 if (strmsgsz != 0) { 4147 if (rmax == INFPSZ) { 4148 rmax = strmsgsz; 4149 } else { 4150 rmax = MIN(strmsgsz, rmax); 4151 } 4152 } 4153 4154 mutex_enter(QLOCK(wrq)); 4155 stp->sd_qn_minpsz = rmin; 4156 stp->sd_qn_maxpsz = rmax; 4157 mutex_exit(QLOCK(wrq)); 4158 } 4159 4160 /* 4161 * Need to update the anchor value if this module is 4162 * inserted below the anchor point. 
4163 */ 4164 if (stp->sd_anchor != 0) { 4165 pos = STRUCT_FGET(strmodinsert, pos); 4166 if (pos >= (stp->sd_pushcnt - stp->sd_anchor)) 4167 stp->sd_anchor++; 4168 } 4169 4170 strendplumb(stp); 4171 mutex_exit(&stp->sd_lock); 4172 return (0); 4173 } 4174 4175 case _I_REMOVE: 4176 { 4177 /* 4178 * To remove a module with a given name in a stream. The 4179 * caller of this ioctl needs to provide both the name and 4180 * the position of the module to be removed. This eliminates 4181 * the ambiguity of removal if a module is inserted/pushed 4182 * multiple times in a stream. In the first release, only 4183 * allow privileged user to use this ioctl. 4184 * Furthermore, the remove is only allowed 4185 * below an anchor if the zoneid is the same as the zoneid 4186 * which created the anchor. 4187 * 4188 * Note that we do not plan to support this ioctl 4189 * on pipes in the first release. We want to learn more 4190 * about the implications of these ioctls before extending 4191 * their support. And we do not think these features are 4192 * valuable for pipes. 4193 * 4194 * Also note that _I_REMOVE cannot be used to remove a 4195 * driver or the stream head. 4196 */ 4197 STRUCT_DECL(strmodconf, strmodremove); 4198 queue_t *q; 4199 int pos; 4200 char mod_name[FMNAMESZ + 1]; 4201 boolean_t is_remove; 4202 4203 STRUCT_INIT(strmodremove, flag); 4204 if (stp->sd_flag & STRHUP) 4205 return (ENXIO); 4206 if (STRMATED(stp)) 4207 return (EINVAL); 4208 if ((error = secpolicy_net_config(crp, B_FALSE)) != 0) 4209 return (error); 4210 if (stp->sd_anchor != 0 && 4211 stp->sd_anchorzone != crgetzoneid(crp)) 4212 return (EINVAL); 4213 4214 error = strcopyin((void *)arg, STRUCT_BUF(strmodremove), 4215 STRUCT_SIZE(strmodremove), copyflag); 4216 if (error) 4217 return (error); 4218 4219 error = (copyflag & U_TO_K ? copyinstr : 4220 copystr)(STRUCT_FGETP(strmodremove, mod_name), 4221 mod_name, FMNAMESZ + 1, NULL); 4222 if (error) 4223 return ((error == ENAMETOOLONG) ? 
EINVAL : EFAULT); 4224 4225 if ((error = strstartplumb(stp, flag, cmd)) != 0) 4226 return (error); 4227 4228 /* 4229 * Match the name of given module to the name of module at 4230 * the given position. 4231 */ 4232 pos = STRUCT_FGET(strmodremove, pos); 4233 4234 is_remove = (pos != 0); 4235 for (q = stp->sd_wrq->q_next; SAMESTR(q) && pos > 0; 4236 q = q->q_next, pos--) 4237 ; 4238 if (pos > 0 || !SAMESTR(q) || 4239 strcmp(Q2NAME(q), mod_name) != 0) { 4240 mutex_enter(&stp->sd_lock); 4241 strendplumb(stp); 4242 mutex_exit(&stp->sd_lock); 4243 return (EINVAL); 4244 } 4245 4246 /* 4247 * If the position is at or below an anchor, then the zoneid 4248 * must match the zoneid that created the anchor. 4249 */ 4250 if (stp->sd_anchor != 0) { 4251 pos = STRUCT_FGET(strmodremove, pos); 4252 if (pos >= (stp->sd_pushcnt - stp->sd_anchor) && 4253 stp->sd_anchorzone != crgetzoneid(crp)) { 4254 mutex_enter(&stp->sd_lock); 4255 strendplumb(stp); 4256 mutex_exit(&stp->sd_lock); 4257 return (EPERM); 4258 } 4259 } 4260 4261 4262 ASSERT(!(q->q_flag & QREADR)); 4263 qdetach(_RD(q), 1, flag, crp, is_remove); 4264 4265 mutex_enter(&stp->sd_lock); 4266 4267 /* 4268 * As a performance concern we are caching the values of 4269 * q_minpsz and q_maxpsz of the module below the stream 4270 * head in the stream head. 4271 */ 4272 if (!is_remove) { 4273 mutex_enter(QLOCK(wrq->q_next)); 4274 rmin = wrq->q_next->q_minpsz; 4275 rmax = wrq->q_next->q_maxpsz; 4276 mutex_exit(QLOCK(wrq->q_next)); 4277 4278 /* Do this processing here as a performance concern */ 4279 if (strmsgsz != 0) { 4280 if (rmax == INFPSZ) 4281 rmax = strmsgsz; 4282 else { 4283 if (vp->v_type == VFIFO) 4284 rmax = MIN(PIPE_BUF, rmax); 4285 else rmax = MIN(strmsgsz, rmax); 4286 } 4287 } 4288 4289 mutex_enter(QLOCK(wrq)); 4290 stp->sd_qn_minpsz = rmin; 4291 stp->sd_qn_maxpsz = rmax; 4292 mutex_exit(QLOCK(wrq)); 4293 } 4294 4295 /* 4296 * Need to update the anchor value if this module is removed 4297 * at or below the anchor point. 
If the removed module is at 4298 * the anchor point, remove the anchor for this stream if 4299 * there is no module above the anchor point. Otherwise, if 4300 * the removed module is below the anchor point, decrement the 4301 * anchor point by 1. 4302 */ 4303 if (stp->sd_anchor != 0) { 4304 pos = STRUCT_FGET(strmodremove, pos); 4305 if (pos == stp->sd_pushcnt - stp->sd_anchor + 1) 4306 stp->sd_anchor = 0; 4307 else if (pos > (stp->sd_pushcnt - stp->sd_anchor + 1)) 4308 stp->sd_anchor--; 4309 } 4310 4311 strendplumb(stp); 4312 mutex_exit(&stp->sd_lock); 4313 return (0); 4314 } 4315 4316 case I_ANCHOR: 4317 /* 4318 * Set the anchor position on the stream to reside at 4319 * the top module (in other words, the top module 4320 * cannot be popped). Anchors with a FIFO make no 4321 * obvious sense, so they're not allowed. 4322 */ 4323 mutex_enter(&stp->sd_lock); 4324 4325 if (stp->sd_vnode->v_type == VFIFO) { 4326 mutex_exit(&stp->sd_lock); 4327 return (EINVAL); 4328 } 4329 /* Only allow the same zoneid to update the anchor */ 4330 if (stp->sd_anchor != 0 && 4331 stp->sd_anchorzone != crgetzoneid(crp)) { 4332 mutex_exit(&stp->sd_lock); 4333 return (EINVAL); 4334 } 4335 stp->sd_anchor = stp->sd_pushcnt; 4336 stp->sd_anchorzone = crgetzoneid(crp); 4337 mutex_exit(&stp->sd_lock); 4338 return (0); 4339 4340 case I_LOOK: 4341 /* 4342 * Get name of first module downstream. 4343 * If no module, return an error. 4344 */ 4345 claimstr(wrq); 4346 if (_SAMESTR(wrq) && wrq->q_next->q_next != NULL) { 4347 char *name = Q2NAME(wrq->q_next); 4348 4349 error = strcopyout(name, (void *)arg, strlen(name) + 1, 4350 copyflag); 4351 releasestr(wrq); 4352 return (error); 4353 } 4354 releasestr(wrq); 4355 return (EINVAL); 4356 4357 case I_LINK: 4358 case I_PLINK: 4359 /* 4360 * Link a multiplexor. 4361 */ 4362 return (mlink(vp, cmd, (int)arg, crp, rvalp, 0)); 4363 4364 case _I_PLINK_LH: 4365 /* 4366 * Link a multiplexor: Call must originate from kernel. 
4367 */ 4368 if (kioctl) 4369 return (ldi_mlink_lh(vp, cmd, arg, crp, rvalp)); 4370 4371 return (EINVAL); 4372 case I_UNLINK: 4373 case I_PUNLINK: 4374 /* 4375 * Unlink a multiplexor. 4376 * If arg is -1, unlink all links for which this is the 4377 * controlling stream. Otherwise, arg is an index number 4378 * for a link to be removed. 4379 */ 4380 { 4381 struct linkinfo *linkp; 4382 int native_arg = (int)arg; 4383 int type; 4384 netstack_t *ns; 4385 str_stack_t *ss; 4386 4387 TRACE_1(TR_FAC_STREAMS_FR, 4388 TR_I_UNLINK, "I_UNLINK/I_PUNLINK:%p", stp); 4389 if (vp->v_type == VFIFO) { 4390 return (EINVAL); 4391 } 4392 if (cmd == I_UNLINK) 4393 type = LINKNORMAL; 4394 else /* I_PUNLINK */ 4395 type = LINKPERSIST; 4396 if (native_arg == 0) { 4397 return (EINVAL); 4398 } 4399 ns = netstack_find_by_cred(crp); 4400 ASSERT(ns != NULL); 4401 ss = ns->netstack_str; 4402 ASSERT(ss != NULL); 4403 4404 if (native_arg == MUXID_ALL) 4405 error = munlinkall(stp, type, crp, rvalp, ss); 4406 else { 4407 mutex_enter(&muxifier); 4408 if (!(linkp = findlinks(stp, (int)arg, type, ss))) { 4409 /* invalid user supplied index number */ 4410 mutex_exit(&muxifier); 4411 netstack_rele(ss->ss_netstack); 4412 return (EINVAL); 4413 } 4414 /* munlink drops the muxifier lock */ 4415 error = munlink(stp, linkp, type, crp, rvalp, ss); 4416 } 4417 netstack_rele(ss->ss_netstack); 4418 return (error); 4419 } 4420 4421 case I_FLUSH: 4422 /* 4423 * send a flush message downstream 4424 * flush message can indicate 4425 * FLUSHR - flush read queue 4426 * FLUSHW - flush write queue 4427 * FLUSHRW - flush read/write queue 4428 */ 4429 if (stp->sd_flag & STRHUP) 4430 return (ENXIO); 4431 if (arg & ~FLUSHRW) 4432 return (EINVAL); 4433 4434 for (;;) { 4435 if (putnextctl1(stp->sd_wrq, M_FLUSH, (int)arg)) { 4436 break; 4437 } 4438 if (error = strwaitbuf(1, BPRI_HI)) { 4439 return (error); 4440 } 4441 } 4442 4443 /* 4444 * Send down an unsupported ioctl and wait for the nack 4445 * in order to allow the M_FLUSH 
to propagate back 4446 * up to the stream head. 4447 * Replaces if (qready()) runqueues(); 4448 */ 4449 strioc.ic_cmd = -1; /* The unsupported ioctl */ 4450 strioc.ic_timout = 0; 4451 strioc.ic_len = 0; 4452 strioc.ic_dp = NULL; 4453 (void) strdoioctl(stp, &strioc, flag, K_TO_K, crp, rvalp); 4454 *rvalp = 0; 4455 return (0); 4456 4457 case I_FLUSHBAND: 4458 { 4459 struct bandinfo binfo; 4460 4461 error = strcopyin((void *)arg, &binfo, sizeof (binfo), 4462 copyflag); 4463 if (error) 4464 return (error); 4465 if (stp->sd_flag & STRHUP) 4466 return (ENXIO); 4467 if (binfo.bi_flag & ~FLUSHRW) 4468 return (EINVAL); 4469 while (!(mp = allocb(2, BPRI_HI))) { 4470 if (error = strwaitbuf(2, BPRI_HI)) 4471 return (error); 4472 } 4473 mp->b_datap->db_type = M_FLUSH; 4474 *mp->b_wptr++ = binfo.bi_flag | FLUSHBAND; 4475 *mp->b_wptr++ = binfo.bi_pri; 4476 putnext(stp->sd_wrq, mp); 4477 /* 4478 * Send down an unsupported ioctl and wait for the nack 4479 * in order to allow the M_FLUSH to propagate back 4480 * up to the stream head. 
4481 * Replaces if (qready()) runqueues(); 4482 */ 4483 strioc.ic_cmd = -1; /* The unsupported ioctl */ 4484 strioc.ic_timout = 0; 4485 strioc.ic_len = 0; 4486 strioc.ic_dp = NULL; 4487 (void) strdoioctl(stp, &strioc, flag, K_TO_K, crp, rvalp); 4488 *rvalp = 0; 4489 return (0); 4490 } 4491 4492 case I_SRDOPT: 4493 /* 4494 * Set read options 4495 * 4496 * RNORM - default stream mode 4497 * RMSGN - message no discard 4498 * RMSGD - message discard 4499 * RPROTNORM - fail read with EBADMSG for M_[PC]PROTOs 4500 * RPROTDAT - convert M_[PC]PROTOs to M_DATAs 4501 * RPROTDIS - discard M_[PC]PROTOs and retain M_DATAs 4502 */ 4503 if (arg & ~(RMODEMASK | RPROTMASK)) 4504 return (EINVAL); 4505 4506 if ((arg & (RMSGD|RMSGN)) == (RMSGD|RMSGN)) 4507 return (EINVAL); 4508 4509 mutex_enter(&stp->sd_lock); 4510 switch (arg & RMODEMASK) { 4511 case RNORM: 4512 stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS); 4513 break; 4514 case RMSGD: 4515 stp->sd_read_opt = (stp->sd_read_opt & ~RD_MSGNODIS) | 4516 RD_MSGDIS; 4517 break; 4518 case RMSGN: 4519 stp->sd_read_opt = (stp->sd_read_opt & ~RD_MSGDIS) | 4520 RD_MSGNODIS; 4521 break; 4522 } 4523 4524 switch (arg & RPROTMASK) { 4525 case RPROTNORM: 4526 stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS); 4527 break; 4528 4529 case RPROTDAT: 4530 stp->sd_read_opt = ((stp->sd_read_opt & ~RD_PROTDIS) | 4531 RD_PROTDAT); 4532 break; 4533 4534 case RPROTDIS: 4535 stp->sd_read_opt = ((stp->sd_read_opt & ~RD_PROTDAT) | 4536 RD_PROTDIS); 4537 break; 4538 } 4539 mutex_exit(&stp->sd_lock); 4540 return (0); 4541 4542 case I_GRDOPT: 4543 /* 4544 * Get read option and return the value 4545 * to spot pointed to by arg 4546 */ 4547 { 4548 int rdopt; 4549 4550 rdopt = ((stp->sd_read_opt & RD_MSGDIS) ? RMSGD : 4551 ((stp->sd_read_opt & RD_MSGNODIS) ? RMSGN : RNORM)); 4552 rdopt |= ((stp->sd_read_opt & RD_PROTDAT) ? RPROTDAT : 4553 ((stp->sd_read_opt & RD_PROTDIS) ? 
RPROTDIS : RPROTNORM)); 4554 4555 return (strcopyout(&rdopt, (void *)arg, sizeof (int), 4556 copyflag)); 4557 } 4558 4559 case I_SERROPT: 4560 /* 4561 * Set error options 4562 * 4563 * RERRNORM - persistent read errors 4564 * RERRNONPERSIST - non-persistent read errors 4565 * WERRNORM - persistent write errors 4566 * WERRNONPERSIST - non-persistent write errors 4567 */ 4568 if (arg & ~(RERRMASK | WERRMASK)) 4569 return (EINVAL); 4570 4571 mutex_enter(&stp->sd_lock); 4572 switch (arg & RERRMASK) { 4573 case RERRNORM: 4574 stp->sd_flag &= ~STRDERRNONPERSIST; 4575 break; 4576 case RERRNONPERSIST: 4577 stp->sd_flag |= STRDERRNONPERSIST; 4578 break; 4579 } 4580 switch (arg & WERRMASK) { 4581 case WERRNORM: 4582 stp->sd_flag &= ~STWRERRNONPERSIST; 4583 break; 4584 case WERRNONPERSIST: 4585 stp->sd_flag |= STWRERRNONPERSIST; 4586 break; 4587 } 4588 mutex_exit(&stp->sd_lock); 4589 return (0); 4590 4591 case I_GERROPT: 4592 /* 4593 * Get error option and return the value 4594 * to spot pointed to by arg 4595 */ 4596 { 4597 int erropt = 0; 4598 4599 erropt |= (stp->sd_flag & STRDERRNONPERSIST) ? RERRNONPERSIST : 4600 RERRNORM; 4601 erropt |= (stp->sd_flag & STWRERRNONPERSIST) ? WERRNONPERSIST : 4602 WERRNORM; 4603 return (strcopyout(&erropt, (void *)arg, sizeof (int), 4604 copyflag)); 4605 } 4606 4607 case I_SETSIG: 4608 /* 4609 * Register the calling proc to receive the SIGPOLL 4610 * signal based on the events given in arg. If 4611 * arg is zero, remove the proc from register list. 4612 */ 4613 { 4614 strsig_t *ssp, *pssp; 4615 struct pid *pidp; 4616 4617 pssp = NULL; 4618 pidp = curproc->p_pidp; 4619 /* 4620 * Hold sd_lock to prevent traversal of sd_siglist while 4621 * it is modified. 
4622 */ 4623 mutex_enter(&stp->sd_lock); 4624 for (ssp = stp->sd_siglist; ssp && (ssp->ss_pidp != pidp); 4625 pssp = ssp, ssp = ssp->ss_next) 4626 ; 4627 4628 if (arg) { 4629 if (arg & ~(S_INPUT|S_HIPRI|S_MSG|S_HANGUP|S_ERROR| 4630 S_RDNORM|S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)) { 4631 mutex_exit(&stp->sd_lock); 4632 return (EINVAL); 4633 } 4634 if ((arg & S_BANDURG) && !(arg & S_RDBAND)) { 4635 mutex_exit(&stp->sd_lock); 4636 return (EINVAL); 4637 } 4638 4639 /* 4640 * If proc not already registered, add it 4641 * to list. 4642 */ 4643 if (!ssp) { 4644 ssp = kmem_alloc(sizeof (strsig_t), KM_SLEEP); 4645 ssp->ss_pidp = pidp; 4646 ssp->ss_pid = pidp->pid_id; 4647 ssp->ss_next = NULL; 4648 if (pssp) 4649 pssp->ss_next = ssp; 4650 else 4651 stp->sd_siglist = ssp; 4652 mutex_enter(&pidlock); 4653 PID_HOLD(pidp); 4654 mutex_exit(&pidlock); 4655 } 4656 4657 /* 4658 * Set events. 4659 */ 4660 ssp->ss_events = (int)arg; 4661 } else { 4662 /* 4663 * Remove proc from register list. 4664 */ 4665 if (ssp) { 4666 mutex_enter(&pidlock); 4667 PID_RELE(pidp); 4668 mutex_exit(&pidlock); 4669 if (pssp) 4670 pssp->ss_next = ssp->ss_next; 4671 else 4672 stp->sd_siglist = ssp->ss_next; 4673 kmem_free(ssp, sizeof (strsig_t)); 4674 } else { 4675 mutex_exit(&stp->sd_lock); 4676 return (EINVAL); 4677 } 4678 } 4679 4680 /* 4681 * Recalculate OR of sig events. 4682 */ 4683 stp->sd_sigflags = 0; 4684 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 4685 stp->sd_sigflags |= ssp->ss_events; 4686 mutex_exit(&stp->sd_lock); 4687 return (0); 4688 } 4689 4690 case I_GETSIG: 4691 /* 4692 * Return (in arg) the current registration of events 4693 * for which the calling proc is to be signaled. 
4694 */ 4695 { 4696 struct strsig *ssp; 4697 struct pid *pidp; 4698 4699 pidp = curproc->p_pidp; 4700 mutex_enter(&stp->sd_lock); 4701 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 4702 if (ssp->ss_pidp == pidp) { 4703 error = strcopyout(&ssp->ss_events, (void *)arg, 4704 sizeof (int), copyflag); 4705 mutex_exit(&stp->sd_lock); 4706 return (error); 4707 } 4708 mutex_exit(&stp->sd_lock); 4709 return (EINVAL); 4710 } 4711 4712 case I_ESETSIG: 4713 /* 4714 * Register the ss_pid to receive the SIGPOLL 4715 * signal based on the events is ss_events arg. If 4716 * ss_events is zero, remove the proc from register list. 4717 */ 4718 { 4719 struct strsig *ssp, *pssp; 4720 struct proc *proc; 4721 struct pid *pidp; 4722 pid_t pid; 4723 struct strsigset ss; 4724 4725 error = strcopyin((void *)arg, &ss, sizeof (ss), copyflag); 4726 if (error) 4727 return (error); 4728 4729 pid = ss.ss_pid; 4730 4731 if (ss.ss_events != 0) { 4732 /* 4733 * Permissions check by sending signal 0. 4734 * Note that when kill fails it does a set_errno 4735 * causing the system call to fail. 4736 */ 4737 error = kill(pid, 0); 4738 if (error) { 4739 return (error); 4740 } 4741 } 4742 mutex_enter(&pidlock); 4743 if (pid == 0) 4744 proc = curproc; 4745 else if (pid < 0) 4746 proc = pgfind(-pid); 4747 else 4748 proc = prfind(pid); 4749 if (proc == NULL) { 4750 mutex_exit(&pidlock); 4751 return (ESRCH); 4752 } 4753 if (pid < 0) 4754 pidp = proc->p_pgidp; 4755 else 4756 pidp = proc->p_pidp; 4757 ASSERT(pidp); 4758 /* 4759 * Get a hold on the pid structure while referencing it. 4760 * There is a separate PID_HOLD should it be inserted 4761 * in the list below. 4762 */ 4763 PID_HOLD(pidp); 4764 mutex_exit(&pidlock); 4765 4766 pssp = NULL; 4767 /* 4768 * Hold sd_lock to prevent traversal of sd_siglist while 4769 * it is modified. 
4770 */ 4771 mutex_enter(&stp->sd_lock); 4772 for (ssp = stp->sd_siglist; ssp && (ssp->ss_pid != pid); 4773 pssp = ssp, ssp = ssp->ss_next) 4774 ; 4775 4776 if (ss.ss_events) { 4777 if (ss.ss_events & 4778 ~(S_INPUT|S_HIPRI|S_MSG|S_HANGUP|S_ERROR| 4779 S_RDNORM|S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)) { 4780 mutex_exit(&stp->sd_lock); 4781 mutex_enter(&pidlock); 4782 PID_RELE(pidp); 4783 mutex_exit(&pidlock); 4784 return (EINVAL); 4785 } 4786 if ((ss.ss_events & S_BANDURG) && 4787 !(ss.ss_events & S_RDBAND)) { 4788 mutex_exit(&stp->sd_lock); 4789 mutex_enter(&pidlock); 4790 PID_RELE(pidp); 4791 mutex_exit(&pidlock); 4792 return (EINVAL); 4793 } 4794 4795 /* 4796 * If proc not already registered, add it 4797 * to list. 4798 */ 4799 if (!ssp) { 4800 ssp = kmem_alloc(sizeof (strsig_t), KM_SLEEP); 4801 ssp->ss_pidp = pidp; 4802 ssp->ss_pid = pid; 4803 ssp->ss_next = NULL; 4804 if (pssp) 4805 pssp->ss_next = ssp; 4806 else 4807 stp->sd_siglist = ssp; 4808 mutex_enter(&pidlock); 4809 PID_HOLD(pidp); 4810 mutex_exit(&pidlock); 4811 } 4812 4813 /* 4814 * Set events. 4815 */ 4816 ssp->ss_events = ss.ss_events; 4817 } else { 4818 /* 4819 * Remove proc from register list. 4820 */ 4821 if (ssp) { 4822 mutex_enter(&pidlock); 4823 PID_RELE(pidp); 4824 mutex_exit(&pidlock); 4825 if (pssp) 4826 pssp->ss_next = ssp->ss_next; 4827 else 4828 stp->sd_siglist = ssp->ss_next; 4829 kmem_free(ssp, sizeof (strsig_t)); 4830 } else { 4831 mutex_exit(&stp->sd_lock); 4832 mutex_enter(&pidlock); 4833 PID_RELE(pidp); 4834 mutex_exit(&pidlock); 4835 return (EINVAL); 4836 } 4837 } 4838 4839 /* 4840 * Recalculate OR of sig events. 
4841 */ 4842 stp->sd_sigflags = 0; 4843 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 4844 stp->sd_sigflags |= ssp->ss_events; 4845 mutex_exit(&stp->sd_lock); 4846 mutex_enter(&pidlock); 4847 PID_RELE(pidp); 4848 mutex_exit(&pidlock); 4849 return (0); 4850 } 4851 4852 case I_EGETSIG: 4853 /* 4854 * Return (in arg) the current registration of events 4855 * for which the calling proc is to be signaled. 4856 */ 4857 { 4858 struct strsig *ssp; 4859 struct proc *proc; 4860 pid_t pid; 4861 struct pid *pidp; 4862 struct strsigset ss; 4863 4864 error = strcopyin((void *)arg, &ss, sizeof (ss), copyflag); 4865 if (error) 4866 return (error); 4867 4868 pid = ss.ss_pid; 4869 mutex_enter(&pidlock); 4870 if (pid == 0) 4871 proc = curproc; 4872 else if (pid < 0) 4873 proc = pgfind(-pid); 4874 else 4875 proc = prfind(pid); 4876 if (proc == NULL) { 4877 mutex_exit(&pidlock); 4878 return (ESRCH); 4879 } 4880 if (pid < 0) 4881 pidp = proc->p_pgidp; 4882 else 4883 pidp = proc->p_pidp; 4884 4885 /* Prevent the pidp from being reassigned */ 4886 PID_HOLD(pidp); 4887 mutex_exit(&pidlock); 4888 4889 mutex_enter(&stp->sd_lock); 4890 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 4891 if (ssp->ss_pid == pid) { 4892 ss.ss_pid = ssp->ss_pid; 4893 ss.ss_events = ssp->ss_events; 4894 error = strcopyout(&ss, (void *)arg, 4895 sizeof (struct strsigset), copyflag); 4896 mutex_exit(&stp->sd_lock); 4897 mutex_enter(&pidlock); 4898 PID_RELE(pidp); 4899 mutex_exit(&pidlock); 4900 return (error); 4901 } 4902 mutex_exit(&stp->sd_lock); 4903 mutex_enter(&pidlock); 4904 PID_RELE(pidp); 4905 mutex_exit(&pidlock); 4906 return (EINVAL); 4907 } 4908 4909 case I_PEEK: 4910 { 4911 STRUCT_DECL(strpeek, strpeek); 4912 size_t n; 4913 mblk_t *fmp, *tmp_mp = NULL; 4914 4915 STRUCT_INIT(strpeek, flag); 4916 4917 error = strcopyin((void *)arg, STRUCT_BUF(strpeek), 4918 STRUCT_SIZE(strpeek), copyflag); 4919 if (error) 4920 return (error); 4921 4922 mutex_enter(QLOCK(rdq)); 4923 /* 4924 * Skip the invalid 
messages 4925 */ 4926 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next) 4927 if (mp->b_datap->db_type != M_SIG) 4928 break; 4929 4930 /* 4931 * If user has requested to peek at a high priority message 4932 * and first message is not, return 0 4933 */ 4934 if (mp != NULL) { 4935 if ((STRUCT_FGET(strpeek, flags) & RS_HIPRI) && 4936 queclass(mp) == QNORM) { 4937 *rvalp = 0; 4938 mutex_exit(QLOCK(rdq)); 4939 return (0); 4940 } 4941 } else if (stp->sd_struiordq == NULL || 4942 (STRUCT_FGET(strpeek, flags) & RS_HIPRI)) { 4943 /* 4944 * No mblks to look at at the streamhead and 4945 * 1). This isn't a synch stream or 4946 * 2). This is a synch stream but caller wants high 4947 * priority messages which is not supported by 4948 * the synch stream. (it only supports QNORM) 4949 */ 4950 *rvalp = 0; 4951 mutex_exit(QLOCK(rdq)); 4952 return (0); 4953 } 4954 4955 fmp = mp; 4956 4957 if (mp && mp->b_datap->db_type == M_PASSFP) { 4958 mutex_exit(QLOCK(rdq)); 4959 return (EBADMSG); 4960 } 4961 4962 ASSERT(mp == NULL || mp->b_datap->db_type == M_PCPROTO || 4963 mp->b_datap->db_type == M_PROTO || 4964 mp->b_datap->db_type == M_DATA); 4965 4966 if (mp && mp->b_datap->db_type == M_PCPROTO) { 4967 STRUCT_FSET(strpeek, flags, RS_HIPRI); 4968 } else { 4969 STRUCT_FSET(strpeek, flags, 0); 4970 } 4971 4972 4973 if (mp && ((tmp_mp = dupmsg(mp)) == NULL)) { 4974 mutex_exit(QLOCK(rdq)); 4975 return (ENOSR); 4976 } 4977 mutex_exit(QLOCK(rdq)); 4978 4979 /* 4980 * set mp = tmp_mp, so that I_PEEK processing can continue. 4981 * tmp_mp is used to free the dup'd message. 4982 */ 4983 mp = tmp_mp; 4984 4985 uio.uio_fmode = 0; 4986 uio.uio_extflg = UIO_COPY_CACHED; 4987 uio.uio_segflg = (copyflag == U_TO_K) ? UIO_USERSPACE : 4988 UIO_SYSSPACE; 4989 uio.uio_limit = 0; 4990 /* 4991 * First process PROTO blocks, if any. 4992 * If user doesn't want to get ctl info by setting maxlen <= 0, 4993 * then set len to -1/0 and skip control blocks part. 
4994 */ 4995 if (STRUCT_FGET(strpeek, ctlbuf.maxlen) < 0) 4996 STRUCT_FSET(strpeek, ctlbuf.len, -1); 4997 else if (STRUCT_FGET(strpeek, ctlbuf.maxlen) == 0) 4998 STRUCT_FSET(strpeek, ctlbuf.len, 0); 4999 else { 5000 int ctl_part = 0; 5001 5002 iov.iov_base = STRUCT_FGETP(strpeek, ctlbuf.buf); 5003 iov.iov_len = STRUCT_FGET(strpeek, ctlbuf.maxlen); 5004 uio.uio_iov = &iov; 5005 uio.uio_resid = iov.iov_len; 5006 uio.uio_loffset = 0; 5007 uio.uio_iovcnt = 1; 5008 while (mp && mp->b_datap->db_type != M_DATA && 5009 uio.uio_resid >= 0) { 5010 ASSERT(STRUCT_FGET(strpeek, flags) == 0 ? 5011 mp->b_datap->db_type == M_PROTO : 5012 mp->b_datap->db_type == M_PCPROTO); 5013 5014 if ((n = MIN(uio.uio_resid, 5015 mp->b_wptr - mp->b_rptr)) != 0 && 5016 (error = uiomove((char *)mp->b_rptr, n, 5017 UIO_READ, &uio)) != 0) { 5018 freemsg(tmp_mp); 5019 return (error); 5020 } 5021 ctl_part = 1; 5022 mp = mp->b_cont; 5023 } 5024 /* No ctl message */ 5025 if (ctl_part == 0) 5026 STRUCT_FSET(strpeek, ctlbuf.len, -1); 5027 else 5028 STRUCT_FSET(strpeek, ctlbuf.len, 5029 STRUCT_FGET(strpeek, ctlbuf.maxlen) - 5030 uio.uio_resid); 5031 } 5032 5033 /* 5034 * Now process DATA blocks, if any. 5035 * If user doesn't want to get data info by setting maxlen <= 0, 5036 * then set len to -1/0 and skip data blocks part. 
5037 */ 5038 if (STRUCT_FGET(strpeek, databuf.maxlen) < 0) 5039 STRUCT_FSET(strpeek, databuf.len, -1); 5040 else if (STRUCT_FGET(strpeek, databuf.maxlen) == 0) 5041 STRUCT_FSET(strpeek, databuf.len, 0); 5042 else { 5043 int data_part = 0; 5044 5045 iov.iov_base = STRUCT_FGETP(strpeek, databuf.buf); 5046 iov.iov_len = STRUCT_FGET(strpeek, databuf.maxlen); 5047 uio.uio_iov = &iov; 5048 uio.uio_resid = iov.iov_len; 5049 uio.uio_loffset = 0; 5050 uio.uio_iovcnt = 1; 5051 while (mp && uio.uio_resid) { 5052 if (mp->b_datap->db_type == M_DATA) { 5053 if ((n = MIN(uio.uio_resid, 5054 mp->b_wptr - mp->b_rptr)) != 0 && 5055 (error = uiomove((char *)mp->b_rptr, 5056 n, UIO_READ, &uio)) != 0) { 5057 freemsg(tmp_mp); 5058 return (error); 5059 } 5060 data_part = 1; 5061 } 5062 ASSERT(data_part == 0 || 5063 mp->b_datap->db_type == M_DATA); 5064 mp = mp->b_cont; 5065 } 5066 /* No data message */ 5067 if (data_part == 0) 5068 STRUCT_FSET(strpeek, databuf.len, -1); 5069 else 5070 STRUCT_FSET(strpeek, databuf.len, 5071 STRUCT_FGET(strpeek, databuf.maxlen) - 5072 uio.uio_resid); 5073 } 5074 freemsg(tmp_mp); 5075 5076 /* 5077 * It is a synch stream and user wants to get 5078 * data (maxlen > 0). 5079 * uio setup is done by the codes that process DATA 5080 * blocks above. 5081 */ 5082 if ((fmp == NULL) && STRUCT_FGET(strpeek, databuf.maxlen) > 0) { 5083 infod_t infod; 5084 5085 infod.d_cmd = INFOD_COPYOUT; 5086 infod.d_res = 0; 5087 infod.d_uiop = &uio; 5088 error = infonext(rdq, &infod); 5089 if (error == EINVAL || error == EBUSY) 5090 error = 0; 5091 if (error) 5092 return (error); 5093 STRUCT_FSET(strpeek, databuf.len, STRUCT_FGET(strpeek, 5094 databuf.maxlen) - uio.uio_resid); 5095 if (STRUCT_FGET(strpeek, databuf.len) == 0) { 5096 /* 5097 * No data found by the infonext(). 
5098 */ 5099 STRUCT_FSET(strpeek, databuf.len, -1); 5100 } 5101 } 5102 error = strcopyout(STRUCT_BUF(strpeek), (void *)arg, 5103 STRUCT_SIZE(strpeek), copyflag); 5104 if (error) { 5105 return (error); 5106 } 5107 /* 5108 * If there is no message retrieved, set return code to 0 5109 * otherwise, set it to 1. 5110 */ 5111 if (STRUCT_FGET(strpeek, ctlbuf.len) == -1 && 5112 STRUCT_FGET(strpeek, databuf.len) == -1) 5113 *rvalp = 0; 5114 else 5115 *rvalp = 1; 5116 return (0); 5117 } 5118 5119 case I_FDINSERT: 5120 { 5121 STRUCT_DECL(strfdinsert, strfdinsert); 5122 struct file *resftp; 5123 struct stdata *resstp; 5124 t_uscalar_t ival; 5125 ssize_t msgsize; 5126 struct strbuf mctl; 5127 5128 STRUCT_INIT(strfdinsert, flag); 5129 if (stp->sd_flag & STRHUP) 5130 return (ENXIO); 5131 /* 5132 * STRDERR, STWRERR and STPLEX tested above. 5133 */ 5134 error = strcopyin((void *)arg, STRUCT_BUF(strfdinsert), 5135 STRUCT_SIZE(strfdinsert), copyflag); 5136 if (error) 5137 return (error); 5138 5139 if (STRUCT_FGET(strfdinsert, offset) < 0 || 5140 (STRUCT_FGET(strfdinsert, offset) % 5141 sizeof (t_uscalar_t)) != 0) 5142 return (EINVAL); 5143 if ((resftp = getf(STRUCT_FGET(strfdinsert, fildes))) != NULL) { 5144 if ((resstp = resftp->f_vnode->v_stream) == NULL) { 5145 releasef(STRUCT_FGET(strfdinsert, fildes)); 5146 return (EINVAL); 5147 } 5148 } else 5149 return (EINVAL); 5150 5151 mutex_enter(&resstp->sd_lock); 5152 if (resstp->sd_flag & (STRDERR|STWRERR|STRHUP|STPLEX)) { 5153 error = strgeterr(resstp, 5154 STRDERR|STWRERR|STRHUP|STPLEX, 0); 5155 if (error != 0) { 5156 mutex_exit(&resstp->sd_lock); 5157 releasef(STRUCT_FGET(strfdinsert, fildes)); 5158 return (error); 5159 } 5160 } 5161 mutex_exit(&resstp->sd_lock); 5162 5163 #ifdef _ILP32 5164 { 5165 queue_t *q; 5166 queue_t *mate = NULL; 5167 5168 /* get read queue of stream terminus */ 5169 claimstr(resstp->sd_wrq); 5170 for (q = resstp->sd_wrq->q_next; q->q_next != NULL; 5171 q = q->q_next) 5172 if (!STRMATED(resstp) && STREAM(q) != 
resstp && 5173 mate == NULL) { 5174 ASSERT(q->q_qinfo->qi_srvp); 5175 ASSERT(_OTHERQ(q)->q_qinfo->qi_srvp); 5176 claimstr(q); 5177 mate = q; 5178 } 5179 q = _RD(q); 5180 if (mate) 5181 releasestr(mate); 5182 releasestr(resstp->sd_wrq); 5183 ival = (t_uscalar_t)q; 5184 } 5185 #else 5186 ival = (t_uscalar_t)getminor(resftp->f_vnode->v_rdev); 5187 #endif /* _ILP32 */ 5188 5189 if (STRUCT_FGET(strfdinsert, ctlbuf.len) < 5190 STRUCT_FGET(strfdinsert, offset) + sizeof (t_uscalar_t)) { 5191 releasef(STRUCT_FGET(strfdinsert, fildes)); 5192 return (EINVAL); 5193 } 5194 5195 /* 5196 * Check for legal flag value. 5197 */ 5198 if (STRUCT_FGET(strfdinsert, flags) & ~RS_HIPRI) { 5199 releasef(STRUCT_FGET(strfdinsert, fildes)); 5200 return (EINVAL); 5201 } 5202 5203 /* get these values from those cached in the stream head */ 5204 mutex_enter(QLOCK(stp->sd_wrq)); 5205 rmin = stp->sd_qn_minpsz; 5206 rmax = stp->sd_qn_maxpsz; 5207 mutex_exit(QLOCK(stp->sd_wrq)); 5208 5209 /* 5210 * Make sure ctl and data sizes together fall within 5211 * the limits of the max and min receive packet sizes 5212 * and do not exceed system limit. A negative data 5213 * length means that no data part is to be sent. 
5214 */ 5215 ASSERT((rmax >= 0) || (rmax == INFPSZ)); 5216 if (rmax == 0) { 5217 releasef(STRUCT_FGET(strfdinsert, fildes)); 5218 return (ERANGE); 5219 } 5220 if ((msgsize = STRUCT_FGET(strfdinsert, databuf.len)) < 0) 5221 msgsize = 0; 5222 if ((msgsize < rmin) || 5223 ((msgsize > rmax) && (rmax != INFPSZ)) || 5224 (STRUCT_FGET(strfdinsert, ctlbuf.len) > strctlsz)) { 5225 releasef(STRUCT_FGET(strfdinsert, fildes)); 5226 return (ERANGE); 5227 } 5228 5229 mutex_enter(&stp->sd_lock); 5230 while (!(STRUCT_FGET(strfdinsert, flags) & RS_HIPRI) && 5231 !canputnext(stp->sd_wrq)) { 5232 if ((error = strwaitq(stp, WRITEWAIT, (ssize_t)0, 5233 flag, -1, &done)) != 0 || done) { 5234 mutex_exit(&stp->sd_lock); 5235 releasef(STRUCT_FGET(strfdinsert, fildes)); 5236 return (error); 5237 } 5238 if ((error = i_straccess(stp, access)) != 0) { 5239 mutex_exit(&stp->sd_lock); 5240 releasef( 5241 STRUCT_FGET(strfdinsert, fildes)); 5242 return (error); 5243 } 5244 } 5245 mutex_exit(&stp->sd_lock); 5246 5247 /* 5248 * Copy strfdinsert.ctlbuf into native form of 5249 * ctlbuf to pass down into strmakemsg(). 5250 */ 5251 mctl.maxlen = STRUCT_FGET(strfdinsert, ctlbuf.maxlen); 5252 mctl.len = STRUCT_FGET(strfdinsert, ctlbuf.len); 5253 mctl.buf = STRUCT_FGETP(strfdinsert, ctlbuf.buf); 5254 5255 iov.iov_base = STRUCT_FGETP(strfdinsert, databuf.buf); 5256 iov.iov_len = STRUCT_FGET(strfdinsert, databuf.len); 5257 uio.uio_iov = &iov; 5258 uio.uio_iovcnt = 1; 5259 uio.uio_loffset = 0; 5260 uio.uio_segflg = (copyflag == U_TO_K) ? 
UIO_USERSPACE : 5261 UIO_SYSSPACE; 5262 uio.uio_fmode = 0; 5263 uio.uio_extflg = UIO_COPY_CACHED; 5264 uio.uio_resid = iov.iov_len; 5265 if ((error = strmakemsg(&mctl, 5266 &msgsize, &uio, stp, 5267 STRUCT_FGET(strfdinsert, flags), &mp)) != 0 || !mp) { 5268 STRUCT_FSET(strfdinsert, databuf.len, msgsize); 5269 releasef(STRUCT_FGET(strfdinsert, fildes)); 5270 return (error); 5271 } 5272 5273 STRUCT_FSET(strfdinsert, databuf.len, msgsize); 5274 5275 /* 5276 * Place the possibly reencoded queue pointer 'offset' bytes 5277 * from the start of the control portion of the message. 5278 */ 5279 *((t_uscalar_t *)(mp->b_rptr + 5280 STRUCT_FGET(strfdinsert, offset))) = ival; 5281 5282 /* 5283 * Put message downstream. 5284 */ 5285 stream_willservice(stp); 5286 putnext(stp->sd_wrq, mp); 5287 stream_runservice(stp); 5288 releasef(STRUCT_FGET(strfdinsert, fildes)); 5289 return (error); 5290 } 5291 5292 case I_SENDFD: 5293 { 5294 struct file *fp; 5295 5296 if ((fp = getf((int)arg)) == NULL) 5297 return (EBADF); 5298 error = do_sendfp(stp, fp, crp); 5299 if (auditing) { 5300 audit_fdsend((int)arg, fp, error); 5301 } 5302 releasef((int)arg); 5303 return (error); 5304 } 5305 5306 case I_RECVFD: 5307 case I_E_RECVFD: 5308 { 5309 struct k_strrecvfd *srf; 5310 int i, fd; 5311 5312 mutex_enter(&stp->sd_lock); 5313 while (!(mp = getq(rdq))) { 5314 if (stp->sd_flag & (STRHUP|STREOF)) { 5315 mutex_exit(&stp->sd_lock); 5316 return (ENXIO); 5317 } 5318 if ((error = strwaitq(stp, GETWAIT, (ssize_t)0, 5319 flag, -1, &done)) != 0 || done) { 5320 mutex_exit(&stp->sd_lock); 5321 return (error); 5322 } 5323 if ((error = i_straccess(stp, access)) != 0) { 5324 mutex_exit(&stp->sd_lock); 5325 return (error); 5326 } 5327 } 5328 if (mp->b_datap->db_type != M_PASSFP) { 5329 putback(stp, rdq, mp, mp->b_band); 5330 mutex_exit(&stp->sd_lock); 5331 return (EBADMSG); 5332 } 5333 mutex_exit(&stp->sd_lock); 5334 5335 srf = (struct k_strrecvfd *)mp->b_rptr; 5336 if ((fd = ufalloc(0)) == -1) { 5337 
mutex_enter(&stp->sd_lock); 5338 putback(stp, rdq, mp, mp->b_band); 5339 mutex_exit(&stp->sd_lock); 5340 return (EMFILE); 5341 } 5342 if (cmd == I_RECVFD) { 5343 struct o_strrecvfd ostrfd; 5344 5345 /* check to see if uid/gid values are too large. */ 5346 5347 if (srf->uid > (o_uid_t)USHRT_MAX || 5348 srf->gid > (o_gid_t)USHRT_MAX) { 5349 mutex_enter(&stp->sd_lock); 5350 putback(stp, rdq, mp, mp->b_band); 5351 mutex_exit(&stp->sd_lock); 5352 setf(fd, NULL); /* release fd entry */ 5353 return (EOVERFLOW); 5354 } 5355 5356 ostrfd.fd = fd; 5357 ostrfd.uid = (o_uid_t)srf->uid; 5358 ostrfd.gid = (o_gid_t)srf->gid; 5359 5360 /* Null the filler bits */ 5361 for (i = 0; i < 8; i++) 5362 ostrfd.fill[i] = 0; 5363 5364 error = strcopyout(&ostrfd, (void *)arg, 5365 sizeof (struct o_strrecvfd), copyflag); 5366 } else { /* I_E_RECVFD */ 5367 struct strrecvfd strfd; 5368 5369 strfd.fd = fd; 5370 strfd.uid = srf->uid; 5371 strfd.gid = srf->gid; 5372 5373 /* null the filler bits */ 5374 for (i = 0; i < 8; i++) 5375 strfd.fill[i] = 0; 5376 5377 error = strcopyout(&strfd, (void *)arg, 5378 sizeof (struct strrecvfd), copyflag); 5379 } 5380 5381 if (error) { 5382 setf(fd, NULL); /* release fd entry */ 5383 mutex_enter(&stp->sd_lock); 5384 putback(stp, rdq, mp, mp->b_band); 5385 mutex_exit(&stp->sd_lock); 5386 return (error); 5387 } 5388 if (auditing) { 5389 audit_fdrecv(fd, srf->fp); 5390 } 5391 5392 /* 5393 * Always increment f_count since the freemsg() below will 5394 * always call free_passfp() which performs a closef(). 5395 */ 5396 mutex_enter(&srf->fp->f_tlock); 5397 srf->fp->f_count++; 5398 mutex_exit(&srf->fp->f_tlock); 5399 setf(fd, srf->fp); 5400 freemsg(mp); 5401 return (0); 5402 } 5403 5404 case I_SWROPT: 5405 /* 5406 * Set/clear the write options. arg is a bit 5407 * mask with any of the following bits set... 5408 * SNDZERO - send zero length message 5409 * SNDPIPE - send sigpipe to process if 5410 * sd_werror is set and process is 5411 * doing a write or putmsg. 
5412 * The new stream head write options should reflect 5413 * what is in arg. 5414 */ 5415 if (arg & ~(SNDZERO|SNDPIPE)) 5416 return (EINVAL); 5417 5418 mutex_enter(&stp->sd_lock); 5419 stp->sd_wput_opt &= ~(SW_SIGPIPE|SW_SNDZERO); 5420 if (arg & SNDZERO) 5421 stp->sd_wput_opt |= SW_SNDZERO; 5422 if (arg & SNDPIPE) 5423 stp->sd_wput_opt |= SW_SIGPIPE; 5424 mutex_exit(&stp->sd_lock); 5425 return (0); 5426 5427 case I_GWROPT: 5428 { 5429 int wropt = 0; 5430 5431 if (stp->sd_wput_opt & SW_SNDZERO) 5432 wropt |= SNDZERO; 5433 if (stp->sd_wput_opt & SW_SIGPIPE) 5434 wropt |= SNDPIPE; 5435 return (strcopyout(&wropt, (void *)arg, sizeof (wropt), 5436 copyflag)); 5437 } 5438 5439 case I_LIST: 5440 /* 5441 * Returns all the modules found on this stream, 5442 * upto the driver. If argument is NULL, return the 5443 * number of modules (including driver). If argument 5444 * is not NULL, copy the names into the structure 5445 * provided. 5446 */ 5447 5448 { 5449 queue_t *q; 5450 char *qname; 5451 int i, nmods; 5452 struct str_mlist *mlist; 5453 STRUCT_DECL(str_list, strlist); 5454 5455 if (arg == NULL) { /* Return number of modules plus driver */ 5456 if (stp->sd_vnode->v_type == VFIFO) 5457 *rvalp = stp->sd_pushcnt; 5458 else 5459 *rvalp = stp->sd_pushcnt + 1; 5460 return (0); 5461 } 5462 5463 STRUCT_INIT(strlist, flag); 5464 5465 error = strcopyin((void *)arg, STRUCT_BUF(strlist), 5466 STRUCT_SIZE(strlist), copyflag); 5467 if (error != 0) 5468 return (error); 5469 5470 mlist = STRUCT_FGETP(strlist, sl_modlist); 5471 nmods = STRUCT_FGET(strlist, sl_nmods); 5472 if (nmods <= 0) 5473 return (EINVAL); 5474 5475 claimstr(stp->sd_wrq); 5476 q = stp->sd_wrq; 5477 for (i = 0; i < nmods && _SAMESTR(q); i++, q = q->q_next) { 5478 qname = Q2NAME(q->q_next); 5479 error = strcopyout(qname, &mlist[i], strlen(qname) + 1, 5480 copyflag); 5481 if (error != 0) { 5482 releasestr(stp->sd_wrq); 5483 return (error); 5484 } 5485 } 5486 releasestr(stp->sd_wrq); 5487 return (strcopyout(&i, (void 
*)arg, sizeof (int), copyflag)); 5488 } 5489 5490 case I_CKBAND: 5491 { 5492 queue_t *q; 5493 qband_t *qbp; 5494 5495 if ((arg < 0) || (arg >= NBAND)) 5496 return (EINVAL); 5497 q = _RD(stp->sd_wrq); 5498 mutex_enter(QLOCK(q)); 5499 if (arg > (int)q->q_nband) { 5500 *rvalp = 0; 5501 } else { 5502 if (arg == 0) { 5503 if (q->q_first) 5504 *rvalp = 1; 5505 else 5506 *rvalp = 0; 5507 } else { 5508 qbp = q->q_bandp; 5509 while (--arg > 0) 5510 qbp = qbp->qb_next; 5511 if (qbp->qb_first) 5512 *rvalp = 1; 5513 else 5514 *rvalp = 0; 5515 } 5516 } 5517 mutex_exit(QLOCK(q)); 5518 return (0); 5519 } 5520 5521 case I_GETBAND: 5522 { 5523 int intpri; 5524 queue_t *q; 5525 5526 q = _RD(stp->sd_wrq); 5527 mutex_enter(QLOCK(q)); 5528 mp = q->q_first; 5529 if (!mp) { 5530 mutex_exit(QLOCK(q)); 5531 return (ENODATA); 5532 } 5533 intpri = (int)mp->b_band; 5534 error = strcopyout(&intpri, (void *)arg, sizeof (int), 5535 copyflag); 5536 mutex_exit(QLOCK(q)); 5537 return (error); 5538 } 5539 5540 case I_ATMARK: 5541 { 5542 queue_t *q; 5543 5544 if (arg & ~(ANYMARK|LASTMARK)) 5545 return (EINVAL); 5546 q = _RD(stp->sd_wrq); 5547 mutex_enter(&stp->sd_lock); 5548 if ((stp->sd_flag & STRATMARK) && (arg == ANYMARK)) { 5549 *rvalp = 1; 5550 } else { 5551 mutex_enter(QLOCK(q)); 5552 mp = q->q_first; 5553 5554 if (mp == NULL) 5555 *rvalp = 0; 5556 else if ((arg == ANYMARK) && (mp->b_flag & MSGMARK)) 5557 *rvalp = 1; 5558 else if ((arg == LASTMARK) && (mp == stp->sd_mark)) 5559 *rvalp = 1; 5560 else 5561 *rvalp = 0; 5562 mutex_exit(QLOCK(q)); 5563 } 5564 mutex_exit(&stp->sd_lock); 5565 return (0); 5566 } 5567 5568 case I_CANPUT: 5569 { 5570 char band; 5571 5572 if ((arg < 0) || (arg >= NBAND)) 5573 return (EINVAL); 5574 band = (char)arg; 5575 *rvalp = bcanputnext(stp->sd_wrq, band); 5576 return (0); 5577 } 5578 5579 case I_SETCLTIME: 5580 { 5581 int closetime; 5582 5583 error = strcopyin((void *)arg, &closetime, sizeof (int), 5584 copyflag); 5585 if (error) 5586 return (error); 5587 if 
(closetime < 0) 5588 return (EINVAL); 5589 5590 stp->sd_closetime = closetime; 5591 return (0); 5592 } 5593 5594 case I_GETCLTIME: 5595 { 5596 int closetime; 5597 5598 closetime = stp->sd_closetime; 5599 return (strcopyout(&closetime, (void *)arg, sizeof (int), 5600 copyflag)); 5601 } 5602 5603 case TIOCGSID: 5604 { 5605 pid_t sid; 5606 5607 mutex_enter(&stp->sd_lock); 5608 if (stp->sd_sidp == NULL) { 5609 mutex_exit(&stp->sd_lock); 5610 return (ENOTTY); 5611 } 5612 sid = stp->sd_sidp->pid_id; 5613 mutex_exit(&stp->sd_lock); 5614 return (strcopyout(&sid, (void *)arg, sizeof (pid_t), 5615 copyflag)); 5616 } 5617 5618 case TIOCSPGRP: 5619 { 5620 pid_t pgrp; 5621 proc_t *q; 5622 pid_t sid, fg_pgid, bg_pgid; 5623 5624 if (error = strcopyin((void *)arg, &pgrp, sizeof (pid_t), 5625 copyflag)) 5626 return (error); 5627 mutex_enter(&stp->sd_lock); 5628 mutex_enter(&pidlock); 5629 if (stp->sd_sidp != ttoproc(curthread)->p_sessp->s_sidp) { 5630 mutex_exit(&pidlock); 5631 mutex_exit(&stp->sd_lock); 5632 return (ENOTTY); 5633 } 5634 if (pgrp == stp->sd_pgidp->pid_id) { 5635 mutex_exit(&pidlock); 5636 mutex_exit(&stp->sd_lock); 5637 return (0); 5638 } 5639 if (pgrp <= 0 || pgrp >= maxpid) { 5640 mutex_exit(&pidlock); 5641 mutex_exit(&stp->sd_lock); 5642 return (EINVAL); 5643 } 5644 if ((q = pgfind(pgrp)) == NULL || 5645 q->p_sessp != ttoproc(curthread)->p_sessp) { 5646 mutex_exit(&pidlock); 5647 mutex_exit(&stp->sd_lock); 5648 return (EPERM); 5649 } 5650 sid = stp->sd_sidp->pid_id; 5651 fg_pgid = q->p_pgrp; 5652 bg_pgid = stp->sd_pgidp->pid_id; 5653 CL_SET_PROCESS_GROUP(curthread, sid, bg_pgid, fg_pgid); 5654 PID_RELE(stp->sd_pgidp); 5655 ctty_clear_sighuped(); 5656 stp->sd_pgidp = q->p_pgidp; 5657 PID_HOLD(stp->sd_pgidp); 5658 mutex_exit(&pidlock); 5659 mutex_exit(&stp->sd_lock); 5660 return (0); 5661 } 5662 5663 case TIOCGPGRP: 5664 { 5665 pid_t pgrp; 5666 5667 mutex_enter(&stp->sd_lock); 5668 if (stp->sd_sidp == NULL) { 5669 mutex_exit(&stp->sd_lock); 5670 return (ENOTTY); 
5671 } 5672 pgrp = stp->sd_pgidp->pid_id; 5673 mutex_exit(&stp->sd_lock); 5674 return (strcopyout(&pgrp, (void *)arg, sizeof (pid_t), 5675 copyflag)); 5676 } 5677 5678 case TIOCSCTTY: 5679 { 5680 return (strctty(stp)); 5681 } 5682 5683 case TIOCNOTTY: 5684 { 5685 /* freectty() always assumes curproc. */ 5686 if (freectty(B_FALSE) != 0) 5687 return (0); 5688 return (ENOTTY); 5689 } 5690 5691 case FIONBIO: 5692 case FIOASYNC: 5693 return (0); /* handled by the upper layer */ 5694 } 5695 } 5696 5697 /* 5698 * Custom free routine used for M_PASSFP messages. 5699 */ 5700 static void 5701 free_passfp(struct k_strrecvfd *srf) 5702 { 5703 (void) closef(srf->fp); 5704 kmem_free(srf, sizeof (struct k_strrecvfd) + sizeof (frtn_t)); 5705 } 5706 5707 /* ARGSUSED */ 5708 int 5709 do_sendfp(struct stdata *stp, struct file *fp, struct cred *cr) 5710 { 5711 queue_t *qp, *nextqp; 5712 struct k_strrecvfd *srf; 5713 mblk_t *mp; 5714 frtn_t *frtnp; 5715 size_t bufsize; 5716 queue_t *mate = NULL; 5717 syncq_t *sq = NULL; 5718 int retval = 0; 5719 5720 if (stp->sd_flag & STRHUP) 5721 return (ENXIO); 5722 5723 claimstr(stp->sd_wrq); 5724 5725 /* Fastpath, we have a pipe, and we are already mated, use it. */ 5726 if (STRMATED(stp)) { 5727 qp = _RD(stp->sd_mate->sd_wrq); 5728 claimstr(qp); 5729 mate = qp; 5730 } else { /* Not already mated. */ 5731 5732 /* 5733 * Walk the stream to the end of this one. 5734 * assumes that the claimstr() will prevent 5735 * plumbing between the stream head and the 5736 * driver from changing 5737 */ 5738 qp = stp->sd_wrq; 5739 5740 /* 5741 * Loop until we reach the end of this stream. 5742 * On completion, qp points to the write queue 5743 * at the end of the stream, or the read queue 5744 * at the stream head if this is a fifo. 5745 */ 5746 while (((qp = qp->q_next) != NULL) && _SAMESTR(qp)) 5747 ; 5748 5749 /* 5750 * Just in case we get a q_next which is NULL, but 5751 * not at the end of the stream. 
This is actually 5752 * broken, so we set an assert to catch it in 5753 * debug, and set an error and return if not debug. 5754 */ 5755 ASSERT(qp); 5756 if (qp == NULL) { 5757 releasestr(stp->sd_wrq); 5758 return (EINVAL); 5759 } 5760 5761 /* 5762 * Enter the syncq for the driver, so (hopefully) 5763 * the queue values will not change on us. 5764 * XXXX - This will only prevent the race IFF only 5765 * the write side modifies the q_next member, and 5766 * the put procedure is protected by at least 5767 * MT_PERQ. 5768 */ 5769 if ((sq = qp->q_syncq) != NULL) 5770 entersq(sq, SQ_PUT); 5771 5772 /* Now get the q_next value from this qp. */ 5773 nextqp = qp->q_next; 5774 5775 /* 5776 * If nextqp exists and the other stream is different 5777 * from this one claim the stream, set the mate, and 5778 * get the read queue at the stream head of the other 5779 * stream. Assumes that nextqp was at least valid when 5780 * we got it. Hopefully the entersq of the driver 5781 * will prevent it from changing on us. 5782 */ 5783 if ((nextqp != NULL) && (STREAM(nextqp) != stp)) { 5784 ASSERT(qp->q_qinfo->qi_srvp); 5785 ASSERT(_OTHERQ(qp)->q_qinfo->qi_srvp); 5786 ASSERT(_OTHERQ(qp->q_next)->q_qinfo->qi_srvp); 5787 claimstr(nextqp); 5788 5789 /* Make sure we still have a q_next */ 5790 if (nextqp != qp->q_next) { 5791 releasestr(stp->sd_wrq); 5792 releasestr(nextqp); 5793 return (EINVAL); 5794 } 5795 5796 qp = _RD(STREAM(nextqp)->sd_wrq); 5797 mate = qp; 5798 } 5799 /* If we entered the synq above, leave it. 
*/ 5800 if (sq != NULL) 5801 leavesq(sq, SQ_PUT); 5802 } /* STRMATED(STP) */ 5803 5804 /* XXX prevents substitution of the ops vector */ 5805 if (qp->q_qinfo != &strdata && qp->q_qinfo != &fifo_strdata) { 5806 retval = EINVAL; 5807 goto out; 5808 } 5809 5810 if (qp->q_flag & QFULL) { 5811 retval = EAGAIN; 5812 goto out; 5813 } 5814 5815 /* 5816 * Since M_PASSFP messages include a file descriptor, we use 5817 * esballoc() and specify a custom free routine (free_passfp()) that 5818 * will close the descriptor as part of freeing the message. For 5819 * convenience, we stash the frtn_t right after the data block. 5820 */ 5821 bufsize = sizeof (struct k_strrecvfd) + sizeof (frtn_t); 5822 srf = kmem_alloc(bufsize, KM_NOSLEEP); 5823 if (srf == NULL) { 5824 retval = EAGAIN; 5825 goto out; 5826 } 5827 5828 frtnp = (frtn_t *)(srf + 1); 5829 frtnp->free_arg = (caddr_t)srf; 5830 frtnp->free_func = free_passfp; 5831 5832 mp = esballoc((uchar_t *)srf, bufsize, BPRI_MED, frtnp); 5833 if (mp == NULL) { 5834 kmem_free(srf, bufsize); 5835 retval = EAGAIN; 5836 goto out; 5837 } 5838 mp->b_wptr += sizeof (struct k_strrecvfd); 5839 mp->b_datap->db_type = M_PASSFP; 5840 5841 srf->fp = fp; 5842 srf->uid = crgetuid(curthread->t_cred); 5843 srf->gid = crgetgid(curthread->t_cred); 5844 mutex_enter(&fp->f_tlock); 5845 fp->f_count++; 5846 mutex_exit(&fp->f_tlock); 5847 5848 put(qp, mp); 5849 out: 5850 releasestr(stp->sd_wrq); 5851 if (mate) 5852 releasestr(mate); 5853 return (retval); 5854 } 5855 5856 /* 5857 * Send an ioctl message downstream and wait for acknowledgement. 5858 * flags may be set to either U_TO_K or K_TO_K and a combination 5859 * of STR_NOERROR or STR_NOSIG 5860 * STR_NOSIG: Signals are essentially ignored or held and have 5861 * no effect for the duration of the call. 5862 * STR_NOERROR: Ignores stream head read, write and hup errors. 
 * Additionally, if an existing ioctl times out, it is assumed
 * lost and this ioctl will continue as if the previous ioctl had
 * finished.  ETIME may be returned if this ioctl times out (i.e.
 * ic_timout is not INFTIM).  Non-stream head errors may be returned if
 * the ioc_error indicates that the driver/module had problems,
 * an EFAULT was found when accessing user data, a lack of
 * resources, etc.
 *
 * Arguments, as used by the code below:
 *	stp	stream head the ioctl is issued on
 *	strioc	ic_cmd, ic_len, ic_dp and ic_timout describe the request.
 *		ic_len is rewritten: TRANSPARENT becomes sizeof (intptr_t)
 *		on entry, and on a successful ACK of a non-transparent
 *		ioctl it is set to the length of the returned data.
 *	fflags	file flags; the FMODELS bits are forwarded to the
 *		driver/module in ioc_flag and cp_flag
 *	flag	U_TO_K or K_TO_K (copy direction), optionally combined
 *		with STR_NOSIG and/or STR_NOERROR
 *	crp	credential attached to the M_IOCTL; a hold is taken here
 *		and dropped on every return path after that
 *	rvalp	receives ioc_rval from an M_IOCACK that carries no error
 *
 * Returns 0 on success, otherwise an errno value.
 */
int
strdoioctl(
	struct stdata *stp,
	struct strioctl *strioc,
	int fflags,		/* file flags with model info */
	int flag,
	cred_t *crp,
	int *rvalp)
{
	mblk_t *bp;
	struct iocblk *iocbp;
	struct copyreq *reqp;
	struct copyresp *resp;
	int id;
	int transparent = 0;
	int error = 0;
	int len = 0;
	caddr_t taddr;
	int copyflag = (flag & (U_TO_K | K_TO_K));
	int sigflag = (flag & STR_NOSIG);
	int errs;
	uint_t waitflags;
	boolean_t set_iocwaitne = B_FALSE;

	ASSERT(copyflag == U_TO_K || copyflag == K_TO_K);
	ASSERT((fflags & FMODELS) != 0);

	TRACE_2(TR_FAC_STREAMS_FR,
	    TR_STRDOIOCTL,
	    "strdoioctl:stp %p strioc %p", stp, strioc);
	if (strioc->ic_len == TRANSPARENT) {	/* send arg in M_DATA block */
		transparent = 1;
		strioc->ic_len = sizeof (intptr_t);
	}

	/* Reject negative lengths and, when strmsgsz is set, oversized ones. */
	if (strioc->ic_len < 0 || (strmsgsz > 0 && strioc->ic_len > strmsgsz))
		return (EINVAL);

	/*
	 * The block is sized for the largest ioctl-related message
	 * (union ioctypes) because the same mblk is later rewritten in
	 * place as a copyresp when answering M_COPYIN/M_COPYOUT.
	 */
	if ((bp = allocb_cred_wait(sizeof (union ioctypes), sigflag, &error,
	    crp, curproc->p_pid)) == NULL)
		return (error);

	bzero(bp->b_wptr, sizeof (union ioctypes));

	iocbp = (struct iocblk *)bp->b_wptr;
	iocbp->ioc_count = strioc->ic_len;
	iocbp->ioc_cmd = strioc->ic_cmd;
	iocbp->ioc_flag = (fflags & FMODELS);

	/*
	 * Every return path from here on must drop this hold with crfree().
	 */
	crhold(crp);
	iocbp->ioc_cr = crp;
	DB_TYPE(bp) = M_IOCTL;
	bp->b_wptr += sizeof (struct iocblk);

	/*
	 * Stream head error conditions that abort the ioctl.  With
	 * STR_NOERROR only STPLEX is honoured.
	 */
	if (flag & STR_NOERROR)
		errs = STPLEX;
	else
		errs = STRHUP|STRDERR|STWRERR|STPLEX;

	/*
	 * If there is data to copy into ioctl block, do so.
	 */
	if (iocbp->ioc_count > 0) {
		if (transparent)
			/*
			 * Note: STR_NOERROR does not have an effect
			 * in putiocd()
			 */
			id = K_TO_K | sigflag;
		else
			id = flag;
		if ((error = putiocd(bp, strioc->ic_dp, id, crp)) != 0) {
			freemsg(bp);
			crfree(crp);
			return (error);
		}

		/*
		 * We could have slept copying in user pages.
		 * Recheck the stream head state (the other end
		 * of a pipe could have gone away).
		 */
		if (stp->sd_flag & errs) {
			mutex_enter(&stp->sd_lock);
			error = strgeterr(stp, errs, 0);
			mutex_exit(&stp->sd_lock);
			if (error != 0) {
				freemsg(bp);
				crfree(crp);
				return (error);
			}
		}
	}
	if (transparent)
		iocbp->ioc_count = TRANSPARENT;

	/*
	 * Block for up to STRTIMOUT milliseconds if there is an outstanding
	 * ioctl for this stream already running.  All processes
	 * sleeping here will be awakened as a result of an ACK
	 * or NAK being received for the outstanding ioctl, or
	 * as a result of the timer expiring on the outstanding
	 * ioctl (a failure), or as a result of any waiting
	 * process's timer expiring (also a failure).
	 */

	error = 0;
	mutex_enter(&stp->sd_lock);
	while ((stp->sd_flag & IOCWAIT) ||
	    (!set_iocwaitne && (stp->sd_flag & IOCWAITNE))) {
		clock_t cv_rval;

		TRACE_0(TR_FAC_STREAMS_FR,
		    TR_STRDOIOCTL_WAIT,
		    "strdoioctl sleeps - IOCWAIT");
		cv_rval = str_cv_wait(&stp->sd_iocmonitor, &stp->sd_lock,
		    STRTIMOUT, sigflag);
		/*
		 * As handled below: 0 is treated as an interrupt, a negative
		 * value as a timeout, and a positive value as a wakeup.
		 */
		if (cv_rval <= 0) {
			if (cv_rval == 0) {
				error = EINTR;
			} else {
				if (flag & STR_NOERROR) {
					/*
					 * Terminating current ioctl in
					 * progress -- assume it got lost and
					 * wake up the other thread so that the
					 * operation completes.
					 */
					if (!(stp->sd_flag & IOCWAITNE)) {
						set_iocwaitne = B_TRUE;
						stp->sd_flag |= IOCWAITNE;
						cv_broadcast(&stp->sd_monitor);
					}
					/*
					 * Otherwise, there's a running
					 * STR_NOERROR -- we have no choice
					 * here but to wait forever (or until
					 * interrupted).
					 */
				} else {
					/*
					 * pending ioctl has caused
					 * us to time out
					 */
					error = ETIME;
				}
			}
		} else if ((stp->sd_flag & errs)) {
			error = strgeterr(stp, errs, 0);
		}
		if (error) {
			mutex_exit(&stp->sd_lock);
			freemsg(bp);
			crfree(crp);
			return (error);
		}
	}

	/*
	 * Have control of ioctl mechanism.
	 * Send down ioctl packet and wait for response.
	 */
	/* (mblk_t *)-1 is the "reply already consumed" marker, not a message */
	if (stp->sd_iocblk != (mblk_t *)-1) {
		freemsg(stp->sd_iocblk);
	}
	stp->sd_iocblk = NULL;

	/*
	 * If this is marked with 'noerror' (internal; mostly
	 * I_{P,}{UN,}LINK), then make sure nobody else is able to get
	 * in here by setting IOCWAITNE.
	 */
	waitflags = IOCWAIT;
	if (flag & STR_NOERROR)
		waitflags |= IOCWAITNE;

	stp->sd_flag |= waitflags;

	/*
	 * Assign sequence number.
	 */
	iocbp->ioc_id = stp->sd_iocid = getiocseqno();

	mutex_exit(&stp->sd_lock);

	TRACE_1(TR_FAC_STREAMS_FR,
	    TR_STRDOIOCTL_PUT, "strdoioctl put: stp %p", stp);
	stream_willservice(stp);
	putnext(stp->sd_wrq, bp);
	stream_runservice(stp);

	/*
	 * Timed wait for acknowledgment.  The wait time is limited by the
	 * timeout value, which must be a positive integer (number of
	 * milliseconds) to wait, or 0 (use default value of STRTIMOUT
	 * milliseconds), or -1 (wait forever).  This will be awakened
	 * either by an ACK/NAK message arriving, the timer expiring, or
	 * the timer expiring on another ioctl waiting for control of the
	 * mechanism.
	 *
	 * NOTE(review): the code below multiplies a non-zero ic_timout by
	 * 1000 before passing it to str_cv_wait(), which suggests that
	 * ic_timout is expressed in seconds rather than milliseconds --
	 * confirm against the strioctl documentation.
	 */
waitioc:
	mutex_enter(&stp->sd_lock);


	/*
	 * If the reply has already arrived, don't sleep.  If awakened from
	 * the sleep, fail only if the reply has not arrived by then.
	 * Otherwise, process the reply.
	 */
	while (!stp->sd_iocblk) {
		clock_t cv_rval;

		if (stp->sd_flag & errs) {
			error = strgeterr(stp, errs, 0);
			if (error != 0) {
				/*
				 * Give up the ioctl mechanism and wake any
				 * thread waiting for it in the first loop.
				 */
				stp->sd_flag &= ~waitflags;
				cv_broadcast(&stp->sd_iocmonitor);
				mutex_exit(&stp->sd_lock);
				crfree(crp);
				return (error);
			}
		}

		TRACE_0(TR_FAC_STREAMS_FR,
		    TR_STRDOIOCTL_WAIT2,
		    "strdoioctl sleeps awaiting reply");
		ASSERT(error == 0);

		cv_rval = str_cv_wait(&stp->sd_monitor, &stp->sd_lock,
		    (strioc->ic_timout ?
		    strioc->ic_timout * 1000 : STRTIMOUT), sigflag);

		/*
		 * There are four possible cases here: interrupt, timeout,
		 * wakeup by IOCWAITNE (above), or wakeup by strrput_nondata (a
		 * valid M_IOCTL reply).
		 *
		 * If we've been awakened by a STR_NOERROR ioctl on some other
		 * thread, then sd_iocblk will still be NULL, and IOCWAITNE
		 * will be set.  Pretend as if we just timed out.  Note that
		 * this other thread waited at least STRTIMOUT before trying to
		 * awaken our thread, so this is indistinguishable (even for
		 * INFTIM) from the case where we failed with ETIME waiting on
		 * IOCWAIT in the prior loop.
		 */
		if (cv_rval > 0 && !(flag & STR_NOERROR) &&
		    stp->sd_iocblk == NULL && (stp->sd_flag & IOCWAITNE)) {
			cv_rval = -1;
		}

		/*
		 * note: STR_NOERROR does not protect
		 * us here.. use ic_timout < 0
		 */
		if (cv_rval <= 0) {
			if (cv_rval == 0) {
				error = EINTR;
			} else {
				error = ETIME;
			}
			/*
			 * A message could have come in after we were scheduled
			 * but before we were actually run.
			 */
			bp = stp->sd_iocblk;
			stp->sd_iocblk = NULL;
			if (bp != NULL) {
				if ((bp->b_datap->db_type == M_COPYIN) ||
				    (bp->b_datap->db_type == M_COPYOUT)) {
					/*
					 * A copy request is outstanding:
					 * answer it with a failing M_IOCDATA
					 * rather than dropping it silently.
					 * sd_lock is released around the
					 * putnext().
					 */
					mutex_exit(&stp->sd_lock);
					if (bp->b_cont) {
						freemsg(bp->b_cont);
						bp->b_cont = NULL;
					}
					bp->b_datap->db_type = M_IOCDATA;
					bp->b_wptr = bp->b_rptr +
					    sizeof (struct copyresp);
					resp = (struct copyresp *)bp->b_rptr;
					resp->cp_rval =
					    (caddr_t)1; /* failure */
					stream_willservice(stp);
					putnext(stp->sd_wrq, bp);
					stream_runservice(stp);
					mutex_enter(&stp->sd_lock);
				} else {
					freemsg(bp);
				}
			}
			stp->sd_flag &= ~waitflags;
			cv_broadcast(&stp->sd_iocmonitor);
			mutex_exit(&stp->sd_lock);
			crfree(crp);
			return (error);
		}
	}
	bp = stp->sd_iocblk;
	/*
	 * Note: it is strictly impossible to get here with sd_iocblk set to
	 * -1.  This is because the initial loop above doesn't allow any new
	 * ioctls into the fray until all others have passed this point.
	 */
	ASSERT(bp != NULL && bp != (mblk_t *)-1);
	TRACE_1(TR_FAC_STREAMS_FR,
	    TR_STRDOIOCTL_ACK, "strdoioctl got reply: bp %p", bp);
	if ((bp->b_datap->db_type == M_IOCACK) ||
	    (bp->b_datap->db_type == M_IOCNAK)) {
		/* for detection of duplicate ioctl replies */
		stp->sd_iocblk = (mblk_t *)-1;
		stp->sd_flag &= ~waitflags;
		cv_broadcast(&stp->sd_iocmonitor);
		mutex_exit(&stp->sd_lock);
	} else {
		/*
		 * flags not cleared here because we're still doing
		 * copy in/out for ioctl.
		 */
		stp->sd_iocblk = NULL;
		mutex_exit(&stp->sd_lock);
	}


	/*
	 * Have received acknowledgment.
	 */

	switch (bp->b_datap->db_type) {
	case M_IOCACK:
		/*
		 * Positive ack.
		 */
		iocbp = (struct iocblk *)bp->b_rptr;

		/*
		 * Set error if indicated.
		 */
		if (iocbp->ioc_error) {
			error = iocbp->ioc_error;
			break;
		}

		/*
		 * Set return value.
		 */
		*rvalp = iocbp->ioc_rval;

		/*
		 * Data may have been returned in ACK message (ioc_count > 0).
		 * If so, copy it out to the user's buffer.
		 */
		if (iocbp->ioc_count && !transparent) {
			if (error = getiocd(bp, strioc->ic_dp, copyflag))
				break;
		}
		if (!transparent) {
			if (len)	/* an M_COPYOUT was used with I_STR */
				strioc->ic_len = len;
			else
				strioc->ic_len = (int)iocbp->ioc_count;
		}
		break;

	case M_IOCNAK:
		/*
		 * Negative ack.
		 *
		 * The only thing to do is set error as specified
		 * in neg ack packet.
		 */
		iocbp = (struct iocblk *)bp->b_rptr;

		/* A NAK that carries no error code is reported as EINVAL. */
		error = (iocbp->ioc_error ? iocbp->ioc_error : EINVAL);
		break;

	case M_COPYIN:
		/*
		 * Driver or module has requested user ioctl data.
		 */
		reqp = (struct copyreq *)bp->b_rptr;

		/*
		 * M_COPYIN should *never* have a message attached, though
		 * it's harmless if it does -- thus, panic on a DEBUG
		 * kernel and just free it on a non-DEBUG build.
		 */
		ASSERT(bp->b_cont == NULL);
		if (bp->b_cont != NULL) {
			freemsg(bp->b_cont);
			bp->b_cont = NULL;
		}

		error = putiocd(bp, reqp->cq_addr, flag, crp);
		if (error && bp->b_cont) {
			freemsg(bp->b_cont);
			bp->b_cont = NULL;
		}

		/*
		 * Reuse the request mblk as the M_IOCDATA response; cp_rval
		 * carries the copy status (0 or the errno) downstream.
		 */
		bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
		bp->b_datap->db_type = M_IOCDATA;

		mblk_setcred(bp, crp, curproc->p_pid);
		resp = (struct copyresp *)bp->b_rptr;
		resp->cp_rval = (caddr_t)(uintptr_t)error;
		resp->cp_flag = (fflags & FMODELS);

		stream_willservice(stp);
		putnext(stp->sd_wrq, bp);
		stream_runservice(stp);

		if (error) {
			mutex_enter(&stp->sd_lock);
			stp->sd_flag &= ~waitflags;
			cv_broadcast(&stp->sd_iocmonitor);
			mutex_exit(&stp->sd_lock);
			crfree(crp);
			return (error);
		}

		/* bp has been sent downstream; wait for the next reply. */
		goto waitioc;

	case M_COPYOUT:
		/*
		 * Driver or module has ioctl data for a user.
		 */
		reqp = (struct copyreq *)bp->b_rptr;
		ASSERT(bp->b_cont != NULL);

		/*
		 * Always (transparent or non-transparent )
		 * use the address specified in the request
		 */
		taddr = reqp->cq_addr;
		/* Remembered so the final ACK can report it in ic_len. */
		if (!transparent)
			len = (int)reqp->cq_size;

		/* copyout data to the provided address */
		error = getiocd(bp, taddr, copyflag);

		freemsg(bp->b_cont);
		bp->b_cont = NULL;

		bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
		bp->b_datap->db_type = M_IOCDATA;

		mblk_setcred(bp, crp, curproc->p_pid);
		resp = (struct copyresp *)bp->b_rptr;
		resp->cp_rval = (caddr_t)(uintptr_t)error;
		resp->cp_flag = (fflags & FMODELS);

		stream_willservice(stp);
		putnext(stp->sd_wrq, bp);
		stream_runservice(stp);

		if (error) {
			mutex_enter(&stp->sd_lock);
			stp->sd_flag &= ~waitflags;
			cv_broadcast(&stp->sd_iocmonitor);
			mutex_exit(&stp->sd_lock);
			crfree(crp);
			return (error);
		}
		/* bp has been sent downstream; wait for the next reply. */
		goto waitioc;

	default:
		/*
		 * Unexpected reply type: panic on a DEBUG kernel, otherwise
		 * release the ioctl mechanism and fall out with the current
		 * error value.
		 */
		ASSERT(0);
		mutex_enter(&stp->sd_lock);
		stp->sd_flag &= ~waitflags;
		cv_broadcast(&stp->sd_iocmonitor);
		mutex_exit(&stp->sd_lock);
		break;
	}

	freemsg(bp);
	crfree(crp);
	return (error);
}

/*
 * Send an M_CMD message downstream and wait for a reply.  This is a ptools
 * special used to retrieve information from modules/drivers in a stream
 * without being subjected to flow control or interfering with pending
 * messages on the stream (e.g. an ioctl in flight).
 *
 * Arguments, as used by the code below:
 *	stp	stream head to send the command on
 *	scp	sc_cmd is the command; sc_buf/sc_len carry the payload in and
 *		the reply data out; sc_timeout, when positive, is multiplied
 *		by MILLISEC to form the wait time, otherwise STRTIMOUT is
 *		used.  Values below -1 are rejected.
 *	crp	credential attached to the message; held for the duration
 *
 * Returns 0 on success, or: EINVAL for a bad sc_len/sc_timeout, ENOMEM if a
 * message block cannot be allocated, EBUSY if another M_CMD is already
 * pending on this stream, EINTR/ETIME if the wait is interrupted or times
 * out, a stream head error from strgeterr(), the cb_error value of the
 * reply, or EPROTO if the reply claims more data than it carries.
 */
int
strdocmd(struct stdata *stp, struct strcmd *scp, cred_t *crp)
{
	mblk_t *mp;
	struct cmdblk *cmdp;
	int error = 0;
	int errs = STRHUP|STRDERR|STWRERR|STPLEX;
	clock_t rval, timeout = STRTIMOUT;

	if (scp->sc_len < 0 || scp->sc_len > sizeof (scp->sc_buf) ||
	    scp->sc_timeout < -1)
		return (EINVAL);

	if (scp->sc_timeout > 0)
		timeout = scp->sc_timeout * MILLISEC;

	if ((mp = allocb_cred(sizeof (struct cmdblk), crp,
	    curproc->p_pid)) == NULL)
		return (ENOMEM);

	/* Dropped again with crfree() at "out" or "waitout". */
	crhold(crp);

	cmdp = (struct cmdblk *)mp->b_wptr;
	cmdp->cb_cr = crp;
	cmdp->cb_cmd = scp->sc_cmd;
	cmdp->cb_len = scp->sc_len;
	cmdp->cb_error = 0;
	mp->b_wptr += sizeof (struct cmdblk);

	DB_TYPE(mp) = M_CMD;
	DB_CPID(mp) = curproc->p_pid;

	/*
	 * Copy in the payload.
	 */
	if (cmdp->cb_len > 0) {
		/*
		 * The data block is always sized for the whole sc_buf, not
		 * just cb_len bytes.
		 */
		mp->b_cont = allocb_cred(sizeof (scp->sc_buf), crp,
		    curproc->p_pid);
		if (mp->b_cont == NULL) {
			error = ENOMEM;
			goto out;
		}

		/* cb_len comes from sc_len, which has already been checked */
		ASSERT(cmdp->cb_len <= sizeof (scp->sc_buf));
		(void) bcopy(scp->sc_buf, mp->b_cont->b_wptr, cmdp->cb_len);
		mp->b_cont->b_wptr += cmdp->cb_len;
		DB_CPID(mp->b_cont) = curproc->p_pid;
	}

	/*
	 * Since this mechanism is strictly for ptools, and since only one
	 * process can be grabbed at a time, we simply fail if there's
	 * currently an operation pending.
	 */
	mutex_enter(&stp->sd_lock);
	if (stp->sd_flag & STRCMDWAIT) {
		mutex_exit(&stp->sd_lock);
		error = EBUSY;
		goto out;
	}
	stp->sd_flag |= STRCMDWAIT;
	ASSERT(stp->sd_cmdblk == NULL);
	mutex_exit(&stp->sd_lock);

	putnext(stp->sd_wrq, mp);
	/* The message now belongs to the stream; do not free it at "out". */
	mp = NULL;

	/*
	 * Timed wait for acknowledgment.  If the reply has already arrived,
	 * don't sleep.  If awakened from the sleep, fail only if the reply
	 * has not arrived by then.  Otherwise, process the reply.
	 */
	mutex_enter(&stp->sd_lock);
	while (stp->sd_cmdblk == NULL) {
		if (stp->sd_flag & errs) {
			if ((error = strgeterr(stp, errs, 0)) != 0)
				goto waitout;
		}

		rval = str_cv_wait(&stp->sd_monitor, &stp->sd_lock, timeout, 0);
		/* A reply that has arrived wins over a timeout or interrupt. */
		if (stp->sd_cmdblk != NULL)
			break;

		if (rval <= 0) {
			error = (rval == 0) ? EINTR : ETIME;
			goto waitout;
		}
	}

	/*
	 * We received a reply.
	 */
	mp = stp->sd_cmdblk;
	stp->sd_cmdblk = NULL;
	ASSERT(mp != NULL && DB_TYPE(mp) == M_CMD);
	ASSERT(stp->sd_flag & STRCMDWAIT);
	stp->sd_flag &= ~STRCMDWAIT;
	mutex_exit(&stp->sd_lock);

	cmdp = (struct cmdblk *)mp->b_rptr;
	if ((error = cmdp->cb_error) != 0)
		goto out;

	/*
	 * Data may have been returned in the reply (cb_len > 0).
	 * If so, copy it out to the user's buffer.
	 */
	if (cmdp->cb_len > 0) {
		if (mp->b_cont == NULL || MBLKL(mp->b_cont) < cmdp->cb_len) {
			error = EPROTO;
			goto out;
		}

		/* Never copy more than the caller's buffer can hold. */
		cmdp->cb_len = MIN(cmdp->cb_len, sizeof (scp->sc_buf));
		(void) bcopy(mp->b_cont->b_rptr, scp->sc_buf, cmdp->cb_len);
	}
	scp->sc_len = cmdp->cb_len;
out:
	/* mp is either the unsent request, the reply, or NULL. */
	freemsg(mp);
	crfree(crp);
	return (error);
waitout:
	/* Reached with sd_lock held and no reply: abandon the command. */
	ASSERT(stp->sd_cmdblk == NULL);
	stp->sd_flag &= ~STRCMDWAIT;
	mutex_exit(&stp->sd_lock);
	crfree(crp);
	return (error);
}

/*
 * For the SunOS keyboard driver.
 * Return the next available "ioctl" sequence number.
 * Exported, so that streams modules can send "ioctl" messages
 * downstream from their open routine.
 *
 * The counter ioc_id is pre-incremented while holding the strresources
 * mutex, and the new value is returned.
 */
int
getiocseqno(void)
{
	int	i;

	mutex_enter(&strresources);
	i = ++ioc_id;
	mutex_exit(&strresources);
	return (i);
}

/*
 * Get the next message from the read queue.  If the message is
 * priority, STRPRI will have been set by strrput().  This flag
 * should be reset only when the entire message at the front of the
 * queue has been consumed.
 *
 * NOTE: strgetmsg and kstrgetmsg have much of the logic in common.
6507 */ 6508 int 6509 strgetmsg( 6510 struct vnode *vp, 6511 struct strbuf *mctl, 6512 struct strbuf *mdata, 6513 unsigned char *prip, 6514 int *flagsp, 6515 int fmode, 6516 rval_t *rvp) 6517 { 6518 struct stdata *stp; 6519 mblk_t *bp, *nbp; 6520 mblk_t *savemp = NULL; 6521 mblk_t *savemptail = NULL; 6522 uint_t old_sd_flag; 6523 int flg; 6524 int more = 0; 6525 int error = 0; 6526 char first = 1; 6527 uint_t mark; /* Contains MSG*MARK and _LASTMARK */ 6528 #define _LASTMARK 0x8000 /* Distinct from MSG*MARK */ 6529 unsigned char pri = 0; 6530 queue_t *q; 6531 int pr = 0; /* Partial read successful */ 6532 struct uio uios; 6533 struct uio *uiop = &uios; 6534 struct iovec iovs; 6535 unsigned char type; 6536 6537 TRACE_1(TR_FAC_STREAMS_FR, TR_STRGETMSG_ENTER, 6538 "strgetmsg:%p", vp); 6539 6540 ASSERT(vp->v_stream); 6541 stp = vp->v_stream; 6542 rvp->r_val1 = 0; 6543 6544 mutex_enter(&stp->sd_lock); 6545 6546 if ((error = i_straccess(stp, JCREAD)) != 0) { 6547 mutex_exit(&stp->sd_lock); 6548 return (error); 6549 } 6550 6551 if (stp->sd_flag & (STRDERR|STPLEX)) { 6552 error = strgeterr(stp, STRDERR|STPLEX, 0); 6553 if (error != 0) { 6554 mutex_exit(&stp->sd_lock); 6555 return (error); 6556 } 6557 } 6558 mutex_exit(&stp->sd_lock); 6559 6560 switch (*flagsp) { 6561 case MSG_HIPRI: 6562 if (*prip != 0) 6563 return (EINVAL); 6564 break; 6565 6566 case MSG_ANY: 6567 case MSG_BAND: 6568 break; 6569 6570 default: 6571 return (EINVAL); 6572 } 6573 /* 6574 * Setup uio and iov for data part 6575 */ 6576 iovs.iov_base = mdata->buf; 6577 iovs.iov_len = mdata->maxlen; 6578 uios.uio_iov = &iovs; 6579 uios.uio_iovcnt = 1; 6580 uios.uio_loffset = 0; 6581 uios.uio_segflg = UIO_USERSPACE; 6582 uios.uio_fmode = 0; 6583 uios.uio_extflg = UIO_COPY_CACHED; 6584 uios.uio_resid = mdata->maxlen; 6585 uios.uio_offset = 0; 6586 6587 q = _RD(stp->sd_wrq); 6588 mutex_enter(&stp->sd_lock); 6589 old_sd_flag = stp->sd_flag; 6590 mark = 0; 6591 for (;;) { 6592 int done = 0; 6593 mblk_t *q_first = 
q->q_first; 6594 6595 /* 6596 * Get the next message of appropriate priority 6597 * from the stream head. If the caller is interested 6598 * in band or hipri messages, then they should already 6599 * be enqueued at the stream head. On the other hand 6600 * if the caller wants normal (band 0) messages, they 6601 * might be deferred in a synchronous stream and they 6602 * will need to be pulled up. 6603 * 6604 * After we have dequeued a message, we might find that 6605 * it was a deferred M_SIG that was enqueued at the 6606 * stream head. It must now be posted as part of the 6607 * read by calling strsignal_nolock(). 6608 * 6609 * Also note that strrput does not enqueue an M_PCSIG, 6610 * and there cannot be more than one hipri message, 6611 * so there was no need to have the M_PCSIG case. 6612 * 6613 * At some time it might be nice to try and wrap the 6614 * functionality of kstrgetmsg() and strgetmsg() into 6615 * a common routine so to reduce the amount of replicated 6616 * code (since they are extremely similar). 6617 */ 6618 if (!(*flagsp & (MSG_HIPRI|MSG_BAND))) { 6619 /* Asking for normal, band0 data */ 6620 bp = strget(stp, q, uiop, first, &error); 6621 ASSERT(MUTEX_HELD(&stp->sd_lock)); 6622 if (bp != NULL) { 6623 if (DB_TYPE(bp) == M_SIG) { 6624 strsignal_nolock(stp, *bp->b_rptr, 6625 bp->b_band); 6626 freemsg(bp); 6627 continue; 6628 } else { 6629 break; 6630 } 6631 } 6632 if (error != 0) 6633 goto getmout; 6634 6635 /* 6636 * We can't depend on the value of STRPRI here because 6637 * the stream head may be in transit. 
Therefore, we 6638 * must look at the type of the first message to 6639 * determine if a high priority messages is waiting 6640 */ 6641 } else if ((*flagsp & MSG_HIPRI) && q_first != NULL && 6642 DB_TYPE(q_first) >= QPCTL && 6643 (bp = getq_noenab(q, 0)) != NULL) { 6644 /* Asked for HIPRI and got one */ 6645 ASSERT(DB_TYPE(bp) >= QPCTL); 6646 break; 6647 } else if ((*flagsp & MSG_BAND) && q_first != NULL && 6648 ((q_first->b_band >= *prip) || DB_TYPE(q_first) >= QPCTL) && 6649 (bp = getq_noenab(q, 0)) != NULL) { 6650 /* 6651 * Asked for at least band "prip" and got either at 6652 * least that band or a hipri message. 6653 */ 6654 ASSERT(bp->b_band >= *prip || DB_TYPE(bp) >= QPCTL); 6655 if (DB_TYPE(bp) == M_SIG) { 6656 strsignal_nolock(stp, *bp->b_rptr, bp->b_band); 6657 freemsg(bp); 6658 continue; 6659 } else { 6660 break; 6661 } 6662 } 6663 6664 /* No data. Time to sleep? */ 6665 qbackenable(q, 0); 6666 6667 /* 6668 * If STRHUP or STREOF, return 0 length control and data. 6669 * If resid is 0, then a read(fd,buf,0) was done. Do not 6670 * sleep to satisfy this request because by default we have 6671 * zero bytes to return. 
6672 */ 6673 if ((stp->sd_flag & (STRHUP|STREOF)) || (mctl->maxlen == 0 && 6674 mdata->maxlen == 0)) { 6675 mctl->len = mdata->len = 0; 6676 *flagsp = 0; 6677 mutex_exit(&stp->sd_lock); 6678 return (0); 6679 } 6680 TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_WAIT, 6681 "strgetmsg calls strwaitq:%p, %p", 6682 vp, uiop); 6683 if (((error = strwaitq(stp, GETWAIT, (ssize_t)0, fmode, -1, 6684 &done)) != 0) || done) { 6685 TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_DONE, 6686 "strgetmsg error or done:%p, %p", 6687 vp, uiop); 6688 mutex_exit(&stp->sd_lock); 6689 return (error); 6690 } 6691 TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_AWAKE, 6692 "strgetmsg awakes:%p, %p", vp, uiop); 6693 if ((error = i_straccess(stp, JCREAD)) != 0) { 6694 mutex_exit(&stp->sd_lock); 6695 return (error); 6696 } 6697 first = 0; 6698 } 6699 ASSERT(bp != NULL); 6700 /* 6701 * Extract any mark information. If the message is not completely 6702 * consumed this information will be put in the mblk 6703 * that is putback. 6704 * If MSGMARKNEXT is set and the message is completely consumed 6705 * the STRATMARK flag will be set below. Likewise, if 6706 * MSGNOTMARKNEXT is set and the message is 6707 * completely consumed STRNOTATMARK will be set. 6708 */ 6709 mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT); 6710 ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) != 6711 (MSGMARKNEXT|MSGNOTMARKNEXT)); 6712 if (mark != 0 && bp == stp->sd_mark) { 6713 mark |= _LASTMARK; 6714 stp->sd_mark = NULL; 6715 } 6716 /* 6717 * keep track of the original message type and priority 6718 */ 6719 pri = bp->b_band; 6720 type = bp->b_datap->db_type; 6721 if (type == M_PASSFP) { 6722 if ((mark & _LASTMARK) && (stp->sd_mark == NULL)) 6723 stp->sd_mark = bp; 6724 bp->b_flag |= mark & ~_LASTMARK; 6725 putback(stp, q, bp, pri); 6726 qbackenable(q, pri); 6727 mutex_exit(&stp->sd_lock); 6728 return (EBADMSG); 6729 } 6730 ASSERT(type != M_SIG); 6731 6732 /* 6733 * Set this flag so strrput will not generate signals. 
Need to 6734 * make sure this flag is cleared before leaving this routine 6735 * else signals will stop being sent. 6736 */ 6737 stp->sd_flag |= STRGETINPROG; 6738 mutex_exit(&stp->sd_lock); 6739 6740 if (STREAM_NEEDSERVICE(stp)) 6741 stream_runservice(stp); 6742 6743 /* 6744 * Set HIPRI flag if message is priority. 6745 */ 6746 if (type >= QPCTL) 6747 flg = MSG_HIPRI; 6748 else 6749 flg = MSG_BAND; 6750 6751 /* 6752 * First process PROTO or PCPROTO blocks, if any. 6753 */ 6754 if (mctl->maxlen >= 0 && type != M_DATA) { 6755 size_t n, bcnt; 6756 char *ubuf; 6757 6758 bcnt = mctl->maxlen; 6759 ubuf = mctl->buf; 6760 while (bp != NULL && bp->b_datap->db_type != M_DATA) { 6761 if ((n = MIN(bcnt, bp->b_wptr - bp->b_rptr)) != 0 && 6762 copyout(bp->b_rptr, ubuf, n)) { 6763 error = EFAULT; 6764 mutex_enter(&stp->sd_lock); 6765 /* 6766 * clear stream head pri flag based on 6767 * first message type 6768 */ 6769 if (type >= QPCTL) { 6770 ASSERT(type == M_PCPROTO); 6771 stp->sd_flag &= ~STRPRI; 6772 } 6773 more = 0; 6774 freemsg(bp); 6775 goto getmout; 6776 } 6777 ubuf += n; 6778 bp->b_rptr += n; 6779 if (bp->b_rptr >= bp->b_wptr) { 6780 nbp = bp; 6781 bp = bp->b_cont; 6782 freeb(nbp); 6783 } 6784 ASSERT(n <= bcnt); 6785 bcnt -= n; 6786 if (bcnt == 0) 6787 break; 6788 } 6789 mctl->len = mctl->maxlen - bcnt; 6790 } else 6791 mctl->len = -1; 6792 6793 if (bp && bp->b_datap->db_type != M_DATA) { 6794 /* 6795 * More PROTO blocks in msg. 6796 */ 6797 more |= MORECTL; 6798 savemp = bp; 6799 while (bp && bp->b_datap->db_type != M_DATA) { 6800 savemptail = bp; 6801 bp = bp->b_cont; 6802 } 6803 savemptail->b_cont = NULL; 6804 } 6805 6806 /* 6807 * Now process DATA blocks, if any. 6808 */ 6809 if (mdata->maxlen >= 0 && bp) { 6810 /* 6811 * struiocopyout will consume a potential zero-length 6812 * M_DATA even if uio_resid is zero. 
6813 */ 6814 size_t oldresid = uiop->uio_resid; 6815 6816 bp = struiocopyout(bp, uiop, &error); 6817 if (error != 0) { 6818 mutex_enter(&stp->sd_lock); 6819 /* 6820 * clear stream head hi pri flag based on 6821 * first message 6822 */ 6823 if (type >= QPCTL) { 6824 ASSERT(type == M_PCPROTO); 6825 stp->sd_flag &= ~STRPRI; 6826 } 6827 more = 0; 6828 freemsg(savemp); 6829 goto getmout; 6830 } 6831 /* 6832 * (pr == 1) indicates a partial read. 6833 */ 6834 if (oldresid > uiop->uio_resid) 6835 pr = 1; 6836 mdata->len = mdata->maxlen - uiop->uio_resid; 6837 } else 6838 mdata->len = -1; 6839 6840 if (bp) { /* more data blocks in msg */ 6841 more |= MOREDATA; 6842 if (savemp) 6843 savemptail->b_cont = bp; 6844 else 6845 savemp = bp; 6846 } 6847 6848 mutex_enter(&stp->sd_lock); 6849 if (savemp) { 6850 if (pr && (savemp->b_datap->db_type == M_DATA) && 6851 msgnodata(savemp)) { 6852 /* 6853 * Avoid queuing a zero-length tail part of 6854 * a message. pr=1 indicates that we read some of 6855 * the message. 6856 */ 6857 freemsg(savemp); 6858 more &= ~MOREDATA; 6859 /* 6860 * clear stream head hi pri flag based on 6861 * first message 6862 */ 6863 if (type >= QPCTL) { 6864 ASSERT(type == M_PCPROTO); 6865 stp->sd_flag &= ~STRPRI; 6866 } 6867 } else { 6868 savemp->b_band = pri; 6869 /* 6870 * If the first message was HIPRI and the one we're 6871 * putting back isn't, then clear STRPRI, otherwise 6872 * set STRPRI again. Note that we must set STRPRI 6873 * again since the flush logic in strrput_nondata() 6874 * may have cleared it while we had sd_lock dropped. 6875 */ 6876 if (type >= QPCTL) { 6877 ASSERT(type == M_PCPROTO); 6878 if (queclass(savemp) < QPCTL) 6879 stp->sd_flag &= ~STRPRI; 6880 else 6881 stp->sd_flag |= STRPRI; 6882 } else if (queclass(savemp) >= QPCTL) { 6883 /* 6884 * The first message was not a HIPRI message, 6885 * but the one we are about to putback is. 
6886 * For simplicitly, we do not allow for HIPRI 6887 * messages to be embedded in the message 6888 * body, so just force it to same type as 6889 * first message. 6890 */ 6891 ASSERT(type == M_DATA || type == M_PROTO); 6892 ASSERT(savemp->b_datap->db_type == M_PCPROTO); 6893 savemp->b_datap->db_type = type; 6894 } 6895 if (mark != 0) { 6896 savemp->b_flag |= mark & ~_LASTMARK; 6897 if ((mark & _LASTMARK) && 6898 (stp->sd_mark == NULL)) { 6899 /* 6900 * If another marked message arrived 6901 * while sd_lock was not held sd_mark 6902 * would be non-NULL. 6903 */ 6904 stp->sd_mark = savemp; 6905 } 6906 } 6907 putback(stp, q, savemp, pri); 6908 } 6909 } else { 6910 /* 6911 * The complete message was consumed. 6912 * 6913 * If another M_PCPROTO arrived while sd_lock was not held 6914 * it would have been discarded since STRPRI was still set. 6915 * 6916 * Move the MSG*MARKNEXT information 6917 * to the stream head just in case 6918 * the read queue becomes empty. 6919 * clear stream head hi pri flag based on 6920 * first message 6921 * 6922 * If the stream head was at the mark 6923 * (STRATMARK) before we dropped sd_lock above 6924 * and some data was consumed then we have 6925 * moved past the mark thus STRATMARK is 6926 * cleared. However, if a message arrived in 6927 * strrput during the copyout above causing 6928 * STRATMARK to be set we can not clear that 6929 * flag. 
6930 */ 6931 if (type >= QPCTL) { 6932 ASSERT(type == M_PCPROTO); 6933 stp->sd_flag &= ~STRPRI; 6934 } 6935 if (mark & (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) { 6936 if (mark & MSGMARKNEXT) { 6937 stp->sd_flag &= ~STRNOTATMARK; 6938 stp->sd_flag |= STRATMARK; 6939 } else if (mark & MSGNOTMARKNEXT) { 6940 stp->sd_flag &= ~STRATMARK; 6941 stp->sd_flag |= STRNOTATMARK; 6942 } else { 6943 stp->sd_flag &= ~(STRATMARK|STRNOTATMARK); 6944 } 6945 } else if (pr && (old_sd_flag & STRATMARK)) { 6946 stp->sd_flag &= ~STRATMARK; 6947 } 6948 } 6949 6950 *flagsp = flg; 6951 *prip = pri; 6952 6953 /* 6954 * Getmsg cleanup processing - if the state of the queue has changed 6955 * some signals may need to be sent and/or poll awakened. 6956 */ 6957 getmout: 6958 qbackenable(q, pri); 6959 6960 /* 6961 * We dropped the stream head lock above. Send all M_SIG messages 6962 * before processing stream head for SIGPOLL messages. 6963 */ 6964 ASSERT(MUTEX_HELD(&stp->sd_lock)); 6965 while ((bp = q->q_first) != NULL && 6966 (bp->b_datap->db_type == M_SIG)) { 6967 /* 6968 * sd_lock is held so the content of the read queue can not 6969 * change. 6970 */ 6971 bp = getq(q); 6972 ASSERT(bp != NULL && bp->b_datap->db_type == M_SIG); 6973 6974 strsignal_nolock(stp, *bp->b_rptr, bp->b_band); 6975 mutex_exit(&stp->sd_lock); 6976 freemsg(bp); 6977 if (STREAM_NEEDSERVICE(stp)) 6978 stream_runservice(stp); 6979 mutex_enter(&stp->sd_lock); 6980 } 6981 6982 /* 6983 * stream head cannot change while we make the determination 6984 * whether or not to send a signal. Drop the flag to allow strrput 6985 * to send firstmsgsigs again. 6986 */ 6987 stp->sd_flag &= ~STRGETINPROG; 6988 6989 /* 6990 * If the type of message at the front of the queue changed 6991 * due to the receive the appropriate signals and pollwakeup events 6992 * are generated. The type of changes are: 6993 * Processed a hipri message, q_first is not hipri. 6994 * Processed a band X message, and q_first is band Y. 
6995 * The generated signals and pollwakeups are identical to what 6996 * strrput() generates should the message that is now on q_first 6997 * arrive to an empty read queue. 6998 * 6999 * Note: only strrput will send a signal for a hipri message. 7000 */ 7001 if ((bp = q->q_first) != NULL && !(stp->sd_flag & STRPRI)) { 7002 strsigset_t signals = 0; 7003 strpollset_t pollwakeups = 0; 7004 7005 if (flg & MSG_HIPRI) { 7006 /* 7007 * Removed a hipri message. Regular data at 7008 * the front of the queue. 7009 */ 7010 if (bp->b_band == 0) { 7011 signals = S_INPUT | S_RDNORM; 7012 pollwakeups = POLLIN | POLLRDNORM; 7013 } else { 7014 signals = S_INPUT | S_RDBAND; 7015 pollwakeups = POLLIN | POLLRDBAND; 7016 } 7017 } else if (pri != bp->b_band) { 7018 /* 7019 * The band is different for the new q_first. 7020 */ 7021 if (bp->b_band == 0) { 7022 signals = S_RDNORM; 7023 pollwakeups = POLLIN | POLLRDNORM; 7024 } else { 7025 signals = S_RDBAND; 7026 pollwakeups = POLLIN | POLLRDBAND; 7027 } 7028 } 7029 7030 if (pollwakeups != 0) { 7031 if (pollwakeups == (POLLIN | POLLRDNORM)) { 7032 if (!(stp->sd_rput_opt & SR_POLLIN)) 7033 goto no_pollwake; 7034 stp->sd_rput_opt &= ~SR_POLLIN; 7035 } 7036 mutex_exit(&stp->sd_lock); 7037 pollwakeup(&stp->sd_pollist, pollwakeups); 7038 mutex_enter(&stp->sd_lock); 7039 } 7040 no_pollwake: 7041 7042 if (stp->sd_sigflags & signals) 7043 strsendsig(stp->sd_siglist, signals, bp->b_band, 0); 7044 } 7045 mutex_exit(&stp->sd_lock); 7046 7047 rvp->r_val1 = more; 7048 return (error); 7049 #undef _LASTMARK 7050 } 7051 7052 /* 7053 * Get the next message from the read queue. If the message is 7054 * priority, STRPRI will have been set by strrput(). This flag 7055 * should be reset only when the entire message at the front of the 7056 * queue as been consumed. 7057 * 7058 * If uiop is NULL all data is returned in mctlp. 7059 * Note that a NULL uiop implies that FNDELAY and FNONBLOCK are assumed 7060 * not enabled. 
7061 * The timeout parameter is in milliseconds; -1 for infinity. 7062 * This routine handles the consolidation private flags: 7063 * MSG_IGNERROR Ignore any stream head error except STPLEX. 7064 * MSG_DELAYERROR Defer the error check until the queue is empty. 7065 * MSG_HOLDSIG Hold signals while waiting for data. 7066 * MSG_IPEEK Only peek at messages. 7067 * MSG_DISCARDTAIL Discard the tail M_DATA part of the message 7068 * that doesn't fit. 7069 * MSG_NOMARK If the message is marked leave it on the queue. 7070 * 7071 * NOTE: strgetmsg and kstrgetmsg have much of the logic in common. 7072 */ 7073 int 7074 kstrgetmsg( 7075 struct vnode *vp, 7076 mblk_t **mctlp, 7077 struct uio *uiop, 7078 unsigned char *prip, 7079 int *flagsp, 7080 clock_t timout, 7081 rval_t *rvp) 7082 { 7083 struct stdata *stp; 7084 mblk_t *bp, *nbp; 7085 mblk_t *savemp = NULL; 7086 mblk_t *savemptail = NULL; 7087 int flags; 7088 uint_t old_sd_flag; 7089 int flg; 7090 int more = 0; 7091 int error = 0; 7092 char first = 1; 7093 uint_t mark; /* Contains MSG*MARK and _LASTMARK */ 7094 #define _LASTMARK 0x8000 /* Distinct from MSG*MARK */ 7095 unsigned char pri = 0; 7096 queue_t *q; 7097 int pr = 0; /* Partial read successful */ 7098 unsigned char type; 7099 7100 TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_ENTER, 7101 "kstrgetmsg:%p", vp); 7102 7103 ASSERT(vp->v_stream); 7104 stp = vp->v_stream; 7105 rvp->r_val1 = 0; 7106 7107 mutex_enter(&stp->sd_lock); 7108 7109 if ((error = i_straccess(stp, JCREAD)) != 0) { 7110 mutex_exit(&stp->sd_lock); 7111 return (error); 7112 } 7113 7114 flags = *flagsp; 7115 if (stp->sd_flag & (STRDERR|STPLEX)) { 7116 if ((stp->sd_flag & STPLEX) || 7117 (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == 0) { 7118 error = strgeterr(stp, STRDERR|STPLEX, 7119 (flags & MSG_IPEEK)); 7120 if (error != 0) { 7121 mutex_exit(&stp->sd_lock); 7122 return (error); 7123 } 7124 } 7125 } 7126 mutex_exit(&stp->sd_lock); 7127 7128 switch (flags & (MSG_HIPRI|MSG_ANY|MSG_BAND)) { 7129 case MSG_HIPRI: 
7130 if (*prip != 0) 7131 return (EINVAL); 7132 break; 7133 7134 case MSG_ANY: 7135 case MSG_BAND: 7136 break; 7137 7138 default: 7139 return (EINVAL); 7140 } 7141 7142 retry: 7143 q = _RD(stp->sd_wrq); 7144 mutex_enter(&stp->sd_lock); 7145 old_sd_flag = stp->sd_flag; 7146 mark = 0; 7147 for (;;) { 7148 int done = 0; 7149 int waitflag; 7150 int fmode; 7151 mblk_t *q_first = q->q_first; 7152 7153 /* 7154 * This section of the code operates just like the code 7155 * in strgetmsg(). There is a comment there about what 7156 * is going on here. 7157 */ 7158 if (!(flags & (MSG_HIPRI|MSG_BAND))) { 7159 /* Asking for normal, band0 data */ 7160 bp = strget(stp, q, uiop, first, &error); 7161 ASSERT(MUTEX_HELD(&stp->sd_lock)); 7162 if (bp != NULL) { 7163 if (DB_TYPE(bp) == M_SIG) { 7164 strsignal_nolock(stp, *bp->b_rptr, 7165 bp->b_band); 7166 freemsg(bp); 7167 continue; 7168 } else { 7169 break; 7170 } 7171 } 7172 if (error != 0) { 7173 goto getmout; 7174 } 7175 /* 7176 * We can't depend on the value of STRPRI here because 7177 * the stream head may be in transit. Therefore, we 7178 * must look at the type of the first message to 7179 * determine if a high priority messages is waiting 7180 */ 7181 } else if ((flags & MSG_HIPRI) && q_first != NULL && 7182 DB_TYPE(q_first) >= QPCTL && 7183 (bp = getq_noenab(q, 0)) != NULL) { 7184 ASSERT(DB_TYPE(bp) >= QPCTL); 7185 break; 7186 } else if ((flags & MSG_BAND) && q_first != NULL && 7187 ((q_first->b_band >= *prip) || DB_TYPE(q_first) >= QPCTL) && 7188 (bp = getq_noenab(q, 0)) != NULL) { 7189 /* 7190 * Asked for at least band "prip" and got either at 7191 * least that band or a hipri message. 7192 */ 7193 ASSERT(bp->b_band >= *prip || DB_TYPE(bp) >= QPCTL); 7194 if (DB_TYPE(bp) == M_SIG) { 7195 strsignal_nolock(stp, *bp->b_rptr, bp->b_band); 7196 freemsg(bp); 7197 continue; 7198 } else { 7199 break; 7200 } 7201 } 7202 7203 /* No data. Time to sleep? */ 7204 qbackenable(q, 0); 7205 7206 /* 7207 * Delayed error notification? 
7208 */ 7209 if ((stp->sd_flag & (STRDERR|STPLEX)) && 7210 (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == MSG_DELAYERROR) { 7211 error = strgeterr(stp, STRDERR|STPLEX, 7212 (flags & MSG_IPEEK)); 7213 if (error != 0) { 7214 mutex_exit(&stp->sd_lock); 7215 return (error); 7216 } 7217 } 7218 7219 /* 7220 * If STRHUP or STREOF, return 0 length control and data. 7221 * If a read(fd,buf,0) has been done, do not sleep, just 7222 * return. 7223 * 7224 * If mctlp == NULL and uiop == NULL, then the code will 7225 * do the strwaitq. This is an understood way of saying 7226 * sleep "polling" until a message is received. 7227 */ 7228 if ((stp->sd_flag & (STRHUP|STREOF)) || 7229 (uiop != NULL && uiop->uio_resid == 0)) { 7230 if (mctlp != NULL) 7231 *mctlp = NULL; 7232 *flagsp = 0; 7233 mutex_exit(&stp->sd_lock); 7234 return (0); 7235 } 7236 7237 waitflag = GETWAIT; 7238 if (flags & 7239 (MSG_HOLDSIG|MSG_IGNERROR|MSG_IPEEK|MSG_DELAYERROR)) { 7240 if (flags & MSG_HOLDSIG) 7241 waitflag |= STR_NOSIG; 7242 if (flags & MSG_IGNERROR) 7243 waitflag |= STR_NOERROR; 7244 if (flags & MSG_IPEEK) 7245 waitflag |= STR_PEEK; 7246 if (flags & MSG_DELAYERROR) 7247 waitflag |= STR_DELAYERR; 7248 } 7249 if (uiop != NULL) 7250 fmode = uiop->uio_fmode; 7251 else 7252 fmode = 0; 7253 7254 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_WAIT, 7255 "kstrgetmsg calls strwaitq:%p, %p", 7256 vp, uiop); 7257 if (((error = strwaitq(stp, waitflag, (ssize_t)0, 7258 fmode, timout, &done))) != 0 || done) { 7259 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_DONE, 7260 "kstrgetmsg error or done:%p, %p", 7261 vp, uiop); 7262 mutex_exit(&stp->sd_lock); 7263 return (error); 7264 } 7265 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_AWAKE, 7266 "kstrgetmsg awakes:%p, %p", vp, uiop); 7267 if ((error = i_straccess(stp, JCREAD)) != 0) { 7268 mutex_exit(&stp->sd_lock); 7269 return (error); 7270 } 7271 first = 0; 7272 } 7273 ASSERT(bp != NULL); 7274 /* 7275 * Extract any mark information. 
If the message is not completely 7276 * consumed this information will be put in the mblk 7277 * that is putback. 7278 * If MSGMARKNEXT is set and the message is completely consumed 7279 * the STRATMARK flag will be set below. Likewise, if 7280 * MSGNOTMARKNEXT is set and the message is 7281 * completely consumed STRNOTATMARK will be set. 7282 */ 7283 mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT); 7284 ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) != 7285 (MSGMARKNEXT|MSGNOTMARKNEXT)); 7286 pri = bp->b_band; 7287 if (mark != 0) { 7288 /* 7289 * If the caller doesn't want the mark return. 7290 * Used to implement MSG_WAITALL in sockets. 7291 */ 7292 if (flags & MSG_NOMARK) { 7293 putback(stp, q, bp, pri); 7294 qbackenable(q, pri); 7295 mutex_exit(&stp->sd_lock); 7296 return (EWOULDBLOCK); 7297 } 7298 if (bp == stp->sd_mark) { 7299 mark |= _LASTMARK; 7300 stp->sd_mark = NULL; 7301 } 7302 } 7303 7304 /* 7305 * keep track of the first message type 7306 */ 7307 type = bp->b_datap->db_type; 7308 7309 if (bp->b_datap->db_type == M_PASSFP) { 7310 if ((mark & _LASTMARK) && (stp->sd_mark == NULL)) 7311 stp->sd_mark = bp; 7312 bp->b_flag |= mark & ~_LASTMARK; 7313 putback(stp, q, bp, pri); 7314 qbackenable(q, pri); 7315 mutex_exit(&stp->sd_lock); 7316 return (EBADMSG); 7317 } 7318 ASSERT(type != M_SIG); 7319 7320 if (flags & MSG_IPEEK) { 7321 /* 7322 * Clear any struioflag - we do the uiomove over again 7323 * when peeking since it simplifies the code. 7324 * 7325 * Dup the message and put the original back on the queue. 7326 * If dupmsg() fails, try again with copymsg() to see if 7327 * there is indeed a shortage of memory. dupmsg() may fail 7328 * if db_ref in any of the messages reaches its limit. 7329 */ 7330 7331 if ((nbp = dupmsg(bp)) == NULL && (nbp = copymsg(bp)) == NULL) { 7332 /* 7333 * Restore the state of the stream head since we 7334 * need to drop sd_lock (strwaitbuf is sleeping). 
7335 */ 7336 size_t size = msgdsize(bp); 7337 7338 if ((mark & _LASTMARK) && (stp->sd_mark == NULL)) 7339 stp->sd_mark = bp; 7340 bp->b_flag |= mark & ~_LASTMARK; 7341 putback(stp, q, bp, pri); 7342 mutex_exit(&stp->sd_lock); 7343 error = strwaitbuf(size, BPRI_HI); 7344 if (error) { 7345 /* 7346 * There is no net change to the queue thus 7347 * no need to qbackenable. 7348 */ 7349 return (error); 7350 } 7351 goto retry; 7352 } 7353 7354 if ((mark & _LASTMARK) && (stp->sd_mark == NULL)) 7355 stp->sd_mark = bp; 7356 bp->b_flag |= mark & ~_LASTMARK; 7357 putback(stp, q, bp, pri); 7358 bp = nbp; 7359 } 7360 7361 /* 7362 * Set this flag so strrput will not generate signals. Need to 7363 * make sure this flag is cleared before leaving this routine 7364 * else signals will stop being sent. 7365 */ 7366 stp->sd_flag |= STRGETINPROG; 7367 mutex_exit(&stp->sd_lock); 7368 7369 if ((stp->sd_rputdatafunc != NULL) && (DB_TYPE(bp) == M_DATA)) { 7370 mblk_t *tmp, *prevmp; 7371 7372 /* 7373 * Put first non-data mblk back to stream head and 7374 * cut the mblk chain so sd_rputdatafunc only sees 7375 * M_DATA mblks. We can skip the first mblk since it 7376 * is M_DATA according to the condition above. 7377 */ 7378 for (prevmp = bp, tmp = bp->b_cont; tmp != NULL; 7379 prevmp = tmp, tmp = tmp->b_cont) { 7380 if (DB_TYPE(tmp) != M_DATA) { 7381 prevmp->b_cont = NULL; 7382 mutex_enter(&stp->sd_lock); 7383 putback(stp, q, tmp, tmp->b_band); 7384 mutex_exit(&stp->sd_lock); 7385 break; 7386 } 7387 } 7388 7389 bp = (stp->sd_rputdatafunc)(stp->sd_vnode, bp, 7390 NULL, NULL, NULL, NULL); 7391 7392 if (bp == NULL) 7393 goto retry; 7394 } 7395 7396 if (STREAM_NEEDSERVICE(stp)) 7397 stream_runservice(stp); 7398 7399 /* 7400 * Set HIPRI flag if message is priority. 7401 */ 7402 if (type >= QPCTL) 7403 flg = MSG_HIPRI; 7404 else 7405 flg = MSG_BAND; 7406 7407 /* 7408 * First process PROTO or PCPROTO blocks, if any. 
7409 */ 7410 if (mctlp != NULL && type != M_DATA) { 7411 mblk_t *nbp; 7412 7413 *mctlp = bp; 7414 while (bp->b_cont && bp->b_cont->b_datap->db_type != M_DATA) 7415 bp = bp->b_cont; 7416 nbp = bp->b_cont; 7417 bp->b_cont = NULL; 7418 bp = nbp; 7419 } 7420 7421 if (bp && bp->b_datap->db_type != M_DATA) { 7422 /* 7423 * More PROTO blocks in msg. Will only happen if mctlp is NULL. 7424 */ 7425 more |= MORECTL; 7426 savemp = bp; 7427 while (bp && bp->b_datap->db_type != M_DATA) { 7428 savemptail = bp; 7429 bp = bp->b_cont; 7430 } 7431 savemptail->b_cont = NULL; 7432 } 7433 7434 /* 7435 * Now process DATA blocks, if any. 7436 */ 7437 if (uiop == NULL) { 7438 /* Append data to tail of mctlp */ 7439 7440 if (mctlp != NULL) { 7441 mblk_t **mpp = mctlp; 7442 7443 while (*mpp != NULL) 7444 mpp = &((*mpp)->b_cont); 7445 *mpp = bp; 7446 bp = NULL; 7447 } 7448 } else if (uiop->uio_resid >= 0 && bp) { 7449 size_t oldresid = uiop->uio_resid; 7450 7451 /* 7452 * If a streams message is likely to consist 7453 * of many small mblks, it is pulled up into 7454 * one continuous chunk of memory. 7455 * The size of the first mblk may be bogus because 7456 * successive read() calls on the socket reduce 7457 * the size of this mblk until it is exhausted 7458 * and then the code walks on to the next. Thus 7459 * the size of the mblk may not be the original size 7460 * that was passed up, it's simply a remainder 7461 * and hence can be very small without any 7462 * implication that the packet is badly fragmented. 7463 * So the size of the possible second mblk is 7464 * used to spot a badly fragmented packet. 7465 * see longer comment at top of page 7466 * by mblk_pull_len declaration. 
7467 */ 7468 7469 if (bp->b_cont != NULL && MBLKL(bp->b_cont) < mblk_pull_len) { 7470 (void) pullupmsg(bp, -1); 7471 } 7472 7473 bp = struiocopyout(bp, uiop, &error); 7474 if (error != 0) { 7475 if (mctlp != NULL) { 7476 freemsg(*mctlp); 7477 *mctlp = NULL; 7478 } else 7479 freemsg(savemp); 7480 mutex_enter(&stp->sd_lock); 7481 /* 7482 * clear stream head hi pri flag based on 7483 * first message 7484 */ 7485 if (!(flags & MSG_IPEEK) && (type >= QPCTL)) { 7486 ASSERT(type == M_PCPROTO); 7487 stp->sd_flag &= ~STRPRI; 7488 } 7489 more = 0; 7490 goto getmout; 7491 } 7492 /* 7493 * (pr == 1) indicates a partial read. 7494 */ 7495 if (oldresid > uiop->uio_resid) 7496 pr = 1; 7497 } 7498 7499 if (bp) { /* more data blocks in msg */ 7500 more |= MOREDATA; 7501 if (savemp) 7502 savemptail->b_cont = bp; 7503 else 7504 savemp = bp; 7505 } 7506 7507 mutex_enter(&stp->sd_lock); 7508 if (savemp) { 7509 if (flags & (MSG_IPEEK|MSG_DISCARDTAIL)) { 7510 /* 7511 * When MSG_DISCARDTAIL is set or 7512 * when peeking discard any tail. When peeking this 7513 * is the tail of the dup that was copied out - the 7514 * message has already been putback on the queue. 7515 * Return MOREDATA to the caller even though the data 7516 * is discarded. This is used by sockets (to 7517 * set MSG_TRUNC). 7518 */ 7519 freemsg(savemp); 7520 if (!(flags & MSG_IPEEK) && (type >= QPCTL)) { 7521 ASSERT(type == M_PCPROTO); 7522 stp->sd_flag &= ~STRPRI; 7523 } 7524 } else if (pr && (savemp->b_datap->db_type == M_DATA) && 7525 msgnodata(savemp)) { 7526 /* 7527 * Avoid queuing a zero-length tail part of 7528 * a message. pr=1 indicates that we read some of 7529 * the message. 7530 */ 7531 freemsg(savemp); 7532 more &= ~MOREDATA; 7533 if (type >= QPCTL) { 7534 ASSERT(type == M_PCPROTO); 7535 stp->sd_flag &= ~STRPRI; 7536 } 7537 } else { 7538 savemp->b_band = pri; 7539 /* 7540 * If the first message was HIPRI and the one we're 7541 * putting back isn't, then clear STRPRI, otherwise 7542 * set STRPRI again. 
Note that we must set STRPRI 7543 * again since the flush logic in strrput_nondata() 7544 * may have cleared it while we had sd_lock dropped. 7545 */ 7546 7547 if (type >= QPCTL) { 7548 ASSERT(type == M_PCPROTO); 7549 if (queclass(savemp) < QPCTL) 7550 stp->sd_flag &= ~STRPRI; 7551 else 7552 stp->sd_flag |= STRPRI; 7553 } else if (queclass(savemp) >= QPCTL) { 7554 /* 7555 * The first message was not a HIPRI message, 7556 * but the one we are about to putback is. 7557 * For simplicitly, we do not allow for HIPRI 7558 * messages to be embedded in the message 7559 * body, so just force it to same type as 7560 * first message. 7561 */ 7562 ASSERT(type == M_DATA || type == M_PROTO); 7563 ASSERT(savemp->b_datap->db_type == M_PCPROTO); 7564 savemp->b_datap->db_type = type; 7565 } 7566 if (mark != 0) { 7567 if ((mark & _LASTMARK) && 7568 (stp->sd_mark == NULL)) { 7569 /* 7570 * If another marked message arrived 7571 * while sd_lock was not held sd_mark 7572 * would be non-NULL. 7573 */ 7574 stp->sd_mark = savemp; 7575 } 7576 savemp->b_flag |= mark & ~_LASTMARK; 7577 } 7578 putback(stp, q, savemp, pri); 7579 } 7580 } else if (!(flags & MSG_IPEEK)) { 7581 /* 7582 * The complete message was consumed. 7583 * 7584 * If another M_PCPROTO arrived while sd_lock was not held 7585 * it would have been discarded since STRPRI was still set. 7586 * 7587 * Move the MSG*MARKNEXT information 7588 * to the stream head just in case 7589 * the read queue becomes empty. 7590 * clear stream head hi pri flag based on 7591 * first message 7592 * 7593 * If the stream head was at the mark 7594 * (STRATMARK) before we dropped sd_lock above 7595 * and some data was consumed then we have 7596 * moved past the mark thus STRATMARK is 7597 * cleared. However, if a message arrived in 7598 * strrput during the copyout above causing 7599 * STRATMARK to be set we can not clear that 7600 * flag. 7601 * XXX A "perimeter" would help by single-threading strrput, 7602 * strread, strgetmsg and kstrgetmsg. 
7603 */ 7604 if (type >= QPCTL) { 7605 ASSERT(type == M_PCPROTO); 7606 stp->sd_flag &= ~STRPRI; 7607 } 7608 if (mark & (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) { 7609 if (mark & MSGMARKNEXT) { 7610 stp->sd_flag &= ~STRNOTATMARK; 7611 stp->sd_flag |= STRATMARK; 7612 } else if (mark & MSGNOTMARKNEXT) { 7613 stp->sd_flag &= ~STRATMARK; 7614 stp->sd_flag |= STRNOTATMARK; 7615 } else { 7616 stp->sd_flag &= ~(STRATMARK|STRNOTATMARK); 7617 } 7618 } else if (pr && (old_sd_flag & STRATMARK)) { 7619 stp->sd_flag &= ~STRATMARK; 7620 } 7621 } 7622 7623 *flagsp = flg; 7624 *prip = pri; 7625 7626 /* 7627 * Getmsg cleanup processing - if the state of the queue has changed 7628 * some signals may need to be sent and/or poll awakened. 7629 */ 7630 getmout: 7631 qbackenable(q, pri); 7632 7633 /* 7634 * We dropped the stream head lock above. Send all M_SIG messages 7635 * before processing stream head for SIGPOLL messages. 7636 */ 7637 ASSERT(MUTEX_HELD(&stp->sd_lock)); 7638 while ((bp = q->q_first) != NULL && 7639 (bp->b_datap->db_type == M_SIG)) { 7640 /* 7641 * sd_lock is held so the content of the read queue can not 7642 * change. 7643 */ 7644 bp = getq(q); 7645 ASSERT(bp != NULL && bp->b_datap->db_type == M_SIG); 7646 7647 strsignal_nolock(stp, *bp->b_rptr, bp->b_band); 7648 mutex_exit(&stp->sd_lock); 7649 freemsg(bp); 7650 if (STREAM_NEEDSERVICE(stp)) 7651 stream_runservice(stp); 7652 mutex_enter(&stp->sd_lock); 7653 } 7654 7655 /* 7656 * stream head cannot change while we make the determination 7657 * whether or not to send a signal. Drop the flag to allow strrput 7658 * to send firstmsgsigs again. 7659 */ 7660 stp->sd_flag &= ~STRGETINPROG; 7661 7662 /* 7663 * If the type of message at the front of the queue changed 7664 * due to the receive the appropriate signals and pollwakeup events 7665 * are generated. The type of changes are: 7666 * Processed a hipri message, q_first is not hipri. 7667 * Processed a band X message, and q_first is band Y. 
7668 * The generated signals and pollwakeups are identical to what 7669 * strrput() generates should the message that is now on q_first 7670 * arrive to an empty read queue. 7671 * 7672 * Note: only strrput will send a signal for a hipri message. 7673 */ 7674 if ((bp = q->q_first) != NULL && !(stp->sd_flag & STRPRI)) { 7675 strsigset_t signals = 0; 7676 strpollset_t pollwakeups = 0; 7677 7678 if (flg & MSG_HIPRI) { 7679 /* 7680 * Removed a hipri message. Regular data at 7681 * the front of the queue. 7682 */ 7683 if (bp->b_band == 0) { 7684 signals = S_INPUT | S_RDNORM; 7685 pollwakeups = POLLIN | POLLRDNORM; 7686 } else { 7687 signals = S_INPUT | S_RDBAND; 7688 pollwakeups = POLLIN | POLLRDBAND; 7689 } 7690 } else if (pri != bp->b_band) { 7691 /* 7692 * The band is different for the new q_first. 7693 */ 7694 if (bp->b_band == 0) { 7695 signals = S_RDNORM; 7696 pollwakeups = POLLIN | POLLRDNORM; 7697 } else { 7698 signals = S_RDBAND; 7699 pollwakeups = POLLIN | POLLRDBAND; 7700 } 7701 } 7702 7703 if (pollwakeups != 0) { 7704 if (pollwakeups == (POLLIN | POLLRDNORM)) { 7705 if (!(stp->sd_rput_opt & SR_POLLIN)) 7706 goto no_pollwake; 7707 stp->sd_rput_opt &= ~SR_POLLIN; 7708 } 7709 mutex_exit(&stp->sd_lock); 7710 pollwakeup(&stp->sd_pollist, pollwakeups); 7711 mutex_enter(&stp->sd_lock); 7712 } 7713 no_pollwake: 7714 7715 if (stp->sd_sigflags & signals) 7716 strsendsig(stp->sd_siglist, signals, bp->b_band, 0); 7717 } 7718 mutex_exit(&stp->sd_lock); 7719 7720 rvp->r_val1 = more; 7721 return (error); 7722 #undef _LASTMARK 7723 } 7724 7725 /* 7726 * Put a message downstream. 7727 * 7728 * NOTE: strputmsg and kstrputmsg have much of the logic in common. 
7729 */ 7730 int 7731 strputmsg( 7732 struct vnode *vp, 7733 struct strbuf *mctl, 7734 struct strbuf *mdata, 7735 unsigned char pri, 7736 int flag, 7737 int fmode) 7738 { 7739 struct stdata *stp; 7740 queue_t *wqp; 7741 mblk_t *mp; 7742 ssize_t msgsize; 7743 ssize_t rmin, rmax; 7744 int error; 7745 struct uio uios; 7746 struct uio *uiop = &uios; 7747 struct iovec iovs; 7748 int xpg4 = 0; 7749 7750 ASSERT(vp->v_stream); 7751 stp = vp->v_stream; 7752 wqp = stp->sd_wrq; 7753 7754 /* 7755 * If it is an XPG4 application, we need to send 7756 * SIGPIPE below 7757 */ 7758 7759 xpg4 = (flag & MSG_XPG4) ? 1 : 0; 7760 flag &= ~MSG_XPG4; 7761 7762 if (AU_AUDITING()) 7763 audit_strputmsg(vp, mctl, mdata, pri, flag, fmode); 7764 7765 mutex_enter(&stp->sd_lock); 7766 7767 if ((error = i_straccess(stp, JCWRITE)) != 0) { 7768 mutex_exit(&stp->sd_lock); 7769 return (error); 7770 } 7771 7772 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { 7773 error = strwriteable(stp, B_FALSE, xpg4); 7774 if (error != 0) { 7775 mutex_exit(&stp->sd_lock); 7776 return (error); 7777 } 7778 } 7779 7780 mutex_exit(&stp->sd_lock); 7781 7782 /* 7783 * Check for legal flag value. 7784 */ 7785 switch (flag) { 7786 case MSG_HIPRI: 7787 if ((mctl->len < 0) || (pri != 0)) 7788 return (EINVAL); 7789 break; 7790 case MSG_BAND: 7791 break; 7792 7793 default: 7794 return (EINVAL); 7795 } 7796 7797 TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_IN, 7798 "strputmsg in:stp %p", stp); 7799 7800 /* get these values from those cached in the stream head */ 7801 rmin = stp->sd_qn_minpsz; 7802 rmax = stp->sd_qn_maxpsz; 7803 7804 /* 7805 * Make sure ctl and data sizes together fall within the 7806 * limits of the max and min receive packet sizes and do 7807 * not exceed system limit. 7808 */ 7809 ASSERT((rmax >= 0) || (rmax == INFPSZ)); 7810 if (rmax == 0) { 7811 return (ERANGE); 7812 } 7813 /* 7814 * Use the MAXIMUM of sd_maxblk and q_maxpsz. 7815 * Needed to prevent partial failures in the strmakedata loop. 
7816 */ 7817 if (stp->sd_maxblk != INFPSZ && rmax != INFPSZ && rmax < stp->sd_maxblk) 7818 rmax = stp->sd_maxblk; 7819 7820 if ((msgsize = mdata->len) < 0) { 7821 msgsize = 0; 7822 rmin = 0; /* no range check for NULL data part */ 7823 } 7824 if ((msgsize < rmin) || 7825 ((msgsize > rmax) && (rmax != INFPSZ)) || 7826 (mctl->len > strctlsz)) { 7827 return (ERANGE); 7828 } 7829 7830 /* 7831 * Setup uio and iov for data part 7832 */ 7833 iovs.iov_base = mdata->buf; 7834 iovs.iov_len = msgsize; 7835 uios.uio_iov = &iovs; 7836 uios.uio_iovcnt = 1; 7837 uios.uio_loffset = 0; 7838 uios.uio_segflg = UIO_USERSPACE; 7839 uios.uio_fmode = fmode; 7840 uios.uio_extflg = UIO_COPY_DEFAULT; 7841 uios.uio_resid = msgsize; 7842 uios.uio_offset = 0; 7843 7844 /* Ignore flow control in strput for HIPRI */ 7845 if (flag & MSG_HIPRI) 7846 flag |= MSG_IGNFLOW; 7847 7848 for (;;) { 7849 int done = 0; 7850 7851 /* 7852 * strput will always free the ctl mblk - even when strput 7853 * fails. 7854 */ 7855 if ((error = strmakectl(mctl, flag, fmode, &mp)) != 0) { 7856 TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT, 7857 "strputmsg out:stp %p out %d error %d", 7858 stp, 1, error); 7859 return (error); 7860 } 7861 /* 7862 * Verify that the whole message can be transferred by 7863 * strput. 7864 */ 7865 ASSERT(stp->sd_maxblk == INFPSZ || 7866 stp->sd_maxblk >= mdata->len); 7867 7868 msgsize = mdata->len; 7869 error = strput(stp, mp, uiop, &msgsize, 0, pri, flag); 7870 mdata->len = msgsize; 7871 7872 if (error == 0) 7873 break; 7874 7875 if (error != EWOULDBLOCK) 7876 goto out; 7877 7878 mutex_enter(&stp->sd_lock); 7879 /* 7880 * Check for a missed wakeup. 7881 * Needed since strput did not hold sd_lock across 7882 * the canputnext. 
7883 */ 7884 if (bcanputnext(wqp, pri)) { 7885 /* Try again */ 7886 mutex_exit(&stp->sd_lock); 7887 continue; 7888 } 7889 TRACE_2(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAIT, 7890 "strputmsg wait:stp %p waits pri %d", stp, pri); 7891 if (((error = strwaitq(stp, WRITEWAIT, (ssize_t)0, fmode, -1, 7892 &done)) != 0) || done) { 7893 mutex_exit(&stp->sd_lock); 7894 TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT, 7895 "strputmsg out:q %p out %d error %d", 7896 stp, 0, error); 7897 return (error); 7898 } 7899 TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAKE, 7900 "strputmsg wake:stp %p wakes", stp); 7901 if ((error = i_straccess(stp, JCWRITE)) != 0) { 7902 mutex_exit(&stp->sd_lock); 7903 return (error); 7904 } 7905 mutex_exit(&stp->sd_lock); 7906 } 7907 out: 7908 /* 7909 * For historic reasons, applications expect EAGAIN 7910 * when data mblk could not be allocated. so change 7911 * ENOMEM back to EAGAIN 7912 */ 7913 if (error == ENOMEM) 7914 error = EAGAIN; 7915 TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT, 7916 "strputmsg out:stp %p out %d error %d", stp, 2, error); 7917 return (error); 7918 } 7919 7920 /* 7921 * Put a message downstream. 7922 * Can send only an M_PROTO/M_PCPROTO by passing in a NULL uiop. 7923 * The fmode flag (NDELAY, NONBLOCK) is the or of the flags in the uio 7924 * and the fmode parameter. 7925 * 7926 * This routine handles the consolidation private flags: 7927 * MSG_IGNERROR Ignore any stream head error except STPLEX. 7928 * MSG_HOLDSIG Hold signals while waiting for data. 7929 * MSG_IGNFLOW Don't check streams flow control. 7930 * 7931 * NOTE: strputmsg and kstrputmsg have much of the logic in common. 
7932 */ 7933 int 7934 kstrputmsg( 7935 struct vnode *vp, 7936 mblk_t *mctl, 7937 struct uio *uiop, 7938 ssize_t msgsize, 7939 unsigned char pri, 7940 int flag, 7941 int fmode) 7942 { 7943 struct stdata *stp; 7944 queue_t *wqp; 7945 ssize_t rmin, rmax; 7946 int error; 7947 7948 ASSERT(vp->v_stream); 7949 stp = vp->v_stream; 7950 wqp = stp->sd_wrq; 7951 if (AU_AUDITING()) 7952 audit_strputmsg(vp, NULL, NULL, pri, flag, fmode); 7953 if (mctl == NULL) 7954 return (EINVAL); 7955 7956 mutex_enter(&stp->sd_lock); 7957 7958 if ((error = i_straccess(stp, JCWRITE)) != 0) { 7959 mutex_exit(&stp->sd_lock); 7960 freemsg(mctl); 7961 return (error); 7962 } 7963 7964 if ((stp->sd_flag & STPLEX) || !(flag & MSG_IGNERROR)) { 7965 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { 7966 error = strwriteable(stp, B_FALSE, B_TRUE); 7967 if (error != 0) { 7968 mutex_exit(&stp->sd_lock); 7969 freemsg(mctl); 7970 return (error); 7971 } 7972 } 7973 } 7974 7975 mutex_exit(&stp->sd_lock); 7976 7977 /* 7978 * Check for legal flag value. 7979 */ 7980 switch (flag & (MSG_HIPRI|MSG_BAND|MSG_ANY)) { 7981 case MSG_HIPRI: 7982 if (pri != 0) { 7983 freemsg(mctl); 7984 return (EINVAL); 7985 } 7986 break; 7987 case MSG_BAND: 7988 break; 7989 default: 7990 freemsg(mctl); 7991 return (EINVAL); 7992 } 7993 7994 TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_IN, 7995 "kstrputmsg in:stp %p", stp); 7996 7997 /* get these values from those cached in the stream head */ 7998 rmin = stp->sd_qn_minpsz; 7999 rmax = stp->sd_qn_maxpsz; 8000 8001 /* 8002 * Make sure ctl and data sizes together fall within the 8003 * limits of the max and min receive packet sizes and do 8004 * not exceed system limit. 8005 */ 8006 ASSERT((rmax >= 0) || (rmax == INFPSZ)); 8007 if (rmax == 0) { 8008 freemsg(mctl); 8009 return (ERANGE); 8010 } 8011 /* 8012 * Use the MAXIMUM of sd_maxblk and q_maxpsz. 8013 * Needed to prevent partial failures in the strmakedata loop. 
8014 */ 8015 if (stp->sd_maxblk != INFPSZ && rmax != INFPSZ && rmax < stp->sd_maxblk) 8016 rmax = stp->sd_maxblk; 8017 8018 if (uiop == NULL) { 8019 msgsize = -1; 8020 rmin = -1; /* no range check for NULL data part */ 8021 } else { 8022 /* Use uio flags as well as the fmode parameter flags */ 8023 fmode |= uiop->uio_fmode; 8024 8025 if ((msgsize < rmin) || 8026 ((msgsize > rmax) && (rmax != INFPSZ))) { 8027 freemsg(mctl); 8028 return (ERANGE); 8029 } 8030 } 8031 8032 /* Ignore flow control in strput for HIPRI */ 8033 if (flag & MSG_HIPRI) 8034 flag |= MSG_IGNFLOW; 8035 8036 for (;;) { 8037 int done = 0; 8038 int waitflag; 8039 mblk_t *mp; 8040 8041 /* 8042 * strput will always free the ctl mblk - even when strput 8043 * fails. If MSG_IGNFLOW is set then any error returned 8044 * will cause us to break the loop, so we don't need a copy 8045 * of the message. If MSG_IGNFLOW is not set, then we can 8046 * get hit by flow control and be forced to try again. In 8047 * this case we need to have a copy of the message. We 8048 * do this using copymsg since the message may get modified 8049 * by something below us. 8050 * 8051 * We've observed that many TPI providers do not check db_ref 8052 * on the control messages but blindly reuse them for the 8053 * T_OK_ACK/T_ERROR_ACK. Thus using copymsg is more 8054 * friendly to such providers than using dupmsg. Also, note 8055 * that sockfs uses MSG_IGNFLOW for all TPI control messages. 8056 * Only data messages are subject to flow control, hence 8057 * subject to this copymsg. 8058 */ 8059 if (flag & MSG_IGNFLOW) { 8060 mp = mctl; 8061 mctl = NULL; 8062 } else { 8063 do { 8064 /* 8065 * If a message has a free pointer, the message 8066 * must be dupmsg to maintain this pointer. 8067 * Code using this facility must be sure 8068 * that modules below will not change the 8069 * contents of the dblk without checking db_ref 8070 * first. If db_ref is > 1, then the module 8071 * needs to do a copymsg first. 
Otherwise, 8072 * the contents of the dblk may become 8073 * inconsistent because the freesmg/freeb below 8074 * may end up calling atomic_add_32_nv. 8075 * The atomic_add_32_nv in freeb (accessing 8076 * all of db_ref, db_type, db_flags, and 8077 * db_struioflag) does not prevent other threads 8078 * from concurrently trying to modify e.g. 8079 * db_type. 8080 */ 8081 if (mctl->b_datap->db_frtnp != NULL) 8082 mp = dupmsg(mctl); 8083 else 8084 mp = copymsg(mctl); 8085 8086 if (mp != NULL) 8087 break; 8088 8089 error = strwaitbuf(msgdsize(mctl), BPRI_MED); 8090 if (error) { 8091 freemsg(mctl); 8092 return (error); 8093 } 8094 } while (mp == NULL); 8095 } 8096 /* 8097 * Verify that all of msgsize can be transferred by 8098 * strput. 8099 */ 8100 ASSERT(stp->sd_maxblk == INFPSZ || stp->sd_maxblk >= msgsize); 8101 error = strput(stp, mp, uiop, &msgsize, 0, pri, flag); 8102 if (error == 0) 8103 break; 8104 8105 if (error != EWOULDBLOCK) 8106 goto out; 8107 8108 /* 8109 * IF MSG_IGNFLOW is set we should have broken out of loop 8110 * above. 8111 */ 8112 ASSERT(!(flag & MSG_IGNFLOW)); 8113 mutex_enter(&stp->sd_lock); 8114 /* 8115 * Check for a missed wakeup. 8116 * Needed since strput did not hold sd_lock across 8117 * the canputnext. 
8118 */ 8119 if (bcanputnext(wqp, pri)) { 8120 /* Try again */ 8121 mutex_exit(&stp->sd_lock); 8122 continue; 8123 } 8124 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAIT, 8125 "kstrputmsg wait:stp %p waits pri %d", stp, pri); 8126 8127 waitflag = WRITEWAIT; 8128 if (flag & (MSG_HOLDSIG|MSG_IGNERROR)) { 8129 if (flag & MSG_HOLDSIG) 8130 waitflag |= STR_NOSIG; 8131 if (flag & MSG_IGNERROR) 8132 waitflag |= STR_NOERROR; 8133 } 8134 if (((error = strwaitq(stp, waitflag, 8135 (ssize_t)0, fmode, -1, &done)) != 0) || done) { 8136 mutex_exit(&stp->sd_lock); 8137 TRACE_3(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_OUT, 8138 "kstrputmsg out:stp %p out %d error %d", 8139 stp, 0, error); 8140 freemsg(mctl); 8141 return (error); 8142 } 8143 TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAKE, 8144 "kstrputmsg wake:stp %p wakes", stp); 8145 if ((error = i_straccess(stp, JCWRITE)) != 0) { 8146 mutex_exit(&stp->sd_lock); 8147 freemsg(mctl); 8148 return (error); 8149 } 8150 mutex_exit(&stp->sd_lock); 8151 } 8152 out: 8153 freemsg(mctl); 8154 /* 8155 * For historic reasons, applications expect EAGAIN 8156 * when data mblk could not be allocated. so change 8157 * ENOMEM back to EAGAIN 8158 */ 8159 if (error == ENOMEM) 8160 error = EAGAIN; 8161 TRACE_3(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_OUT, 8162 "kstrputmsg out:stp %p out %d error %d", stp, 2, error); 8163 return (error); 8164 } 8165 8166 /* 8167 * Determines whether the necessary conditions are set on a stream 8168 * for it to be readable, writeable, or have exceptions. 8169 * 8170 * strpoll handles the consolidation private events: 8171 * POLLNOERR Do not return POLLERR even if there are stream 8172 * head errors. 8173 * Used by sockfs. 8174 * POLLRDDATA Do not return POLLIN unless at least one message on 8175 * the queue contains one or more M_DATA mblks. Thus 8176 * when this flag is set a queue with only 8177 * M_PROTO/M_PCPROTO mblks does not return POLLIN. 8178 * Used by sockfs to ignore T_EXDATA_IND messages. 
8179 * 8180 * Note: POLLRDDATA assumes that synch streams only return messages with 8181 * an M_DATA attached (i.e. not messages consisting of only 8182 * an M_PROTO/M_PCPROTO part). 8183 */ 8184 int 8185 strpoll( 8186 struct stdata *stp, 8187 short events_arg, 8188 int anyyet, 8189 short *reventsp, 8190 struct pollhead **phpp) 8191 { 8192 int events = (ushort_t)events_arg; 8193 int retevents = 0; 8194 mblk_t *mp; 8195 qband_t *qbp; 8196 long sd_flags = stp->sd_flag; 8197 int headlocked = 0; 8198 8199 /* 8200 * For performance, a single 'if' tests for most possible edge 8201 * conditions in one shot 8202 */ 8203 if (sd_flags & (STPLEX | STRDERR | STWRERR)) { 8204 if (sd_flags & STPLEX) { 8205 *reventsp = POLLNVAL; 8206 return (EINVAL); 8207 } 8208 if (((events & (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) && 8209 (sd_flags & STRDERR)) || 8210 ((events & (POLLOUT | POLLWRNORM | POLLWRBAND)) && 8211 (sd_flags & STWRERR))) { 8212 if (!(events & POLLNOERR)) { 8213 *reventsp = POLLERR; 8214 return (0); 8215 } 8216 } 8217 } 8218 if (sd_flags & STRHUP) { 8219 retevents |= POLLHUP; 8220 } else if (events & (POLLWRNORM | POLLWRBAND)) { 8221 queue_t *tq; 8222 queue_t *qp = stp->sd_wrq; 8223 8224 claimstr(qp); 8225 /* Find next module forward that has a service procedure */ 8226 tq = qp->q_next->q_nfsrv; 8227 ASSERT(tq != NULL); 8228 8229 polllock(&stp->sd_pollist, QLOCK(tq)); 8230 if (events & POLLWRNORM) { 8231 queue_t *sqp; 8232 8233 if (tq->q_flag & QFULL) 8234 /* ensure backq svc procedure runs */ 8235 tq->q_flag |= QWANTW; 8236 else if ((sqp = stp->sd_struiowrq) != NULL) { 8237 /* Check sync stream barrier write q */ 8238 mutex_exit(QLOCK(tq)); 8239 polllock(&stp->sd_pollist, QLOCK(sqp)); 8240 if (sqp->q_flag & QFULL) 8241 /* ensure pollwakeup() is done */ 8242 sqp->q_flag |= QWANTWSYNC; 8243 else 8244 retevents |= POLLOUT; 8245 /* More write events to process ??? */ 8246 if (! 
(events & POLLWRBAND)) { 8247 mutex_exit(QLOCK(sqp)); 8248 releasestr(qp); 8249 goto chkrd; 8250 } 8251 mutex_exit(QLOCK(sqp)); 8252 polllock(&stp->sd_pollist, QLOCK(tq)); 8253 } else 8254 retevents |= POLLOUT; 8255 } 8256 if (events & POLLWRBAND) { 8257 qbp = tq->q_bandp; 8258 if (qbp) { 8259 while (qbp) { 8260 if (qbp->qb_flag & QB_FULL) 8261 qbp->qb_flag |= QB_WANTW; 8262 else 8263 retevents |= POLLWRBAND; 8264 qbp = qbp->qb_next; 8265 } 8266 } else { 8267 retevents |= POLLWRBAND; 8268 } 8269 } 8270 mutex_exit(QLOCK(tq)); 8271 releasestr(qp); 8272 } 8273 chkrd: 8274 if (sd_flags & STRPRI) { 8275 retevents |= (events & POLLPRI); 8276 } else if (events & (POLLRDNORM | POLLRDBAND | POLLIN)) { 8277 queue_t *qp = _RD(stp->sd_wrq); 8278 int normevents = (events & (POLLIN | POLLRDNORM)); 8279 8280 /* 8281 * Note: Need to do polllock() here since ps_lock may be 8282 * held. See bug 4191544. 8283 */ 8284 polllock(&stp->sd_pollist, &stp->sd_lock); 8285 headlocked = 1; 8286 mp = qp->q_first; 8287 while (mp) { 8288 /* 8289 * For POLLRDDATA we scan b_cont and b_next until we 8290 * find an M_DATA. 8291 */ 8292 if ((events & POLLRDDATA) && 8293 mp->b_datap->db_type != M_DATA) { 8294 mblk_t *nmp = mp->b_cont; 8295 8296 while (nmp != NULL && 8297 nmp->b_datap->db_type != M_DATA) 8298 nmp = nmp->b_cont; 8299 if (nmp == NULL) { 8300 mp = mp->b_next; 8301 continue; 8302 } 8303 } 8304 if (mp->b_band == 0) 8305 retevents |= normevents; 8306 else 8307 retevents |= (events & (POLLIN | POLLRDBAND)); 8308 break; 8309 } 8310 if (! (retevents & normevents) && 8311 (stp->sd_wakeq & RSLEEP)) { 8312 /* 8313 * Sync stream barrier read queue has data. 
*/
			retevents |= normevents;
		}
		/* Treat eof as normal data */
		if (sd_flags & STREOF)
			retevents |= normevents;
	}

	*reventsp = (short)retevents;
	if (retevents) {
		if (headlocked)
			mutex_exit(&stp->sd_lock);
		return (0);
	}

	/*
	 * If poll() has not found any events yet, set up event cell
	 * to wake up the poll if a requested event occurs on this
	 * stream.  Check for collisions with outstanding poll requests.
	 */
	if (!anyyet) {
		*phpp = &stp->sd_pollist;
		if (headlocked == 0) {
			polllock(&stp->sd_pollist, &stp->sd_lock);
			headlocked = 1;
		}
		stp->sd_rput_opt |= SR_POLLIN;
	}
	if (headlocked)
		mutex_exit(&stp->sd_lock);
	return (0);
}

/*
 * The purpose of putback() is to assure sleeping polls/reads
 * are awakened when there are no new messages arriving at the
 * stream head, and a message is placed back on the read queue.
 *
 * sd_lock must be held when messages are placed back on stream
 * head.  (getq() holds sd_lock when it removes messages from
 * the queue)
 *
 * Arguments:
 *	stp	stream head; stp->sd_lock must be held on entry (asserted).
 *	q	queue the message is returned to with putbq().
 *	bp	message being put back.
 *	band	0 selects a POLLIN | POLLRDNORM wakeup, which is issued only
 *		if SR_POLLIN is set in sd_rput_opt; any other value selects
 *		POLLIN | POLLRDBAND.  When STRPRI is set in sd_flag the
 *		wakeup is POLLPRI regardless of band.
 *
 * When bp ends up first on the queue and a wakeup is issued, sd_lock is
 * dropped around the pollwakeup() call and reacquired before returning.
 */

static void
putback(struct stdata *stp, queue_t *q, mblk_t *bp, int band)
{
	mblk_t	*qfirst;
	ASSERT(MUTEX_HELD(&stp->sd_lock));

	/*
	 * As a result of lock-step ordering around q_lock and sd_lock,
	 * it's possible for function calls like putnext() and
	 * canputnext() to get an inaccurate picture of how much
	 * data is really being processed at the stream head.
	 * We only consolidate with existing messages on the queue
	 * if the length of the message we want to put back is smaller
	 * than the queue hiwater mark.
	 */
	if ((stp->sd_rput_opt & SR_CONSOL_DATA) &&
	    (DB_TYPE(bp) == M_DATA) && ((qfirst = q->q_first) != NULL) &&
	    (DB_TYPE(qfirst) == M_DATA) &&
	    ((qfirst->b_flag & (MSGMARK|MSGDELIM)) == 0) &&
	    ((bp->b_flag & (MSGMARK|MSGDELIM|MSGMARKNEXT)) == 0) &&
	    (mp_cont_len(bp, NULL) < q->q_hiwat)) {
		/*
		 * We use the same logic as defined in strrput()
		 * but in reverse as we are putting back onto the
		 * queue and want to retain byte ordering.
		 * Consolidate M_DATA messages with M_DATA ONLY.
		 * strrput() allows the consolidation of M_DATA onto
		 * M_PROTO | M_PCPROTO but not the other way round.
		 *
		 * The consolidation does not take place if the message
		 * we are returning to the queue is marked with either
		 * of the marks or the delim flag or if q_first
		 * is marked with MSGMARK (the test above also excludes
		 * a q_first carrying MSGDELIM).  The MSGMARK check is
		 * needed to handle the odd semantics of MSGMARK where
		 * essentially the whole message is to be treated as marked.
		 * Carry any MSGMARKNEXT and MSGNOTMARKNEXT from q_first
		 * to the front of the b_cont chain.
		 */
		rmvq_noenab(q, qfirst);

		/*
		 * The first message in the b_cont list
		 * tracks MSGMARKNEXT and MSGNOTMARKNEXT.
		 * We need to handle the case where we
		 * are appending:
		 *
		 * 1) a MSGMARKNEXT to a MSGNOTMARKNEXT.
		 * 2) a MSGMARKNEXT to a plain message.
		 * 3) a MSGNOTMARKNEXT to a plain message
		 * 4) a MSGNOTMARKNEXT to a MSGNOTMARKNEXT
		 *    message.
		 *
		 * Thus we never append a MSGMARKNEXT or
		 * MSGNOTMARKNEXT to a MSGMARKNEXT message.
		 */
		if (qfirst->b_flag & MSGMARKNEXT) {
			bp->b_flag |= MSGMARKNEXT;
			bp->b_flag &= ~MSGNOTMARKNEXT;
			qfirst->b_flag &= ~MSGMARKNEXT;
		} else if (qfirst->b_flag & MSGNOTMARKNEXT) {
			bp->b_flag |= MSGNOTMARKNEXT;
			qfirst->b_flag &= ~MSGNOTMARKNEXT;
		}

		/* Chain the old q_first behind bp so byte order is kept. */
		linkb(bp, qfirst);
	}
	/* Return bp (plus any consolidated data) to the queue. */
	(void) putbq(q, bp);

	/*
	 * A message may have come in when the sd_lock was dropped in the
	 * calling routine. If this is the case and STR*ATMARK info was
	 * received, need to move that from the stream head to the q_last
	 * so that SIOCATMARK can return the proper value.
	 */
	if (stp->sd_flag & (STRATMARK | STRNOTATMARK)) {
		unsigned short *flagp = &q->q_last->b_flag;
		uint_t b_flag = (uint_t)*flagp;

		/*
		 * The two mark-next flags are kept mutually exclusive:
		 * setting one always clears the other.
		 */
		if (stp->sd_flag & STRATMARK) {
			b_flag &= ~MSGNOTMARKNEXT;
			b_flag |= MSGMARKNEXT;
			stp->sd_flag &= ~STRATMARK;
		} else {
			b_flag &= ~MSGMARKNEXT;
			b_flag |= MSGNOTMARKNEXT;
			stp->sd_flag &= ~STRNOTATMARK;
		}
		*flagp = (unsigned short) b_flag;
	}

#ifdef DEBUG
	/*
	 * Make sure that the flags are not messed up: no block in the
	 * b_cont chain of q_last may carry both MSGMARKNEXT and
	 * MSGNOTMARKNEXT.
	 */
	{
		mblk_t *mp;
		mp = q->q_last;
		while (mp != NULL) {
			ASSERT((mp->b_flag & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
			    (MSGMARKNEXT|MSGNOTMARKNEXT));
			mp = mp->b_cont;
		}
	}
#endif
	/*
	 * Wake sleepers and pollers only if bp is now the first message
	 * on the queue.
	 */
	if (q->q_first == bp) {
		short pollevents;

		if (stp->sd_flag & RSLEEP) {
			stp->sd_flag &= ~RSLEEP;
			cv_broadcast(&q->q_wait);
		}
		if (stp->sd_flag & STRPRI) {
			pollevents = POLLPRI;
		} else {
			if (band == 0) {
				/*
				 * SR_POLLIN clear: no normal-data poll
				 * wakeup has been requested, so return
				 * without dropping sd_lock.
				 */
				if (!(stp->sd_rput_opt & SR_POLLIN))
					return;
				stp->sd_rput_opt &= ~SR_POLLIN;
				pollevents = POLLIN | POLLRDNORM;
			} else {
				pollevents = POLLIN | POLLRDBAND;
			}
		}
		/* pollwakeup() is called with sd_lock dropped. */
		mutex_exit(&stp->sd_lock);
		pollwakeup(&stp->sd_pollist, pollevents);
		mutex_enter(&stp->sd_lock);
	}
}

/*
 * Return the held vnode attached to the stream head of a
 * given queue.  A hold is taken with VN_HOLD() before the vnode
 * is returned; releasing it is presumably up to the caller.
 * It is the responsibility of the calling routine to ensure
 * that the queue does not go away (e.g. pop).
 */
vnode_t *
strq2vp(queue_t *qp)
{
	vnode_t *vp;
	vp = STREAM(qp)->sd_vnode;
	ASSERT(vp != NULL);
	VN_HOLD(vp);
	return (vp);
}

/*
 * Return the stream head write queue for the given vp.
 * The vnode must have a stream attached (asserted).
 * It is the responsibility of the calling routine to ensure
 * that the stream or vnode do not close.
 */
queue_t *
strvp2wq(vnode_t *vp)
{
	ASSERT(vp->v_stream != NULL);
	return (vp->v_stream->sd_wrq);
}

/*
 * Issue a pollwakeup() for the given event on the poll list of the
 * stream head attached to vp.  The vnode must have a stream attached
 * (asserted).
 * It is the responsibility of the calling routine to ensure
 * that the stream or vnode do not close.
 */
void
strpollwakeup(vnode_t *vp, short event)
{
	ASSERT(vp->v_stream);
	pollwakeup(&vp->v_stream->sd_pollist, event);
}

/*
 * Mate the stream heads of two vnodes together.
If the two vnodes are the 8528 * same, we just make the write-side point at the read-side -- otherwise, 8529 * we do a full mate. Only works on vnodes associated with streams that are 8530 * still being built and thus have only a stream head. 8531 */ 8532 void 8533 strmate(vnode_t *vp1, vnode_t *vp2) 8534 { 8535 queue_t *wrq1 = strvp2wq(vp1); 8536 queue_t *wrq2 = strvp2wq(vp2); 8537 8538 /* 8539 * Verify that there are no modules on the stream yet. We also 8540 * rely on the stream head always having a service procedure to 8541 * avoid tweaking q_nfsrv. 8542 */ 8543 ASSERT(wrq1->q_next == NULL && wrq2->q_next == NULL); 8544 ASSERT(wrq1->q_qinfo->qi_srvp != NULL); 8545 ASSERT(wrq2->q_qinfo->qi_srvp != NULL); 8546 8547 /* 8548 * If the queues are the same, just twist; otherwise do a full mate. 8549 */ 8550 if (wrq1 == wrq2) { 8551 wrq1->q_next = _RD(wrq1); 8552 } else { 8553 wrq1->q_next = _RD(wrq2); 8554 wrq2->q_next = _RD(wrq1); 8555 STREAM(wrq1)->sd_mate = STREAM(wrq2); 8556 STREAM(wrq1)->sd_flag |= STRMATE; 8557 STREAM(wrq2)->sd_mate = STREAM(wrq1); 8558 STREAM(wrq2)->sd_flag |= STRMATE; 8559 } 8560 } 8561 8562 /* 8563 * XXX will go away when console is correctly fixed. 8564 * Clean up the console PIDS, from previous I_SETSIG, 8565 * called only for cnopen which never calls strclean(). 8566 */ 8567 void 8568 str_cn_clean(struct vnode *vp) 8569 { 8570 strsig_t *ssp, *pssp, *tssp; 8571 struct stdata *stp; 8572 struct pid *pidp; 8573 int update = 0; 8574 8575 ASSERT(vp->v_stream); 8576 stp = vp->v_stream; 8577 pssp = NULL; 8578 mutex_enter(&stp->sd_lock); 8579 ssp = stp->sd_siglist; 8580 while (ssp) { 8581 mutex_enter(&pidlock); 8582 pidp = ssp->ss_pidp; 8583 /* 8584 * Get rid of PID if the proc is gone. 
8585 */ 8586 if (pidp->pid_prinactive) { 8587 tssp = ssp->ss_next; 8588 if (pssp) 8589 pssp->ss_next = tssp; 8590 else 8591 stp->sd_siglist = tssp; 8592 ASSERT(pidp->pid_ref <= 1); 8593 PID_RELE(ssp->ss_pidp); 8594 mutex_exit(&pidlock); 8595 kmem_free(ssp, sizeof (strsig_t)); 8596 update = 1; 8597 ssp = tssp; 8598 continue; 8599 } else 8600 mutex_exit(&pidlock); 8601 pssp = ssp; 8602 ssp = ssp->ss_next; 8603 } 8604 if (update) { 8605 stp->sd_sigflags = 0; 8606 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 8607 stp->sd_sigflags |= ssp->ss_events; 8608 } 8609 mutex_exit(&stp->sd_lock); 8610 } 8611 8612 /* 8613 * Return B_TRUE if there is data in the message, B_FALSE otherwise. 8614 */ 8615 static boolean_t 8616 msghasdata(mblk_t *bp) 8617 { 8618 for (; bp; bp = bp->b_cont) 8619 if (bp->b_datap->db_type == M_DATA) { 8620 ASSERT(bp->b_wptr >= bp->b_rptr); 8621 if (bp->b_wptr > bp->b_rptr) 8622 return (B_TRUE); 8623 } 8624 return (B_FALSE); 8625 } 8626