1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 22 /* All Rights Reserved */ 23 24 25 /* 26 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. 
 */

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/signal.h>
#include <sys/stat.h>
#include <sys/proc.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/stropts.h>
#include <sys/tihdr.h>
#include <sys/var.h>
#include <sys/poll.h>
#include <sys/termio.h>
#include <sys/ttold.h>
#include <sys/systm.h>
#include <sys/uio.h>
#include <sys/cmn_err.h>
#include <sys/sad.h>
#include <sys/netstack.h>
#include <sys/priocntl.h>
#include <sys/jioctl.h>
#include <sys/procset.h>
#include <sys/session.h>
#include <sys/kmem.h>
#include <sys/filio.h>
#include <sys/vtrace.h>
#include <sys/debug.h>
#include <sys/strredir.h>
#include <sys/fs/fifonode.h>
#include <sys/fs/snode.h>
#include <sys/strlog.h>
#include <sys/strsun.h>
#include <sys/project.h>
#include <sys/kbio.h>
#include <sys/msio.h>
#include <sys/tty.h>
#include <sys/ptyvar.h>
#include <sys/vuid_event.h>
#include <sys/modctl.h>
#include <sys/sunddi.h>
#include <sys/sunldi_impl.h>
#include <sys/autoconf.h>
#include <sys/policy.h>
#include <sys/dld.h>
#include <sys/zone.h>
#include <c2/audit.h>

/*
 * This define helps improve the readability of streams code while
 * still maintaining a very old streams performance enhancement.  The
 * performance enhancement basically involved having all callers
 * of straccess() perform the first check that straccess() will do
 * locally before actually calling straccess().  (Thereby reducing
 * the number of unnecessary calls to straccess().)
 *
 * The macro evaluates to 0 without calling straccess() when the stream
 * has no sd_sidp or when its vnode is a VFIFO; otherwise it evaluates to
 * the result of straccess(x, y).
 *
 * NOTE: the two shortcut tests do not use the `x' argument; they read a
 * variable that must be named `stp' in the scope where the macro is
 * expanded.  `x' is only forwarded to straccess().
 */
#define	i_straccess(x, y)	((stp->sd_sidp == NULL) ? 0 : \
				(stp->sd_vnode->v_type == VFIFO) ? 0 : \
				straccess((x), (y)))

/*
 * what is mblk_pull_len?
 *
 * If a streams message consists of many short mblks,
 * a performance degradation occurs from copyout overhead.
 * To decrease the per mblk overhead, messages that are
 * likely to consist of many small mblks are pulled up into
 * one contiguous chunk of memory.
 *
 * To avoid the processing overhead of examining every
 * mblk, a quick heuristic is used. If the first mblk in
 * the message is shorter than mblk_pull_len, it is likely
 * that the rest of the mblks will be short as well.
 *
 * This heuristic was decided upon after performance tests
 * indicated that anything more complex slowed down the main
 * code path.
 */
#define	MBLK_PULL_LEN 64
/* Run-time copy of the threshold; starts out as MBLK_PULL_LEN. */
uint32_t mblk_pull_len = MBLK_PULL_LEN;

/*
 * The sgttyb_handling flag controls the handling of the old BSD
 * TIOCGETP, TIOCSETP, and TIOCSETN ioctls as follows:
 *
 * 0 - Emit no warnings at all and retain old, broken behavior.
 * 1 - Emit no warnings and silently handle new semantics.
 * 2 - Send cmn_err(CE_NOTE) when either TIOCSETP or TIOCSETN is used
 *     (once per system invocation).  Handle with new semantics.
 * 3 - Send SIGSYS when any TIOCGETP, TIOCSETP, or TIOCSETN call is
 *     made (so that offenders drop core and are easy to debug).
 *
 * The "new semantics" are that TIOCGETP returns B38400 for
 * sg_[io]speed if the corresponding value is over B38400, and that
 * TIOCSET[PN] accept B38400 in these cases to mean "retain current
 * bit rate."
 */
int sgttyb_handling = 1;	/* default: no warnings, new semantics */
static boolean_t sgttyb_complaint;

/* don't push drcompat module by default on Style-2 streams */
static int push_drcompat = 0;

/*
 * id value used to distinguish between different ioctl messages
 */
static uint32_t ioc_id;

/* Forward declarations for routines defined later in this file. */
static void putback(struct stdata *, queue_t *, mblk_t *, int);
static void strcleanall(struct vnode *);
static int strwsrv(queue_t *);
static int strdocmd(struct stdata *, struct strcmd *, cred_t *);

/*
 * qinit and module_info structures for stream head read and write queues
 *
 * Two sets exist: strdata/stwdata for ordinary streams and
 * fifo_strdata/fifo_stwdata for FIFOs and pipes.  Both sets use strrput
 * on the read side and strwsrv on the write side; they differ only in
 * the module_info they point at (name, packet size and water marks).
 * See qinit(9S) and module_info(9S) for the field order.
 */
struct module_info strm_info = { 0, "strrhead", 0, INFPSZ, STRHIGH, STRLOW };
struct module_info stwm_info = { 0, "strwhead", 0, 0, 0, 0 };
struct qinit strdata = { strrput, NULL, NULL, NULL, NULL, &strm_info };
struct qinit stwdata = { NULL, strwsrv, NULL, NULL, NULL, &stwm_info };
struct module_info fiform_info = { 0, "fifostrrhead", 0, PIPE_BUF, FIFOHIWAT,
    FIFOLOWAT };
struct module_info fifowm_info = { 0, "fifostrwhead", 0, 0, 0, 0 };
struct qinit fifo_strdata = { strrput, NULL, NULL, NULL, NULL, &fiform_info };
struct qinit fifo_stwdata = { NULL, strwsrv, NULL, NULL, NULL, &fifowm_info };

extern kmutex_t	strresources;	/* protects global resources */
extern kmutex_t muxifier;	/* single-threads multiplexor creation */

static boolean_t msghasdata(mblk_t *bp);
/* Logical negation of msghasdata(). */
#define	msgnodata(bp) (!msghasdata(bp))

/*
 * Stream head locking notes:
 *	There are four monitors associated with the stream head:
 *	1. v_stream monitor: in stropen() and strclose() v_lock
 *		is held while the association of vnode and stream
 *		head is established or tested for.
 *	2. open/close/push/pop monitor: sd_lock is held while each
 *		thread bids for exclusive access to this monitor
 *		for opening or closing a stream.  In addition, this
 *		monitor is entered during pushes and pops.
This 175 * guarantees that during plumbing operations there 176 * is only one thread trying to change the plumbing. 177 * Any other threads present in the stream are only 178 * using the plumbing. 179 * 3. read/write monitor: in the case of read, a thread holds 180 * sd_lock while trying to get data from the stream 181 * head queue. if there is none to fulfill a read 182 * request, it sets RSLEEP and calls cv_wait_sig() down 183 * in strwaitq() to await the arrival of new data. 184 * when new data arrives in strrput(), sd_lock is acquired 185 * before testing for RSLEEP and calling cv_broadcast(). 186 * the behavior of strwrite(), strwsrv(), and WSLEEP 187 * mirror this. 188 * 4. ioctl monitor: sd_lock is gotten to ensure that only one 189 * thread is doing an ioctl at a time. 190 */ 191 192 static int 193 push_mod(queue_t *qp, dev_t *devp, struct stdata *stp, const char *name, 194 int anchor, cred_t *crp, uint_t anchor_zoneid) 195 { 196 int error; 197 fmodsw_impl_t *fp; 198 199 if (stp->sd_flag & (STRHUP|STRDERR|STWRERR)) { 200 error = (stp->sd_flag & STRHUP) ? ENXIO : EIO; 201 return (error); 202 } 203 if (stp->sd_pushcnt >= nstrpush) { 204 return (EINVAL); 205 } 206 207 if ((fp = fmodsw_find(name, FMODSW_HOLD | FMODSW_LOAD)) == NULL) { 208 stp->sd_flag |= STREOPENFAIL; 209 return (EINVAL); 210 } 211 212 /* 213 * push new module and call its open routine via qattach 214 */ 215 if ((error = qattach(qp, devp, 0, crp, fp, B_FALSE)) != 0) 216 return (error); 217 218 /* 219 * Check to see if caller wants a STREAMS anchor 220 * put at this place in the stream, and add if so. 221 */ 222 mutex_enter(&stp->sd_lock); 223 if (anchor == stp->sd_pushcnt) { 224 stp->sd_anchor = stp->sd_pushcnt; 225 stp->sd_anchorzone = anchor_zoneid; 226 } 227 mutex_exit(&stp->sd_lock); 228 229 return (0); 230 } 231 232 /* 233 * Open a stream device. 
 */
/*
 * vp	- vnode being opened; vp->v_stream is tested and, on a first open,
 *	  set under vp->v_lock.
 * devp	- in/out device number handed to qattach()/qreopen().
 * flag	- open flags; FNDELAY/FNONBLOCK turn a wait for a concurrent
 *	  open/close/plumb into an EAGAIN return.
 * crp	- caller's credentials.
 *
 * Returns 0 on success.  Errors produced here are EAGAIN (would have to
 * wait and non-blocking was requested), EINTR (cv_wait_sig() reported a
 * signal) and EIO (stream marked STRDERR/STWRERR); errors from
 * qreopen(), qattach() and push_mod() are passed through.
 */
int
stropen(vnode_t *vp, dev_t *devp, int flag, cred_t *crp)
{
	struct stdata *stp;
	queue_t *qp;
	int s;
	dev_t dummydev, savedev;
	struct autopush *ap;
	struct dlautopush dlap;
	int error = 0;
	ssize_t	rmin, rmax;
	int cloneopen;
	queue_t *brq;
	major_t major;
	str_stack_t *ss;
	zoneid_t zoneid;
	uint_t anchor;

	if (AU_AUDITING())
		audit_stropen(vp, devp, flag, crp);

	/*
	 * If the stream already exists, wait for any open in progress
	 * to complete, then call the open function of each module and
	 * driver in the stream.  Otherwise create the stream.
	 */
	TRACE_1(TR_FAC_STREAMS_FR, TR_STROPEN, "stropen:%p", vp);
retry:
	mutex_enter(&vp->v_lock);
	if ((stp = vp->v_stream) != NULL) {

		/*
		 * Waiting for stream to be created to device
		 * due to another open.
		 */
		mutex_exit(&vp->v_lock);

		/*
		 * Every "goto ckreturn" below is arranged so that exactly
		 * stp->sd_lock is held on arrival at the label, which is
		 * what the code at ckreturn releases.
		 */
		if (STRMATED(stp)) {
			struct stdata *strmatep = stp->sd_mate;

			STRLOCKMATES(stp);
			/* The mate is busy: wait on the mate's monitor. */
			if (strmatep->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
				if (flag & (FNDELAY|FNONBLOCK)) {
					error = EAGAIN;
					mutex_exit(&strmatep->sd_lock);
					goto ckreturn;
				}
				mutex_exit(&stp->sd_lock);
				if (!cv_wait_sig(&strmatep->sd_monitor,
				    &strmatep->sd_lock)) {
					error = EINTR;
					/* swap locks so ckreturn sees ours */
					mutex_exit(&strmatep->sd_lock);
					mutex_enter(&stp->sd_lock);
					goto ckreturn;
				}
				mutex_exit(&strmatep->sd_lock);
				goto retry;
			}
			/* This end is busy: wait on our own monitor. */
			if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
				if (flag & (FNDELAY|FNONBLOCK)) {
					error = EAGAIN;
					mutex_exit(&strmatep->sd_lock);
					goto ckreturn;
				}
				mutex_exit(&strmatep->sd_lock);
				if (!cv_wait_sig(&stp->sd_monitor,
				    &stp->sd_lock)) {
					error = EINTR;
					goto ckreturn;
				}
				mutex_exit(&stp->sd_lock);
				goto retry;
			}

			if (stp->sd_flag & (STRDERR|STWRERR)) {
				error = EIO;
				mutex_exit(&strmatep->sd_lock);
				goto ckreturn;
			}

			stp->sd_flag |= STWOPEN;
			STRUNLOCKMATES(stp);
		} else {
			mutex_enter(&stp->sd_lock);
			if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
				if (flag & (FNDELAY|FNONBLOCK)) {
					error = EAGAIN;
					goto ckreturn;
				}
				if (!cv_wait_sig(&stp->sd_monitor,
				    &stp->sd_lock)) {
					error = EINTR;
					goto ckreturn;
				}
				mutex_exit(&stp->sd_lock);
				goto retry;  /* could be clone! */
			}

			if (stp->sd_flag & (STRDERR|STWRERR)) {
				error = EIO;
				goto ckreturn;
			}

			stp->sd_flag |= STWOPEN;
			mutex_exit(&stp->sd_lock);
		}

		/*
		 * Open all modules and devices down stream to notify
		 * that another user is streaming.  For modules, set the
		 * last argument to MODOPEN and do not pass any open flags.
		 * Ignore dummydev since this is not the first open.
		 */
		claimstr(stp->sd_wrq);
		qp = stp->sd_wrq;
		while (_SAMESTR(qp)) {
			qp = qp->q_next;
			if ((error = qreopen(_RD(qp), devp, flag, crp)) != 0)
				break;
		}
		releasestr(stp->sd_wrq);
		/*
		 * The hangup/error state is cleared and STWOPEN dropped
		 * whether or not one of the reopens failed; `error' still
		 * carries the failure back to the caller.
		 */
		mutex_enter(&stp->sd_lock);
		stp->sd_flag &= ~(STRHUP|STWOPEN|STRDERR|STWRERR);
		stp->sd_rerror = 0;
		stp->sd_werror = 0;
ckreturn:
		cv_broadcast(&stp->sd_monitor);
		mutex_exit(&stp->sd_lock);
		return (error);
	}

	/*
	 * This vnode isn't streaming.  SPECFS already
	 * checked for multiple vnodes pointing to the
	 * same stream, so create a stream to the driver.
	 */
	qp = allocq();
	stp = shalloc(qp);

	/*
	 * Initialize stream head.  shalloc() has given us
	 * exclusive access, and we have the vnode locked;
	 * we can do whatever we want with stp.
	 */
	stp->sd_flag = STWOPEN;
	stp->sd_siglist = NULL;
	stp->sd_pollist.ph_list = NULL;
	stp->sd_sigflags = 0;
	stp->sd_mark = NULL;
	stp->sd_closetime = STRTIMOUT;
	stp->sd_sidp = NULL;
	stp->sd_pgidp = NULL;
	stp->sd_vnode = vp;
	stp->sd_rerror = 0;
	stp->sd_werror = 0;
	stp->sd_wroff = 0;
	stp->sd_tail = 0;
	stp->sd_iocblk = NULL;
	stp->sd_cmdblk = NULL;
	stp->sd_pushcnt = 0;
	stp->sd_qn_minpsz = 0;
	stp->sd_qn_maxpsz = INFPSZ - 1;	/* used to check for initialization */
	stp->sd_maxblk = INFPSZ;
	qp->q_ptr = _WR(qp)->q_ptr = stp;
	STREAM(qp) = STREAM(_WR(qp)) = stp;
	/* Publish the stream head; v_lock has been held since `retry'. */
	vp->v_stream = stp;
	mutex_exit(&vp->v_lock);
	if (vp->v_type == VFIFO) {
		stp->sd_flag |= OLDNDELAY;
		/*
		 * This means, both for pipes and fifos
		 * strwrite will send SIGPIPE if the other
		 * end is closed. For putmsg it depends
		 * on whether it is a XPG4_2 application
		 * or not
		 */
		stp->sd_wput_opt = SW_SIGPIPE;

		/* setq might sleep in kmem_alloc - avoid holding locks. */
		setq(qp, &fifo_strdata, &fifo_stwdata, NULL, QMTSAFE,
		    SQ_CI|SQ_CO, B_FALSE);

		set_qend(qp);
		stp->sd_strtab = fifo_getinfo();
		_WR(qp)->q_nfsrv = _WR(qp);
		qp->q_nfsrv = qp;
		/*
		 * Wake up others that are waiting for stream to be created.
		 */
		mutex_enter(&stp->sd_lock);
		/*
		 * nothing is pushed on the stream yet, so
		 * optimized stream head packetsizes are just that
		 * of the read queue
		 */
		stp->sd_qn_minpsz = qp->q_minpsz;
		stp->sd_qn_maxpsz = qp->q_maxpsz;
		stp->sd_flag &= ~STWOPEN;
		/* sd_lock is still held; fifo_opendone releases it. */
		goto fifo_opendone;
	}
	/* setq might sleep in kmem_alloc - avoid holding locks. */
	setq(qp, &strdata, &stwdata, NULL, QMTSAFE, SQ_CI|SQ_CO, B_FALSE);

	set_qend(qp);

	/*
	 * Open driver and create stream to it (via qattach).
	 */
	savedev = *devp;
	cloneopen = (getmajor(*devp) == clone_major);
	if ((error = qattach(qp, devp, flag, crp, NULL, B_FALSE)) != 0) {
		/*
		 * The driver open failed: undo the vnode association,
		 * wake any thread waiting on the monitor, and free the
		 * queue pair and stream head allocated above.
		 */
		mutex_enter(&vp->v_lock);
		vp->v_stream = NULL;
		mutex_exit(&vp->v_lock);
		mutex_enter(&stp->sd_lock);
		cv_broadcast(&stp->sd_monitor);
		mutex_exit(&stp->sd_lock);
		freeq(_RD(qp));
		shfree(stp);
		return (error);
	}
	/*
	 * Set sd_strtab after open in order to handle clonable drivers
	 */
	stp->sd_strtab = STREAMSTAB(getmajor(*devp));

	/*
	 * Historical note: dummydev used to be set prior to the initial
	 * open (via qattach above), which made the value seen
	 * inconsistent between an I_PUSH and an autopush of a module.
	 */
	dummydev = *devp;

	/*
	 * For clone open of old style (Q not associated) network driver,
	 * push DRMODNAME module to handle DL_ATTACH/DL_DETACH
	 */
	brq = _RD(_WR(qp)->q_next);
	major = getmajor(*devp);
	if (push_drcompat && cloneopen && NETWORK_DRV(major) &&
	    ((brq->q_flag & _QASSOCIATED) == 0)) {
		/* A failure here is only warned about; the open goes on. */
		if (push_mod(qp, &dummydev, stp, DRMODNAME, 0, crp, 0) != 0)
			cmn_err(CE_WARN, "cannot push " DRMODNAME
			    " streams module");
	}

	if (!NETWORK_DRV(major)) {
		savedev = *devp;
	} else {
		/*
		 * For network devices, process differently based on the
		 * return value from dld_autopush():
		 *
		 *   0: the passed-in device points to a GLDv3 datalink with
		 *   per-link autopush configuration; use that configuration
		 *   and ignore any per-driver autopush configuration.
		 *
		 *   1: the passed-in device points to a physical GLDv3
		 *   datalink without per-link autopush configuration.  The
		 *   passed in device was changed to refer to the actual
		 *   physical device (if it's not already); we use that new
		 *   device to look up any per-driver autopush configuration.
		 *
		 *   -1: neither of the above cases applied; use the initial
		 *   device to look up any per-driver autopush configuration.
		 */
		switch (dld_autopush(&savedev, &dlap)) {
		case 0:
			zoneid = crgetzoneid(crp);
			for (s = 0; s < dlap.dap_npush; s++) {
				error = push_mod(qp, &dummydev, stp,
				    dlap.dap_aplist[s], dlap.dap_anchor, crp,
				    zoneid);
				if (error != 0)
					break;
			}
			goto opendone;
		case 1:
			break;
		case -1:
			savedev = *devp;
			break;
		}
	}
	/*
	 * Find the autopush configuration based on "savedev". Start with the
	 * global zone. If not found check in the local zone.
	 */
	zoneid = GLOBAL_ZONEID;
retryap:
	ss = netstack_find_by_stackid(zoneid_to_netstackid(zoneid))->
	    netstack_str;
	if ((ap = sad_ap_find_by_dev(savedev, ss)) == NULL) {
		netstack_rele(ss->ss_netstack);
		if (zoneid == GLOBAL_ZONEID) {
			/*
			 * None found. Also look in the zone's autopush table.
			 */
			zoneid = crgetzoneid(crp);
			if (zoneid != GLOBAL_ZONEID)
				goto retryap;
		}
		goto opendone;
	}
	anchor = ap->ap_anchor;
	zoneid = crgetzoneid(crp);
	/* Push the configured modules in order; stop at the first failure. */
	for (s = 0; s < ap->ap_npush; s++) {
		error = push_mod(qp, &dummydev, stp, ap->ap_list[s],
		    anchor, crp, zoneid);
		if (error != 0)
			break;
	}
	sad_ap_rele(ap, ss);
	netstack_rele(ss->ss_netstack);

opendone:

	/*
	 * let specfs know that open failed part way through
	 */
	if (error) {
		mutex_enter(&stp->sd_lock);
		stp->sd_flag |= STREOPENFAIL;
		mutex_exit(&stp->sd_lock);
	}

	/*
	 * Wake up others that are waiting for stream to be created.
	 */
	mutex_enter(&stp->sd_lock);
	stp->sd_flag &= ~STWOPEN;

	/*
	 * As a performance concern we are caching the values of
	 * q_minpsz and q_maxpsz of the module below the stream
	 * head in the stream head.
	 */
	mutex_enter(QLOCK(stp->sd_wrq->q_next));
	rmin = stp->sd_wrq->q_next->q_minpsz;
	rmax = stp->sd_wrq->q_next->q_maxpsz;
	mutex_exit(QLOCK(stp->sd_wrq->q_next));

	/* do this processing here as a performance concern */
	if (strmsgsz != 0) {
		if (rmax == INFPSZ)
			rmax = strmsgsz;
		else
			rmax = MIN(strmsgsz, rmax);
	}

	mutex_enter(QLOCK(stp->sd_wrq));
	stp->sd_qn_minpsz = rmin;
	stp->sd_qn_maxpsz = rmax;
	mutex_exit(QLOCK(stp->sd_wrq));

fifo_opendone:
	/* Entered with sd_lock held, both by fall-through and by the goto. */
	cv_broadcast(&stp->sd_monitor);
	mutex_exit(&stp->sd_lock);
	return (error);
}

/*
 * Queue initializers installed by strclose() on the surviving stream head
 * of a pipe whose other end has closed: the read side is handed to
 * strsink(), the write side has no procedures at all.
 */
static int strsink(queue_t *, mblk_t *);
static struct qinit deadrend = {
	strsink, NULL, NULL, NULL, NULL, &strm_info, NULL
};
static struct qinit deadwend = {
	NULL, NULL, NULL, NULL, NULL, &stwm_info, NULL
};

/*
 * Close a stream.
 * This is called from closef() on the last close of an open stream.
 * Strclean() will already have removed the siglist and pollist
 * information, so all that remains is to remove all multiplexor links
 * for the stream, pop all the modules (and the driver), and free the
 * stream structure.
 *
 * `flag' carries the file flags (FNDELAY/FNONBLOCK suppress the wait for
 * queued write data to drain) and `crp' the caller's credentials.
 * Always returns 0.
 */

int
strclose(struct vnode *vp, int flag, cred_t *crp)
{
	struct stdata *stp;
	queue_t *qp;
	int rval;
	int freestp = 1;	/* cleared when the head must outlive us */
	queue_t *rmq;

	if (AU_AUDITING())
		audit_strclose(vp, flag, crp);

	TRACE_1(TR_FAC_STREAMS_FR,
	    TR_STRCLOSE, "strclose:%p", vp);
	ASSERT(vp->v_stream);

	stp = vp->v_stream;
	ASSERT(!(stp->sd_flag & STPLEX));
	qp = stp->sd_wrq;

	/*
	 * Needed so that strpoll will return non-zero for this fd.
	 * Note that with POLLNOERR STRHUP does still cause POLLHUP.
	 */
	mutex_enter(&stp->sd_lock);
	stp->sd_flag |= STRHUP;
	mutex_exit(&stp->sd_lock);

	/*
	 * If the registered process or process group did not have an
	 * open instance of this stream then strclean would not be
	 * called. Thus at the time of closing all remaining siglist entries
	 * are removed.
	 */
	if (stp->sd_siglist != NULL)
		strcleanall(vp);

	ASSERT(stp->sd_siglist == NULL);
	ASSERT(stp->sd_sigflags == 0);

	if (STRMATED(stp)) {
		struct stdata *strmatep = stp->sd_mate;
		int waited = 1;

		/*
		 * Waiting on either end means giving up the other end's
		 * lock, so after any wait both ends are re-locked and
		 * both conditions are tested again.  The outer loop ends
		 * only after a pass in which neither end had to wait.
		 */
		STRLOCKMATES(stp);
		while (waited) {
			waited = 0;
			while (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
				mutex_exit(&strmatep->sd_lock);
				cv_wait(&stp->sd_monitor, &stp->sd_lock);
				mutex_exit(&stp->sd_lock);
				STRLOCKMATES(stp);
				waited = 1;
			}
			while (strmatep->sd_flag &
			    (STWOPEN|STRCLOSE|STRPLUMB)) {
				mutex_exit(&stp->sd_lock);
				cv_wait(&strmatep->sd_monitor,
				    &strmatep->sd_lock);
				mutex_exit(&strmatep->sd_lock);
				STRLOCKMATES(stp);
				waited = 1;
			}
		}
		stp->sd_flag |= STRCLOSE;
		STRUNLOCKMATES(stp);
	} else {
		mutex_enter(&stp->sd_lock);
		stp->sd_flag |= STRCLOSE;
		mutex_exit(&stp->sd_lock);
	}

	ASSERT(qp->q_first == NULL);	/* No more delayed write */

	/* Check if an I_LINK was ever done on this stream */
	if (stp->sd_flag & STRHASLINKS) {
		netstack_t *ns;
		str_stack_t *ss;

		ns = netstack_find_by_cred(crp);
		ASSERT(ns != NULL);
		ss = ns->netstack_str;
		ASSERT(ss != NULL);

		(void) munlinkall(stp, LINKCLOSE|LINKNORMAL, crp, &rval, ss);
		netstack_rele(ss->ss_netstack);
	}

	/* Detach everything below the stream head, one queue pair per pass. */
	while (_SAMESTR(qp)) {
		/*
		 * Holding sd_lock prevents q_next from changing in
		 * this stream.
		 */
		mutex_enter(&stp->sd_lock);
		if (!(flag & (FNDELAY|FNONBLOCK)) && (stp->sd_closetime > 0)) {

			/*
			 * sleep until awakened by strwsrv() or timeout
			 */
			for (;;) {
				mutex_enter(QLOCK(qp->q_next));
				if (!(qp->q_next->q_mblkcnt)) {
					mutex_exit(QLOCK(qp->q_next));
					break;
				}
				stp->sd_flag |= WSLEEP;

				/* ensure strwsrv gets enabled */
				qp->q_next->q_flag |= QWANTW;
				mutex_exit(QLOCK(qp->q_next));
				/* get out if we timed out or recv'd a signal */
				if (str_cv_wait(&qp->q_wait, &stp->sd_lock,
				    stp->sd_closetime, 0) <= 0) {
					break;
				}
			}
			stp->sd_flag &= ~WSLEEP;
		}
		mutex_exit(&stp->sd_lock);

		rmq = qp->q_next;
		if (rmq->q_flag & QISDRV) {
			ASSERT(!_SAMESTR(rmq));
			wait_sq_svc(_RD(qp)->q_syncq);
		}

		qdetach(_RD(rmq), 1, flag, crp, B_FALSE);
	}

	/*
	 * Since we call pollwakeup in close() now, the poll list should
	 * be empty in most cases. The only exception is the layered devices
	 * (e.g. the console drivers with redirection modules pushed on top
	 * of it).  We have to do this after calling qdetach() because
	 * the redirection module won't have torn down the console
	 * redirection until after qdetach() has been invoked.
	 */
	if (stp->sd_pollist.ph_list != NULL) {
		pollwakeup(&stp->sd_pollist, POLLERR);
		pollhead_clean(&stp->sd_pollist);
	}
	ASSERT(stp->sd_pollist.ph_list == NULL);
	ASSERT(stp->sd_sidp == NULL);
	ASSERT(stp->sd_pgidp == NULL);

	/* Prevent qenable from re-enabling the stream head queue */
	disable_svc(_RD(qp));

	/*
	 * Wait until service procedure of each queue is
	 * run, if QINSERVICE is set.
	 */
	wait_svc(_RD(qp));

	/*
	 * Now, flush both queues.
	 */
	flushq(_RD(qp), FLUSHALL);
	flushq(qp, FLUSHALL);

	/*
	 * If the write queue of the stream head is pointing to a
	 * read queue, we have a twisted stream.  If the read queue
	 * is alive, convert the stream head queues into a dead end.
	 * If the read queue is dead, free the dead pair.
	 */
	if (qp->q_next && !_SAMESTR(qp)) {
		if (qp->q_next->q_qinfo == &deadrend) {	/* half-closed pipe */
			flushq(qp->q_next, FLUSHALL); /* ensure no message */
			shfree(qp->q_next->q_stream);
			freeq(qp->q_next);
			freeq(_RD(qp));
		} else if (qp->q_next == _RD(qp)) {	/* fifo */
			freeq(_RD(qp));
		} else {				/* pipe */
			/*
			 * The other end is still open, so this stream head
			 * and its queues are kept and turned into a dead
			 * end rather than freed.
			 */
			freestp = 0;
			/*
			 * The q_info pointers are never accessed when
			 * SQLOCK is held.
			 */
			ASSERT(qp->q_syncq == _RD(qp)->q_syncq);
			mutex_enter(SQLOCK(qp->q_syncq));
			qp->q_qinfo = &deadwend;
			_RD(qp)->q_qinfo = &deadrend;
			mutex_exit(SQLOCK(qp->q_syncq));
		}
	} else {
		freeq(_RD(qp)); /* free stream head queue pair */
	}

	mutex_enter(&vp->v_lock);
	if (stp->sd_iocblk) {
		/* (mblk_t *)-1 is a marker value, not a message to free */
		if (stp->sd_iocblk != (mblk_t *)-1) {
			freemsg(stp->sd_iocblk);
		}
		stp->sd_iocblk = NULL;
	}
	stp->sd_vnode = NULL;
	vp->v_stream = NULL;
	mutex_exit(&vp->v_lock);
	mutex_enter(&stp->sd_lock);
	freemsg(stp->sd_cmdblk);
	stp->sd_cmdblk = NULL;
	stp->sd_flag &= ~STRCLOSE;
	cv_broadcast(&stp->sd_monitor);
	mutex_exit(&stp->sd_lock);

	if (freestp)
		shfree(stp);
	return (0);
}

/*
 * Read-side put procedure of a dead-ended stream head (see deadrend).
 *
 * Nothing is ever queued here.  M_FLUSH messages asking for a write-side
 * flush are turned around once (MSGNOLOOP marks them), M_COPYIN/M_COPYOUT
 * requests are answered with a failing M_IOCDATA, M_IOCTL is answered
 * with M_IOCNAK, and every other message type is freed.  A reply is sent
 * with qreply() only while the opposite queue still has a q_next;
 * otherwise the message is freed instead.  Always returns 0.
 */
static int
strsink(queue_t *q, mblk_t *bp)
{
	struct copyresp *resp;

	switch (bp->b_datap->db_type) {
	case M_FLUSH:
		if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) {
			*bp->b_rptr &= ~FLUSHR;
			bp->b_flag |= MSGNOLOOP;
			/*
			 * Protect against the driver passing up
			 * messages after it has done a qprocsoff.
			 */
			if (_OTHERQ(q)->q_next == NULL)
				freemsg(bp);
			else
				qreply(q, bp);
		} else {
			freemsg(bp);
		}
		break;

	case M_COPYIN:
	case M_COPYOUT:
		if (bp->b_cont) {
			freemsg(bp->b_cont);
			bp->b_cont = NULL;
		}
		bp->b_datap->db_type = M_IOCDATA;
		bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
		resp = (struct copyresp *)bp->b_rptr;
		resp->cp_rval = (caddr_t)1;	/* failure */
		/*
		 * Protect against the driver passing up
		 * messages after it has done a qprocsoff.
		 */
		if (_OTHERQ(q)->q_next == NULL)
			freemsg(bp);
		else
			qreply(q, bp);
		break;

	case M_IOCTL:
		if (bp->b_cont) {
			freemsg(bp->b_cont);
			bp->b_cont = NULL;
		}
		bp->b_datap->db_type = M_IOCNAK;
		/*
		 * Protect against the driver passing up
		 * messages after it has done a qprocsoff.
		 */
		if (_OTHERQ(q)->q_next == NULL)
			freemsg(bp);
		else
			qreply(q, bp);
		break;

	default:
		freemsg(bp);
		break;
	}

	return (0);
}

/*
 * Clean up after a process when it closes a stream.  This is called
 * from closef for all closes, whereas strclose is called only for the
 * last close on a stream.  The siglist is scanned for entries for the
 * current process, and these are removed.
897 */ 898 void 899 strclean(struct vnode *vp) 900 { 901 strsig_t *ssp, *pssp, *tssp; 902 stdata_t *stp; 903 int update = 0; 904 905 TRACE_1(TR_FAC_STREAMS_FR, 906 TR_STRCLEAN, "strclean:%p", vp); 907 stp = vp->v_stream; 908 pssp = NULL; 909 mutex_enter(&stp->sd_lock); 910 ssp = stp->sd_siglist; 911 while (ssp) { 912 if (ssp->ss_pidp == curproc->p_pidp) { 913 tssp = ssp->ss_next; 914 if (pssp) 915 pssp->ss_next = tssp; 916 else 917 stp->sd_siglist = tssp; 918 mutex_enter(&pidlock); 919 PID_RELE(ssp->ss_pidp); 920 mutex_exit(&pidlock); 921 kmem_free(ssp, sizeof (strsig_t)); 922 update = 1; 923 ssp = tssp; 924 } else { 925 pssp = ssp; 926 ssp = ssp->ss_next; 927 } 928 } 929 if (update) { 930 stp->sd_sigflags = 0; 931 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 932 stp->sd_sigflags |= ssp->ss_events; 933 } 934 mutex_exit(&stp->sd_lock); 935 } 936 937 /* 938 * Used on the last close to remove any remaining items on the siglist. 939 * These could be present on the siglist due to I_ESETSIG calls that 940 * use process groups or processed that do not have an open file descriptor 941 * for this stream (Such entries would not be removed by strclean). 942 */ 943 static void 944 strcleanall(struct vnode *vp) 945 { 946 strsig_t *ssp, *nssp; 947 stdata_t *stp; 948 949 stp = vp->v_stream; 950 mutex_enter(&stp->sd_lock); 951 ssp = stp->sd_siglist; 952 stp->sd_siglist = NULL; 953 while (ssp) { 954 nssp = ssp->ss_next; 955 mutex_enter(&pidlock); 956 PID_RELE(ssp->ss_pidp); 957 mutex_exit(&pidlock); 958 kmem_free(ssp, sizeof (strsig_t)); 959 ssp = nssp; 960 } 961 stp->sd_sigflags = 0; 962 mutex_exit(&stp->sd_lock); 963 } 964 965 /* 966 * Retrieve the next message from the logical stream head read queue 967 * using either rwnext (if sync stream) or getq_noenab. 968 * It is the callers responsibility to call qbackenable after 969 * it is finished with the message. The caller should not call 970 * qbackenable until after any putback calls to avoid spurious backenabling. 
 */
/*
 * stp	  - stream head; the caller must hold stp->sd_lock, and it is held
 *	    again on return (it is dropped temporarily around rwnext()
 *	    and qreply()).
 * q	  - stream head read queue.
 * uiop	  - caller's uio, or NULL; when NULL the rwnext() path is skipped
 *	    and no byte-count hint is given to getq_noenab().
 * first  - non-zero on the caller's first attempt; in that case rwnext()
 *	    is only tried if RSLEEP is set in sd_wakeq.
 * errorp - always written: 0, or the rwnext() error when that error is
 *	    not one of EWOULDBLOCK, EINVAL or EBUSY.
 *
 * Returns the message obtained, or NULL when none is available or
 * *errorp has been set non-zero.
 */
mblk_t *
strget(struct stdata *stp, queue_t *q, struct uio *uiop, int first,
    int *errorp)
{
	mblk_t *bp;
	int error;
	ssize_t rbytes = 0;

	/* Holding sd_lock prevents the read queue from changing  */
	ASSERT(MUTEX_HELD(&stp->sd_lock));

	if (uiop != NULL && stp->sd_struiordq != NULL &&
	    q->q_first == NULL &&
	    (!first || (stp->sd_wakeq & RSLEEP))) {
		/*
		 * Stream supports rwnext() for the read side.
		 * If this is the first time we're called by e.g. strread
		 * only do the downcall if there is a deferred wakeup
		 * (registered in sd_wakeq).
		 */
		struiod_t uiod;

		if (first)
			stp->sd_wakeq &= ~RSLEEP;

		/* rwnext() is handed a private copy of the caller's uio. */
		(void) uiodup(uiop, &uiod.d_uio, uiod.d_iov,
		    sizeof (uiod.d_iov) / sizeof (*uiod.d_iov));
		uiod.d_mp = 0;
		/*
		 * Mark that a thread is in rwnext on the read side
		 * to prevent strrput from nacking ioctls immediately.
		 * When the last concurrent rwnext returns
		 * the ioctls are nack'ed.
		 */
		ASSERT(MUTEX_HELD(&stp->sd_lock));
		stp->sd_struiodnak++;
		/*
		 * Note: rwnext will drop sd_lock.
		 */
		error = rwnext(q, &uiod);
		ASSERT(MUTEX_NOT_HELD(&stp->sd_lock));
		mutex_enter(&stp->sd_lock);
		stp->sd_struiodnak--;
		/*
		 * Drain the list of deferred ioctls.  sd_struiodnak is
		 * re-tested on every pass because sd_lock is released
		 * around qreply().
		 */
		while (stp->sd_struiodnak == 0 &&
		    ((bp = stp->sd_struionak) != NULL)) {
			stp->sd_struionak = bp->b_next;
			bp->b_next = NULL;
			bp->b_datap->db_type = M_IOCNAK;
			/*
			 * Protect against the driver passing up
			 * messages after it has done a qprocsoff.
			 */
			if (_OTHERQ(q)->q_next == NULL)
				freemsg(bp);
			else {
				mutex_exit(&stp->sd_lock);
				qreply(q, bp);
				mutex_enter(&stp->sd_lock);
			}
		}
		ASSERT(MUTEX_HELD(&stp->sd_lock));
		if (error == 0 || error == EWOULDBLOCK) {
			if ((bp = uiod.d_mp) != NULL) {
				*errorp = 0;
				ASSERT(MUTEX_HELD(&stp->sd_lock));
				return (bp);
			}
			error = 0;
		} else if (error == EINVAL) {
			/*
			 * The stream plumbing must have
			 * changed while we were away, so
			 * just turn off rwnext()s.
			 */
			error = 0;
		} else if (error == EBUSY) {
			/*
			 * The module might have data in transit using putnext
			 * Fall back on waiting + getq.
			 */
			error = 0;
		} else {
			*errorp = error;
			ASSERT(MUTEX_HELD(&stp->sd_lock));
			return (NULL);
		}
		/*
		 * Try a getq in case a rwnext() generated mblk
		 * has bubbled up via strrput().
		 */
	}
	*errorp = 0;
	ASSERT(MUTEX_HELD(&stp->sd_lock));

	/*
	 * If we have a valid uio, try and use this as a guide for how
	 * many bytes to retrieve from the queue via getq_noenab().
	 * Doing this can avoid unnecessary counting of overlong
	 * messages in putback(). We currently only do this for sockets
	 * and only if there is no sd_rputdatafunc hook.
	 *
	 * The sd_rputdatafunc hook transforms the entire message
	 * before any bytes in it can be given to a client. So, rbytes
	 * must be 0 if there is a hook.
	 */
	if ((uiop != NULL) && (stp->sd_vnode->v_type == VSOCK) &&
	    (stp->sd_rputdatafunc == NULL))
		rbytes = uiop->uio_resid;

	return (getq_noenab(q, rbytes));
}

/*
 * Copy out the message pointed to by `bp' into the uio pointed to by `uiop'.
 * If the message does not fit in the uio the remainder of it is returned;
 * otherwise NULL is returned.  Any embedded zero-length mblk_t's are
 * consumed, even if uio_resid reaches zero.
On error, `*errorp' is set to 1089 * the error code, the message is consumed, and NULL is returned. 1090 */ 1091 static mblk_t * 1092 struiocopyout(mblk_t *bp, struct uio *uiop, int *errorp) 1093 { 1094 int error; 1095 ptrdiff_t n; 1096 mblk_t *nbp; 1097 1098 ASSERT(bp->b_wptr >= bp->b_rptr); 1099 1100 do { 1101 if ((n = MIN(uiop->uio_resid, MBLKL(bp))) != 0) { 1102 ASSERT(n > 0); 1103 1104 error = uiomove(bp->b_rptr, n, UIO_READ, uiop); 1105 if (error != 0) { 1106 freemsg(bp); 1107 *errorp = error; 1108 return (NULL); 1109 } 1110 } 1111 1112 bp->b_rptr += n; 1113 while (bp != NULL && (bp->b_rptr >= bp->b_wptr)) { 1114 nbp = bp; 1115 bp = bp->b_cont; 1116 freeb(nbp); 1117 } 1118 } while (bp != NULL && uiop->uio_resid > 0); 1119 1120 *errorp = 0; 1121 return (bp); 1122 } 1123 1124 /* 1125 * Read a stream according to the mode flags in sd_flag: 1126 * 1127 * (default mode) - Byte stream, msg boundaries are ignored 1128 * RD_MSGDIS (msg discard) - Read on msg boundaries and throw away 1129 * any data remaining in msg 1130 * RD_MSGNODIS (msg non-discard) - Read on msg boundaries and put back 1131 * any remaining data on head of read queue 1132 * 1133 * Consume readable messages on the front of the queue until 1134 * ttolwp(curthread)->lwp_count 1135 * is satisfied, the readable messages are exhausted, or a message 1136 * boundary is reached in a message mode. If no data was read and 1137 * the stream was not opened with the NDELAY flag, block until data arrives. 1138 * Otherwise return the data read and update the count. 1139 * 1140 * In default mode a 0 length message signifies end-of-file and terminates 1141 * a read in progress. The 0 length message is removed from the queue 1142 * only if it is the only message read (no data is read). 1143 * 1144 * An attempt to read an M_PROTO or M_PCPROTO message results in an 1145 * EBADMSG error return, unless either RD_PROTDAT or RD_PROTDIS are set. 1146 * If RD_PROTDAT is set, M_PROTO and M_PCPROTO messages are read as data. 
1147 * If RD_PROTDIS is set, the M_PROTO and M_PCPROTO parts of the message 1148 * are unlinked from and M_DATA blocks in the message, the protos are 1149 * thrown away, and the data is read. 1150 */ 1151 /* ARGSUSED */ 1152 int 1153 strread(struct vnode *vp, struct uio *uiop, cred_t *crp) 1154 { 1155 struct stdata *stp; 1156 mblk_t *bp, *nbp; 1157 queue_t *q; 1158 int error = 0; 1159 uint_t old_sd_flag; 1160 int first; 1161 char rflg; 1162 uint_t mark; /* Contains MSG*MARK and _LASTMARK */ 1163 #define _LASTMARK 0x8000 /* Distinct from MSG*MARK */ 1164 short delim; 1165 unsigned char pri = 0; 1166 char waitflag; 1167 unsigned char type; 1168 1169 TRACE_1(TR_FAC_STREAMS_FR, 1170 TR_STRREAD_ENTER, "strread:%p", vp); 1171 ASSERT(vp->v_stream); 1172 stp = vp->v_stream; 1173 1174 mutex_enter(&stp->sd_lock); 1175 1176 if ((error = i_straccess(stp, JCREAD)) != 0) { 1177 mutex_exit(&stp->sd_lock); 1178 return (error); 1179 } 1180 1181 if (stp->sd_flag & (STRDERR|STPLEX)) { 1182 error = strgeterr(stp, STRDERR|STPLEX, 0); 1183 if (error != 0) { 1184 mutex_exit(&stp->sd_lock); 1185 return (error); 1186 } 1187 } 1188 1189 /* 1190 * Loop terminates when uiop->uio_resid == 0. 1191 */ 1192 rflg = 0; 1193 waitflag = READWAIT; 1194 q = _RD(stp->sd_wrq); 1195 for (;;) { 1196 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1197 old_sd_flag = stp->sd_flag; 1198 mark = 0; 1199 delim = 0; 1200 first = 1; 1201 while ((bp = strget(stp, q, uiop, first, &error)) == NULL) { 1202 int done = 0; 1203 1204 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1205 1206 if (error != 0) 1207 goto oops; 1208 1209 if (stp->sd_flag & (STRHUP|STREOF)) { 1210 goto oops; 1211 } 1212 if (rflg && !(stp->sd_flag & STRDELIM)) { 1213 goto oops; 1214 } 1215 /* 1216 * If a read(fd,buf,0) has been done, there is no 1217 * need to sleep. We always have zero bytes to 1218 * return. 
1219 */ 1220 if (uiop->uio_resid == 0) { 1221 goto oops; 1222 } 1223 1224 qbackenable(q, 0); 1225 1226 TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_WAIT, 1227 "strread calls strwaitq:%p, %p, %p", 1228 vp, uiop, crp); 1229 if ((error = strwaitq(stp, waitflag, uiop->uio_resid, 1230 uiop->uio_fmode, -1, &done)) != 0 || done) { 1231 TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_DONE, 1232 "strread error or done:%p, %p, %p", 1233 vp, uiop, crp); 1234 if ((uiop->uio_fmode & FNDELAY) && 1235 (stp->sd_flag & OLDNDELAY) && 1236 (error == EAGAIN)) 1237 error = 0; 1238 goto oops; 1239 } 1240 TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_AWAKE, 1241 "strread awakes:%p, %p, %p", vp, uiop, crp); 1242 if ((error = i_straccess(stp, JCREAD)) != 0) { 1243 goto oops; 1244 } 1245 first = 0; 1246 } 1247 1248 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1249 ASSERT(bp); 1250 pri = bp->b_band; 1251 /* 1252 * Extract any mark information. If the message is not 1253 * completely consumed this information will be put in the mblk 1254 * that is putback. 1255 * If MSGMARKNEXT is set and the message is completely consumed 1256 * the STRATMARK flag will be set below. Likewise, if 1257 * MSGNOTMARKNEXT is set and the message is 1258 * completely consumed STRNOTATMARK will be set. 1259 * 1260 * For some unknown reason strread only breaks the read at the 1261 * last mark. 
1262 */ 1263 mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT); 1264 ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) != 1265 (MSGMARKNEXT|MSGNOTMARKNEXT)); 1266 if (mark != 0 && bp == stp->sd_mark) { 1267 if (rflg) { 1268 putback(stp, q, bp, pri); 1269 goto oops; 1270 } 1271 mark |= _LASTMARK; 1272 stp->sd_mark = NULL; 1273 } 1274 if ((stp->sd_flag & STRDELIM) && (bp->b_flag & MSGDELIM)) 1275 delim = 1; 1276 mutex_exit(&stp->sd_lock); 1277 1278 if (STREAM_NEEDSERVICE(stp)) 1279 stream_runservice(stp); 1280 1281 type = bp->b_datap->db_type; 1282 1283 switch (type) { 1284 1285 case M_DATA: 1286 ismdata: 1287 if (msgnodata(bp)) { 1288 if (mark || delim) { 1289 freemsg(bp); 1290 } else if (rflg) { 1291 1292 /* 1293 * If already read data put zero 1294 * length message back on queue else 1295 * free msg and return 0. 1296 */ 1297 bp->b_band = pri; 1298 mutex_enter(&stp->sd_lock); 1299 putback(stp, q, bp, pri); 1300 mutex_exit(&stp->sd_lock); 1301 } else { 1302 freemsg(bp); 1303 } 1304 error = 0; 1305 goto oops1; 1306 } 1307 1308 rflg = 1; 1309 waitflag |= NOINTR; 1310 bp = struiocopyout(bp, uiop, &error); 1311 if (error != 0) 1312 goto oops1; 1313 1314 mutex_enter(&stp->sd_lock); 1315 if (bp) { 1316 /* 1317 * Have remaining data in message. 1318 * Free msg if in discard mode. 1319 */ 1320 if (stp->sd_read_opt & RD_MSGDIS) { 1321 freemsg(bp); 1322 } else { 1323 bp->b_band = pri; 1324 if ((mark & _LASTMARK) && 1325 (stp->sd_mark == NULL)) 1326 stp->sd_mark = bp; 1327 bp->b_flag |= mark & ~_LASTMARK; 1328 if (delim) 1329 bp->b_flag |= MSGDELIM; 1330 if (msgnodata(bp)) 1331 freemsg(bp); 1332 else 1333 putback(stp, q, bp, pri); 1334 } 1335 } else { 1336 /* 1337 * Consumed the complete message. 1338 * Move the MSG*MARKNEXT information 1339 * to the stream head just in case 1340 * the read queue becomes empty. 
1341 * 1342 * If the stream head was at the mark 1343 * (STRATMARK) before we dropped sd_lock above 1344 * and some data was consumed then we have 1345 * moved past the mark thus STRATMARK is 1346 * cleared. However, if a message arrived in 1347 * strrput during the copyout above causing 1348 * STRATMARK to be set we can not clear that 1349 * flag. 1350 */ 1351 if (mark & 1352 (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) { 1353 if (mark & MSGMARKNEXT) { 1354 stp->sd_flag &= ~STRNOTATMARK; 1355 stp->sd_flag |= STRATMARK; 1356 } else if (mark & MSGNOTMARKNEXT) { 1357 stp->sd_flag &= ~STRATMARK; 1358 stp->sd_flag |= STRNOTATMARK; 1359 } else { 1360 stp->sd_flag &= 1361 ~(STRATMARK|STRNOTATMARK); 1362 } 1363 } else if (rflg && (old_sd_flag & STRATMARK)) { 1364 stp->sd_flag &= ~STRATMARK; 1365 } 1366 } 1367 1368 /* 1369 * Check for signal messages at the front of the read 1370 * queue and generate the signal(s) if appropriate. 1371 * The only signal that can be on queue is M_SIG at 1372 * this point. 1373 */ 1374 while ((((bp = q->q_first)) != NULL) && 1375 (bp->b_datap->db_type == M_SIG)) { 1376 bp = getq_noenab(q, 0); 1377 /* 1378 * sd_lock is held so the content of the 1379 * read queue can not change. 1380 */ 1381 ASSERT(bp != NULL && DB_TYPE(bp) == M_SIG); 1382 strsignal_nolock(stp, *bp->b_rptr, bp->b_band); 1383 mutex_exit(&stp->sd_lock); 1384 freemsg(bp); 1385 if (STREAM_NEEDSERVICE(stp)) 1386 stream_runservice(stp); 1387 mutex_enter(&stp->sd_lock); 1388 } 1389 1390 if ((uiop->uio_resid == 0) || (mark & _LASTMARK) || 1391 delim || 1392 (stp->sd_read_opt & (RD_MSGDIS|RD_MSGNODIS))) { 1393 goto oops; 1394 } 1395 continue; 1396 1397 case M_SIG: 1398 strsignal(stp, *bp->b_rptr, (int32_t)bp->b_band); 1399 freemsg(bp); 1400 mutex_enter(&stp->sd_lock); 1401 continue; 1402 1403 case M_PROTO: 1404 case M_PCPROTO: 1405 /* 1406 * Only data messages are readable. 1407 * Any others generate an error, unless 1408 * RD_PROTDIS or RD_PROTDAT is set. 
1409 */ 1410 if (stp->sd_read_opt & RD_PROTDAT) { 1411 for (nbp = bp; nbp; nbp = nbp->b_next) { 1412 if ((nbp->b_datap->db_type == 1413 M_PROTO) || 1414 (nbp->b_datap->db_type == 1415 M_PCPROTO)) { 1416 nbp->b_datap->db_type = M_DATA; 1417 } else { 1418 break; 1419 } 1420 } 1421 /* 1422 * clear stream head hi pri flag based on 1423 * first message 1424 */ 1425 if (type == M_PCPROTO) { 1426 mutex_enter(&stp->sd_lock); 1427 stp->sd_flag &= ~STRPRI; 1428 mutex_exit(&stp->sd_lock); 1429 } 1430 goto ismdata; 1431 } else if (stp->sd_read_opt & RD_PROTDIS) { 1432 /* 1433 * discard non-data messages 1434 */ 1435 while (bp && 1436 ((bp->b_datap->db_type == M_PROTO) || 1437 (bp->b_datap->db_type == M_PCPROTO))) { 1438 nbp = unlinkb(bp); 1439 freeb(bp); 1440 bp = nbp; 1441 } 1442 /* 1443 * clear stream head hi pri flag based on 1444 * first message 1445 */ 1446 if (type == M_PCPROTO) { 1447 mutex_enter(&stp->sd_lock); 1448 stp->sd_flag &= ~STRPRI; 1449 mutex_exit(&stp->sd_lock); 1450 } 1451 if (bp) { 1452 bp->b_band = pri; 1453 goto ismdata; 1454 } else { 1455 break; 1456 } 1457 } 1458 /* FALLTHRU */ 1459 case M_PASSFP: 1460 if ((bp->b_datap->db_type == M_PASSFP) && 1461 (stp->sd_read_opt & RD_PROTDIS)) { 1462 freemsg(bp); 1463 break; 1464 } 1465 mutex_enter(&stp->sd_lock); 1466 putback(stp, q, bp, pri); 1467 mutex_exit(&stp->sd_lock); 1468 if (rflg == 0) 1469 error = EBADMSG; 1470 goto oops1; 1471 1472 default: 1473 /* 1474 * Garbage on stream head read queue. 1475 */ 1476 cmn_err(CE_WARN, "bad %x found at stream head\n", 1477 bp->b_datap->db_type); 1478 freemsg(bp); 1479 goto oops1; 1480 } 1481 mutex_enter(&stp->sd_lock); 1482 } 1483 oops: 1484 mutex_exit(&stp->sd_lock); 1485 oops1: 1486 qbackenable(q, pri); 1487 return (error); 1488 #undef _LASTMARK 1489 } 1490 1491 /* 1492 * Default processing of M_PROTO/M_PCPROTO messages. 1493 * Determine which wakeups and signals are needed. 1494 * This can be replaced by a user-specified procedure for kernel users 1495 * of STREAMS. 
1496 */ 1497 /* ARGSUSED */ 1498 mblk_t * 1499 strrput_proto(vnode_t *vp, mblk_t *mp, 1500 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 1501 strsigset_t *allmsgsigs, strpollset_t *pollwakeups) 1502 { 1503 *wakeups = RSLEEP; 1504 *allmsgsigs = 0; 1505 1506 switch (mp->b_datap->db_type) { 1507 case M_PROTO: 1508 if (mp->b_band == 0) { 1509 *firstmsgsigs = S_INPUT | S_RDNORM; 1510 *pollwakeups = POLLIN | POLLRDNORM; 1511 } else { 1512 *firstmsgsigs = S_INPUT | S_RDBAND; 1513 *pollwakeups = POLLIN | POLLRDBAND; 1514 } 1515 break; 1516 case M_PCPROTO: 1517 *firstmsgsigs = S_HIPRI; 1518 *pollwakeups = POLLPRI; 1519 break; 1520 } 1521 return (mp); 1522 } 1523 1524 /* 1525 * Default processing of everything but M_DATA, M_PROTO, M_PCPROTO and 1526 * M_PASSFP messages. 1527 * Determine which wakeups and signals are needed. 1528 * This can be replaced by a user-specified procedure for kernel users 1529 * of STREAMS. 1530 */ 1531 /* ARGSUSED */ 1532 mblk_t * 1533 strrput_misc(vnode_t *vp, mblk_t *mp, 1534 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 1535 strsigset_t *allmsgsigs, strpollset_t *pollwakeups) 1536 { 1537 *wakeups = 0; 1538 *firstmsgsigs = 0; 1539 *allmsgsigs = 0; 1540 *pollwakeups = 0; 1541 return (mp); 1542 } 1543 1544 /* 1545 * Stream read put procedure. Called from downstream driver/module 1546 * with messages for the stream head. Data, protocol, and in-stream 1547 * signal messages are placed on the queue, others are handled directly. 
1548 */ 1549 int 1550 strrput(queue_t *q, mblk_t *bp) 1551 { 1552 struct stdata *stp; 1553 ulong_t rput_opt; 1554 strwakeup_t wakeups; 1555 strsigset_t firstmsgsigs; /* Signals if first message on queue */ 1556 strsigset_t allmsgsigs; /* Signals for all messages */ 1557 strsigset_t signals; /* Signals events to generate */ 1558 strpollset_t pollwakeups; 1559 mblk_t *nextbp; 1560 uchar_t band = 0; 1561 int hipri_sig; 1562 1563 stp = (struct stdata *)q->q_ptr; 1564 /* 1565 * Use rput_opt for optimized access to the SR_ flags except 1566 * SR_POLLIN. That flag has to be checked under sd_lock since it 1567 * is modified by strpoll(). 1568 */ 1569 rput_opt = stp->sd_rput_opt; 1570 1571 ASSERT(qclaimed(q)); 1572 TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_ENTER, 1573 "strrput called with message type:q %p bp %p", q, bp); 1574 1575 /* 1576 * Perform initial processing and pass to the parameterized functions. 1577 */ 1578 ASSERT(bp->b_next == NULL); 1579 1580 switch (bp->b_datap->db_type) { 1581 case M_DATA: 1582 /* 1583 * sockfs is the only consumer of STREOF and when it is set, 1584 * it implies that the receiver is not interested in receiving 1585 * any more data, hence the mblk is freed to prevent unnecessary 1586 * message queueing at the stream head. 1587 */ 1588 if (stp->sd_flag == STREOF) { 1589 freemsg(bp); 1590 return (0); 1591 } 1592 if ((rput_opt & SR_IGN_ZEROLEN) && 1593 bp->b_rptr == bp->b_wptr && msgnodata(bp)) { 1594 /* 1595 * Ignore zero-length M_DATA messages. These might be 1596 * generated by some transports. 1597 * The zero-length M_DATA messages, even if they 1598 * are ignored, should effect the atmark tracking and 1599 * should wake up a thread sleeping in strwaitmark. 1600 */ 1601 mutex_enter(&stp->sd_lock); 1602 if (bp->b_flag & MSGMARKNEXT) { 1603 /* 1604 * Record the position of the mark either 1605 * in q_last or in STRATMARK. 
1606 */ 1607 if (q->q_last != NULL) { 1608 q->q_last->b_flag &= ~MSGNOTMARKNEXT; 1609 q->q_last->b_flag |= MSGMARKNEXT; 1610 } else { 1611 stp->sd_flag &= ~STRNOTATMARK; 1612 stp->sd_flag |= STRATMARK; 1613 } 1614 } else if (bp->b_flag & MSGNOTMARKNEXT) { 1615 /* 1616 * Record that this is not the position of 1617 * the mark either in q_last or in 1618 * STRNOTATMARK. 1619 */ 1620 if (q->q_last != NULL) { 1621 q->q_last->b_flag &= ~MSGMARKNEXT; 1622 q->q_last->b_flag |= MSGNOTMARKNEXT; 1623 } else { 1624 stp->sd_flag &= ~STRATMARK; 1625 stp->sd_flag |= STRNOTATMARK; 1626 } 1627 } 1628 if (stp->sd_flag & RSLEEP) { 1629 stp->sd_flag &= ~RSLEEP; 1630 cv_broadcast(&q->q_wait); 1631 } 1632 mutex_exit(&stp->sd_lock); 1633 freemsg(bp); 1634 return (0); 1635 } 1636 wakeups = RSLEEP; 1637 if (bp->b_band == 0) { 1638 firstmsgsigs = S_INPUT | S_RDNORM; 1639 pollwakeups = POLLIN | POLLRDNORM; 1640 } else { 1641 firstmsgsigs = S_INPUT | S_RDBAND; 1642 pollwakeups = POLLIN | POLLRDBAND; 1643 } 1644 if (rput_opt & SR_SIGALLDATA) 1645 allmsgsigs = firstmsgsigs; 1646 else 1647 allmsgsigs = 0; 1648 1649 mutex_enter(&stp->sd_lock); 1650 if ((rput_opt & SR_CONSOL_DATA) && 1651 (q->q_last != NULL) && 1652 (bp->b_flag & (MSGMARK|MSGDELIM)) == 0) { 1653 /* 1654 * Consolidate an M_DATA message onto an M_DATA, 1655 * M_PROTO, or M_PCPROTO by merging it with q_last. 1656 * The consolidation does not take place if 1657 * the old message is marked with either of the 1658 * marks or the delim flag or if the new 1659 * message is marked with MSGMARK. The MSGMARK 1660 * check is needed to handle the odd semantics of 1661 * MSGMARK where essentially the whole message 1662 * is to be treated as marked. 1663 * Carry any MSGMARKNEXT and MSGNOTMARKNEXT from the 1664 * new message to the front of the b_cont chain. 
1665 */ 1666 mblk_t *lbp = q->q_last; 1667 unsigned char db_type = lbp->b_datap->db_type; 1668 1669 if ((db_type == M_DATA || db_type == M_PROTO || 1670 db_type == M_PCPROTO) && 1671 !(lbp->b_flag & (MSGDELIM|MSGMARK|MSGMARKNEXT))) { 1672 rmvq_noenab(q, lbp); 1673 /* 1674 * The first message in the b_cont list 1675 * tracks MSGMARKNEXT and MSGNOTMARKNEXT. 1676 * We need to handle the case where we 1677 * are appending: 1678 * 1679 * 1) a MSGMARKNEXT to a MSGNOTMARKNEXT. 1680 * 2) a MSGMARKNEXT to a plain message. 1681 * 3) a MSGNOTMARKNEXT to a plain message 1682 * 4) a MSGNOTMARKNEXT to a MSGNOTMARKNEXT 1683 * message. 1684 * 1685 * Thus we never append a MSGMARKNEXT or 1686 * MSGNOTMARKNEXT to a MSGMARKNEXT message. 1687 */ 1688 if (bp->b_flag & MSGMARKNEXT) { 1689 lbp->b_flag |= MSGMARKNEXT; 1690 lbp->b_flag &= ~MSGNOTMARKNEXT; 1691 bp->b_flag &= ~MSGMARKNEXT; 1692 } else if (bp->b_flag & MSGNOTMARKNEXT) { 1693 lbp->b_flag |= MSGNOTMARKNEXT; 1694 bp->b_flag &= ~MSGNOTMARKNEXT; 1695 } 1696 1697 linkb(lbp, bp); 1698 bp = lbp; 1699 /* 1700 * The new message logically isn't the first 1701 * even though the q_first check below thinks 1702 * it is. Clear the firstmsgsigs to make it 1703 * not appear to be first. 
1704 */ 1705 firstmsgsigs = 0; 1706 } 1707 } 1708 break; 1709 1710 case M_PASSFP: 1711 wakeups = RSLEEP; 1712 allmsgsigs = 0; 1713 if (bp->b_band == 0) { 1714 firstmsgsigs = S_INPUT | S_RDNORM; 1715 pollwakeups = POLLIN | POLLRDNORM; 1716 } else { 1717 firstmsgsigs = S_INPUT | S_RDBAND; 1718 pollwakeups = POLLIN | POLLRDBAND; 1719 } 1720 mutex_enter(&stp->sd_lock); 1721 break; 1722 1723 case M_PROTO: 1724 case M_PCPROTO: 1725 ASSERT(stp->sd_rprotofunc != NULL); 1726 bp = (stp->sd_rprotofunc)(stp->sd_vnode, bp, 1727 &wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups); 1728 #define ALLSIG (S_INPUT|S_HIPRI|S_OUTPUT|S_MSG|S_ERROR|S_HANGUP|S_RDNORM|\ 1729 S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG) 1730 #define ALLPOLL (POLLIN|POLLPRI|POLLOUT|POLLRDNORM|POLLWRNORM|POLLRDBAND|\ 1731 POLLWRBAND) 1732 1733 ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0); 1734 ASSERT((firstmsgsigs & ~ALLSIG) == 0); 1735 ASSERT((allmsgsigs & ~ALLSIG) == 0); 1736 ASSERT((pollwakeups & ~ALLPOLL) == 0); 1737 1738 mutex_enter(&stp->sd_lock); 1739 break; 1740 1741 default: 1742 ASSERT(stp->sd_rmiscfunc != NULL); 1743 bp = (stp->sd_rmiscfunc)(stp->sd_vnode, bp, 1744 &wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups); 1745 ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0); 1746 ASSERT((firstmsgsigs & ~ALLSIG) == 0); 1747 ASSERT((allmsgsigs & ~ALLSIG) == 0); 1748 ASSERT((pollwakeups & ~ALLPOLL) == 0); 1749 #undef ALLSIG 1750 #undef ALLPOLL 1751 mutex_enter(&stp->sd_lock); 1752 break; 1753 } 1754 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1755 1756 /* By default generate superset of signals */ 1757 signals = (firstmsgsigs | allmsgsigs); 1758 1759 /* 1760 * The proto and misc functions can return multiple messages 1761 * as a b_next chain. Such messages are processed separately. 
1762 */ 1763 one_more: 1764 hipri_sig = 0; 1765 if (bp == NULL) { 1766 nextbp = NULL; 1767 } else { 1768 nextbp = bp->b_next; 1769 bp->b_next = NULL; 1770 1771 switch (bp->b_datap->db_type) { 1772 case M_PCPROTO: 1773 /* 1774 * Only one priority protocol message is allowed at the 1775 * stream head at a time. 1776 */ 1777 if (stp->sd_flag & STRPRI) { 1778 TRACE_0(TR_FAC_STREAMS_FR, TR_STRRPUT_PROTERR, 1779 "M_PCPROTO already at head"); 1780 freemsg(bp); 1781 mutex_exit(&stp->sd_lock); 1782 goto done; 1783 } 1784 stp->sd_flag |= STRPRI; 1785 hipri_sig = 1; 1786 /* FALLTHRU */ 1787 case M_DATA: 1788 case M_PROTO: 1789 case M_PASSFP: 1790 band = bp->b_band; 1791 /* 1792 * Marking doesn't work well when messages 1793 * are marked in more than one band. We only 1794 * remember the last message received, even if 1795 * it is placed on the queue ahead of other 1796 * marked messages. 1797 */ 1798 if (bp->b_flag & MSGMARK) 1799 stp->sd_mark = bp; 1800 (void) putq(q, bp); 1801 1802 /* 1803 * If message is a PCPROTO message, always use 1804 * firstmsgsigs to determine if a signal should be 1805 * sent as strrput is the only place to send 1806 * signals for PCPROTO. Other messages are based on 1807 * the STRGETINPROG flag. The flag determines if 1808 * strrput or (k)strgetmsg will be responsible for 1809 * sending the signals, in the firstmsgsigs case. 
1810 */ 1811 if ((hipri_sig == 1) || 1812 (((stp->sd_flag & STRGETINPROG) == 0) && 1813 (q->q_first == bp))) 1814 signals = (firstmsgsigs | allmsgsigs); 1815 else 1816 signals = allmsgsigs; 1817 break; 1818 1819 default: 1820 mutex_exit(&stp->sd_lock); 1821 (void) strrput_nondata(q, bp); 1822 mutex_enter(&stp->sd_lock); 1823 break; 1824 } 1825 } 1826 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1827 /* 1828 * Wake sleeping read/getmsg and cancel deferred wakeup 1829 */ 1830 if (wakeups & RSLEEP) 1831 stp->sd_wakeq &= ~RSLEEP; 1832 1833 wakeups &= stp->sd_flag; 1834 if (wakeups & RSLEEP) { 1835 stp->sd_flag &= ~RSLEEP; 1836 cv_broadcast(&q->q_wait); 1837 } 1838 if (wakeups & WSLEEP) { 1839 stp->sd_flag &= ~WSLEEP; 1840 cv_broadcast(&_WR(q)->q_wait); 1841 } 1842 1843 if (pollwakeups != 0) { 1844 if (pollwakeups == (POLLIN | POLLRDNORM)) { 1845 /* 1846 * Can't use rput_opt since it was not 1847 * read when sd_lock was held and SR_POLLIN is changed 1848 * by strpoll() under sd_lock. 1849 */ 1850 if (!(stp->sd_rput_opt & SR_POLLIN)) 1851 goto no_pollwake; 1852 stp->sd_rput_opt &= ~SR_POLLIN; 1853 } 1854 mutex_exit(&stp->sd_lock); 1855 pollwakeup(&stp->sd_pollist, pollwakeups); 1856 mutex_enter(&stp->sd_lock); 1857 } 1858 no_pollwake: 1859 1860 /* 1861 * strsendsig can handle multiple signals with a 1862 * single call. 1863 */ 1864 if (stp->sd_sigflags & signals) 1865 strsendsig(stp->sd_siglist, signals, band, 0); 1866 mutex_exit(&stp->sd_lock); 1867 1868 1869 done: 1870 if (nextbp == NULL) 1871 return (0); 1872 1873 /* 1874 * Any signals were handled the first time. 1875 * Wakeups and pollwakeups are redone to avoid any race 1876 * conditions - all the messages are not queued until the 1877 * last message has been processed by strrput. 
1878 */ 1879 bp = nextbp; 1880 signals = firstmsgsigs = allmsgsigs = 0; 1881 mutex_enter(&stp->sd_lock); 1882 goto one_more; 1883 } 1884 1885 static void 1886 log_dupioc(queue_t *rq, mblk_t *bp) 1887 { 1888 queue_t *wq, *qp; 1889 char *modnames, *mnp, *dname; 1890 size_t maxmodstr; 1891 boolean_t islast; 1892 1893 /* 1894 * Allocate a buffer large enough to hold the names of nstrpush modules 1895 * and one driver, with spaces between and NUL terminator. If we can't 1896 * get memory, then we'll just log the driver name. 1897 */ 1898 maxmodstr = nstrpush * (FMNAMESZ + 1); 1899 mnp = modnames = kmem_alloc(maxmodstr, KM_NOSLEEP); 1900 1901 /* march down write side to print log message down to the driver */ 1902 wq = WR(rq); 1903 1904 /* make sure q_next doesn't shift around while we're grabbing data */ 1905 claimstr(wq); 1906 qp = wq->q_next; 1907 do { 1908 dname = Q2NAME(qp); 1909 islast = !SAMESTR(qp) || qp->q_next == NULL; 1910 if (modnames == NULL) { 1911 /* 1912 * If we don't have memory, then get the driver name in 1913 * the log where we can see it. Note that memory 1914 * pressure is a possible cause of these sorts of bugs. 1915 */ 1916 if (islast) { 1917 modnames = dname; 1918 maxmodstr = 0; 1919 } 1920 } else { 1921 mnp += snprintf(mnp, FMNAMESZ + 1, "%s", dname); 1922 if (!islast) 1923 *mnp++ = ' '; 1924 } 1925 qp = qp->q_next; 1926 } while (!islast); 1927 releasestr(wq); 1928 /* Cannot happen unless stream head is corrupt. */ 1929 ASSERT(modnames != NULL); 1930 (void) strlog(rq->q_qinfo->qi_minfo->mi_idnum, 0, 1, 1931 SL_CONSOLE|SL_TRACE|SL_ERROR, 1932 "Warning: stream %p received duplicate %X M_IOC%s; module list: %s", 1933 rq->q_ptr, ((struct iocblk *)bp->b_rptr)->ioc_cmd, 1934 (DB_TYPE(bp) == M_IOCACK ? 
"ACK" : "NAK"), modnames); 1935 if (maxmodstr != 0) 1936 kmem_free(modnames, maxmodstr); 1937 } 1938 1939 int 1940 strrput_nondata(queue_t *q, mblk_t *bp) 1941 { 1942 struct stdata *stp; 1943 struct iocblk *iocbp; 1944 struct stroptions *sop; 1945 struct copyreq *reqp; 1946 struct copyresp *resp; 1947 unsigned char bpri; 1948 unsigned char flushed_already = 0; 1949 1950 stp = (struct stdata *)q->q_ptr; 1951 1952 ASSERT(!(stp->sd_flag & STPLEX)); 1953 ASSERT(qclaimed(q)); 1954 1955 switch (bp->b_datap->db_type) { 1956 case M_ERROR: 1957 /* 1958 * An error has occurred downstream, the errno is in the first 1959 * bytes of the message. 1960 */ 1961 if ((bp->b_wptr - bp->b_rptr) == 2) { /* New flavor */ 1962 unsigned char rw = 0; 1963 1964 mutex_enter(&stp->sd_lock); 1965 if (*bp->b_rptr != NOERROR) { /* read error */ 1966 if (*bp->b_rptr != 0) { 1967 if (stp->sd_flag & STRDERR) 1968 flushed_already |= FLUSHR; 1969 stp->sd_flag |= STRDERR; 1970 rw |= FLUSHR; 1971 } else { 1972 stp->sd_flag &= ~STRDERR; 1973 } 1974 stp->sd_rerror = *bp->b_rptr; 1975 } 1976 bp->b_rptr++; 1977 if (*bp->b_rptr != NOERROR) { /* write error */ 1978 if (*bp->b_rptr != 0) { 1979 if (stp->sd_flag & STWRERR) 1980 flushed_already |= FLUSHW; 1981 stp->sd_flag |= STWRERR; 1982 rw |= FLUSHW; 1983 } else { 1984 stp->sd_flag &= ~STWRERR; 1985 } 1986 stp->sd_werror = *bp->b_rptr; 1987 } 1988 if (rw) { 1989 TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_WAKE, 1990 "strrput cv_broadcast:q %p, bp %p", 1991 q, bp); 1992 cv_broadcast(&q->q_wait); /* readers */ 1993 cv_broadcast(&_WR(q)->q_wait); /* writers */ 1994 cv_broadcast(&stp->sd_monitor); /* ioctllers */ 1995 1996 mutex_exit(&stp->sd_lock); 1997 pollwakeup(&stp->sd_pollist, POLLERR); 1998 mutex_enter(&stp->sd_lock); 1999 2000 if (stp->sd_sigflags & S_ERROR) 2001 strsendsig(stp->sd_siglist, S_ERROR, 0, 2002 ((rw & FLUSHR) ? 
stp->sd_rerror : 2003 stp->sd_werror)); 2004 mutex_exit(&stp->sd_lock); 2005 /* 2006 * Send the M_FLUSH only 2007 * for the first M_ERROR 2008 * message on the stream 2009 */ 2010 if (flushed_already == rw) { 2011 freemsg(bp); 2012 return (0); 2013 } 2014 2015 bp->b_datap->db_type = M_FLUSH; 2016 *bp->b_rptr = rw; 2017 bp->b_wptr = bp->b_rptr + 1; 2018 /* 2019 * Protect against the driver 2020 * passing up messages after 2021 * it has done a qprocsoff 2022 */ 2023 if (_OTHERQ(q)->q_next == NULL) 2024 freemsg(bp); 2025 else 2026 qreply(q, bp); 2027 return (0); 2028 } else 2029 mutex_exit(&stp->sd_lock); 2030 } else if (*bp->b_rptr != 0) { /* Old flavor */ 2031 if (stp->sd_flag & (STRDERR|STWRERR)) 2032 flushed_already = FLUSHRW; 2033 mutex_enter(&stp->sd_lock); 2034 stp->sd_flag |= (STRDERR|STWRERR); 2035 stp->sd_rerror = *bp->b_rptr; 2036 stp->sd_werror = *bp->b_rptr; 2037 TRACE_2(TR_FAC_STREAMS_FR, 2038 TR_STRRPUT_WAKE2, 2039 "strrput wakeup #2:q %p, bp %p", q, bp); 2040 cv_broadcast(&q->q_wait); /* the readers */ 2041 cv_broadcast(&_WR(q)->q_wait); /* the writers */ 2042 cv_broadcast(&stp->sd_monitor); /* ioctllers */ 2043 2044 mutex_exit(&stp->sd_lock); 2045 pollwakeup(&stp->sd_pollist, POLLERR); 2046 mutex_enter(&stp->sd_lock); 2047 2048 if (stp->sd_sigflags & S_ERROR) 2049 strsendsig(stp->sd_siglist, S_ERROR, 0, 2050 (stp->sd_werror ? stp->sd_werror : 2051 stp->sd_rerror)); 2052 mutex_exit(&stp->sd_lock); 2053 2054 /* 2055 * Send the M_FLUSH only 2056 * for the first M_ERROR 2057 * message on the stream 2058 */ 2059 if (flushed_already != FLUSHRW) { 2060 bp->b_datap->db_type = M_FLUSH; 2061 *bp->b_rptr = FLUSHRW; 2062 /* 2063 * Protect against the driver passing up 2064 * messages after it has done a 2065 * qprocsoff. 
2066 */ 2067 if (_OTHERQ(q)->q_next == NULL) 2068 freemsg(bp); 2069 else 2070 qreply(q, bp); 2071 return (0); 2072 } 2073 } 2074 freemsg(bp); 2075 return (0); 2076 2077 case M_HANGUP: 2078 2079 freemsg(bp); 2080 mutex_enter(&stp->sd_lock); 2081 stp->sd_werror = ENXIO; 2082 stp->sd_flag |= STRHUP; 2083 stp->sd_flag &= ~(WSLEEP|RSLEEP); 2084 2085 /* 2086 * send signal if controlling tty 2087 */ 2088 2089 if (stp->sd_sidp) { 2090 prsignal(stp->sd_sidp, SIGHUP); 2091 if (stp->sd_sidp != stp->sd_pgidp) 2092 pgsignal(stp->sd_pgidp, SIGTSTP); 2093 } 2094 2095 /* 2096 * wake up read, write, and exception pollers and 2097 * reset wakeup mechanism. 2098 */ 2099 cv_broadcast(&q->q_wait); /* the readers */ 2100 cv_broadcast(&_WR(q)->q_wait); /* the writers */ 2101 cv_broadcast(&stp->sd_monitor); /* the ioctllers */ 2102 strhup(stp); 2103 mutex_exit(&stp->sd_lock); 2104 return (0); 2105 2106 case M_UNHANGUP: 2107 freemsg(bp); 2108 mutex_enter(&stp->sd_lock); 2109 stp->sd_werror = 0; 2110 stp->sd_flag &= ~STRHUP; 2111 mutex_exit(&stp->sd_lock); 2112 return (0); 2113 2114 case M_SIG: 2115 /* 2116 * Someone downstream wants to post a signal. The 2117 * signal to post is contained in the first byte of the 2118 * message. If the message would go on the front of 2119 * the queue, send a signal to the process group 2120 * (if not SIGPOLL) or to the siglist processes 2121 * (SIGPOLL). If something is already on the queue, 2122 * OR if we are delivering a delayed suspend (*sigh* 2123 * another "tty" hack) and there's no one sleeping already, 2124 * just enqueue the message. 2125 */ 2126 mutex_enter(&stp->sd_lock); 2127 if (q->q_first || (*bp->b_rptr == SIGTSTP && 2128 !(stp->sd_flag & RSLEEP))) { 2129 (void) putq(q, bp); 2130 mutex_exit(&stp->sd_lock); 2131 return (0); 2132 } 2133 mutex_exit(&stp->sd_lock); 2134 /* FALLTHRU */ 2135 2136 case M_PCSIG: 2137 /* 2138 * Don't enqueue, just post the signal. 
2139 */ 2140 strsignal(stp, *bp->b_rptr, 0L); 2141 freemsg(bp); 2142 return (0); 2143 2144 case M_CMD: 2145 if (MBLKL(bp) != sizeof (cmdblk_t)) { 2146 freemsg(bp); 2147 return (0); 2148 } 2149 2150 mutex_enter(&stp->sd_lock); 2151 if (stp->sd_flag & STRCMDWAIT) { 2152 ASSERT(stp->sd_cmdblk == NULL); 2153 stp->sd_cmdblk = bp; 2154 cv_broadcast(&stp->sd_monitor); 2155 mutex_exit(&stp->sd_lock); 2156 } else { 2157 mutex_exit(&stp->sd_lock); 2158 freemsg(bp); 2159 } 2160 return (0); 2161 2162 case M_FLUSH: 2163 /* 2164 * Flush queues. The indication of which queues to flush 2165 * is in the first byte of the message. If the read queue 2166 * is specified, then flush it. If FLUSHBAND is set, just 2167 * flush the band specified by the second byte of the message. 2168 * 2169 * If a module has issued a M_SETOPT to not flush hi 2170 * priority messages off of the stream head, then pass this 2171 * flag into the flushq code to preserve such messages. 2172 */ 2173 2174 if (*bp->b_rptr & FLUSHR) { 2175 mutex_enter(&stp->sd_lock); 2176 if (*bp->b_rptr & FLUSHBAND) { 2177 ASSERT((bp->b_wptr - bp->b_rptr) >= 2); 2178 flushband(q, *(bp->b_rptr + 1), FLUSHALL); 2179 } else 2180 flushq_common(q, FLUSHALL, 2181 stp->sd_read_opt & RFLUSHPCPROT); 2182 if ((q->q_first == NULL) || 2183 (q->q_first->b_datap->db_type < QPCTL)) 2184 stp->sd_flag &= ~STRPRI; 2185 else { 2186 ASSERT(stp->sd_flag & STRPRI); 2187 } 2188 mutex_exit(&stp->sd_lock); 2189 } 2190 if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) { 2191 *bp->b_rptr &= ~FLUSHR; 2192 bp->b_flag |= MSGNOLOOP; 2193 /* 2194 * Protect against the driver passing up 2195 * messages after it has done a qprocsoff. 2196 */ 2197 if (_OTHERQ(q)->q_next == NULL) 2198 freemsg(bp); 2199 else 2200 qreply(q, bp); 2201 return (0); 2202 } 2203 freemsg(bp); 2204 return (0); 2205 2206 case M_IOCACK: 2207 case M_IOCNAK: 2208 iocbp = (struct iocblk *)bp->b_rptr; 2209 /* 2210 * If not waiting for ACK or NAK then just free msg. 
2211 * If incorrect id sequence number then just free msg. 2212 * If already have ACK or NAK for user then this is a 2213 * duplicate, display a warning and free the msg. 2214 */ 2215 mutex_enter(&stp->sd_lock); 2216 if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk || 2217 (stp->sd_iocid != iocbp->ioc_id)) { 2218 /* 2219 * If the ACK/NAK is a dup, display a message 2220 * Dup is when sd_iocid == ioc_id, and 2221 * sd_iocblk == <valid ptr> or -1 (the former 2222 * is when an ioctl has been put on the stream 2223 * head, but has not yet been consumed, the 2224 * later is when it has been consumed). 2225 */ 2226 if ((stp->sd_iocid == iocbp->ioc_id) && 2227 (stp->sd_iocblk != NULL)) { 2228 log_dupioc(q, bp); 2229 } 2230 freemsg(bp); 2231 mutex_exit(&stp->sd_lock); 2232 return (0); 2233 } 2234 2235 /* 2236 * Assign ACK or NAK to user and wake up. 2237 */ 2238 stp->sd_iocblk = bp; 2239 cv_broadcast(&stp->sd_monitor); 2240 mutex_exit(&stp->sd_lock); 2241 return (0); 2242 2243 case M_COPYIN: 2244 case M_COPYOUT: 2245 reqp = (struct copyreq *)bp->b_rptr; 2246 2247 /* 2248 * If not waiting for ACK or NAK then just fail request. 2249 * If already have ACK, NAK, or copy request, then just 2250 * fail request. 2251 * If incorrect id sequence number then just fail request. 2252 */ 2253 mutex_enter(&stp->sd_lock); 2254 if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk || 2255 (stp->sd_iocid != reqp->cq_id)) { 2256 if (bp->b_cont) { 2257 freemsg(bp->b_cont); 2258 bp->b_cont = NULL; 2259 } 2260 bp->b_datap->db_type = M_IOCDATA; 2261 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp); 2262 resp = (struct copyresp *)bp->b_rptr; 2263 resp->cp_rval = (caddr_t)1; /* failure */ 2264 mutex_exit(&stp->sd_lock); 2265 putnext(stp->sd_wrq, bp); 2266 return (0); 2267 } 2268 2269 /* 2270 * Assign copy request to user and wake up. 
2271 */ 2272 stp->sd_iocblk = bp; 2273 cv_broadcast(&stp->sd_monitor); 2274 mutex_exit(&stp->sd_lock); 2275 return (0); 2276 2277 case M_SETOPTS: 2278 /* 2279 * Set stream head options (read option, write offset, 2280 * min/max packet size, and/or high/low water marks for 2281 * the read side only). 2282 */ 2283 2284 bpri = 0; 2285 sop = (struct stroptions *)bp->b_rptr; 2286 mutex_enter(&stp->sd_lock); 2287 if (sop->so_flags & SO_READOPT) { 2288 switch (sop->so_readopt & RMODEMASK) { 2289 case RNORM: 2290 stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS); 2291 break; 2292 2293 case RMSGD: 2294 stp->sd_read_opt = 2295 ((stp->sd_read_opt & ~RD_MSGNODIS) | 2296 RD_MSGDIS); 2297 break; 2298 2299 case RMSGN: 2300 stp->sd_read_opt = 2301 ((stp->sd_read_opt & ~RD_MSGDIS) | 2302 RD_MSGNODIS); 2303 break; 2304 } 2305 switch (sop->so_readopt & RPROTMASK) { 2306 case RPROTNORM: 2307 stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS); 2308 break; 2309 2310 case RPROTDAT: 2311 stp->sd_read_opt = 2312 ((stp->sd_read_opt & ~RD_PROTDIS) | 2313 RD_PROTDAT); 2314 break; 2315 2316 case RPROTDIS: 2317 stp->sd_read_opt = 2318 ((stp->sd_read_opt & ~RD_PROTDAT) | 2319 RD_PROTDIS); 2320 break; 2321 } 2322 switch (sop->so_readopt & RFLUSHMASK) { 2323 case RFLUSHPCPROT: 2324 /* 2325 * This sets the stream head to NOT flush 2326 * M_PCPROTO messages. 
2327 */ 2328 stp->sd_read_opt |= RFLUSHPCPROT; 2329 break; 2330 } 2331 } 2332 if (sop->so_flags & SO_ERROPT) { 2333 switch (sop->so_erropt & RERRMASK) { 2334 case RERRNORM: 2335 stp->sd_flag &= ~STRDERRNONPERSIST; 2336 break; 2337 case RERRNONPERSIST: 2338 stp->sd_flag |= STRDERRNONPERSIST; 2339 break; 2340 } 2341 switch (sop->so_erropt & WERRMASK) { 2342 case WERRNORM: 2343 stp->sd_flag &= ~STWRERRNONPERSIST; 2344 break; 2345 case WERRNONPERSIST: 2346 stp->sd_flag |= STWRERRNONPERSIST; 2347 break; 2348 } 2349 } 2350 if (sop->so_flags & SO_COPYOPT) { 2351 if (sop->so_copyopt & ZCVMSAFE) { 2352 stp->sd_copyflag |= STZCVMSAFE; 2353 stp->sd_copyflag &= ~STZCVMUNSAFE; 2354 } else if (sop->so_copyopt & ZCVMUNSAFE) { 2355 stp->sd_copyflag |= STZCVMUNSAFE; 2356 stp->sd_copyflag &= ~STZCVMSAFE; 2357 } 2358 2359 if (sop->so_copyopt & COPYCACHED) { 2360 stp->sd_copyflag |= STRCOPYCACHED; 2361 } 2362 } 2363 if (sop->so_flags & SO_WROFF) 2364 stp->sd_wroff = sop->so_wroff; 2365 if (sop->so_flags & SO_TAIL) 2366 stp->sd_tail = sop->so_tail; 2367 if (sop->so_flags & SO_MINPSZ) 2368 q->q_minpsz = sop->so_minpsz; 2369 if (sop->so_flags & SO_MAXPSZ) 2370 q->q_maxpsz = sop->so_maxpsz; 2371 if (sop->so_flags & SO_MAXBLK) 2372 stp->sd_maxblk = sop->so_maxblk; 2373 if (sop->so_flags & SO_HIWAT) { 2374 if (sop->so_flags & SO_BAND) { 2375 if (strqset(q, QHIWAT, 2376 sop->so_band, sop->so_hiwat)) { 2377 cmn_err(CE_WARN, "strrput: could not " 2378 "allocate qband\n"); 2379 } else { 2380 bpri = sop->so_band; 2381 } 2382 } else { 2383 q->q_hiwat = sop->so_hiwat; 2384 } 2385 } 2386 if (sop->so_flags & SO_LOWAT) { 2387 if (sop->so_flags & SO_BAND) { 2388 if (strqset(q, QLOWAT, 2389 sop->so_band, sop->so_lowat)) { 2390 cmn_err(CE_WARN, "strrput: could not " 2391 "allocate qband\n"); 2392 } else { 2393 bpri = sop->so_band; 2394 } 2395 } else { 2396 q->q_lowat = sop->so_lowat; 2397 } 2398 } 2399 if (sop->so_flags & SO_MREADON) 2400 stp->sd_flag |= SNDMREAD; 2401 if (sop->so_flags & SO_MREADOFF) 
2402 stp->sd_flag &= ~SNDMREAD; 2403 if (sop->so_flags & SO_NDELON) 2404 stp->sd_flag |= OLDNDELAY; 2405 if (sop->so_flags & SO_NDELOFF) 2406 stp->sd_flag &= ~OLDNDELAY; 2407 if (sop->so_flags & SO_ISTTY) 2408 stp->sd_flag |= STRISTTY; 2409 if (sop->so_flags & SO_ISNTTY) 2410 stp->sd_flag &= ~STRISTTY; 2411 if (sop->so_flags & SO_TOSTOP) 2412 stp->sd_flag |= STRTOSTOP; 2413 if (sop->so_flags & SO_TONSTOP) 2414 stp->sd_flag &= ~STRTOSTOP; 2415 if (sop->so_flags & SO_DELIM) 2416 stp->sd_flag |= STRDELIM; 2417 if (sop->so_flags & SO_NODELIM) 2418 stp->sd_flag &= ~STRDELIM; 2419 2420 mutex_exit(&stp->sd_lock); 2421 freemsg(bp); 2422 2423 /* Check backenable in case the water marks changed */ 2424 qbackenable(q, bpri); 2425 return (0); 2426 2427 /* 2428 * The following set of cases deal with situations where two stream 2429 * heads are connected to each other (twisted streams). These messages 2430 * have no meaning at the stream head. 2431 */ 2432 case M_BREAK: 2433 case M_CTL: 2434 case M_DELAY: 2435 case M_START: 2436 case M_STOP: 2437 case M_IOCDATA: 2438 case M_STARTI: 2439 case M_STOPI: 2440 freemsg(bp); 2441 return (0); 2442 2443 case M_IOCTL: 2444 /* 2445 * Always NAK this condition 2446 * (makes no sense) 2447 * If there is one or more threads in the read side 2448 * rwnext we have to defer the nacking until that thread 2449 * returns (in strget). 2450 */ 2451 mutex_enter(&stp->sd_lock); 2452 if (stp->sd_struiodnak != 0) { 2453 /* 2454 * Defer NAK to the streamhead. Queue at the end 2455 * the list. 2456 */ 2457 mblk_t *mp = stp->sd_struionak; 2458 2459 while (mp && mp->b_next) 2460 mp = mp->b_next; 2461 if (mp) 2462 mp->b_next = bp; 2463 else 2464 stp->sd_struionak = bp; 2465 bp->b_next = NULL; 2466 mutex_exit(&stp->sd_lock); 2467 return (0); 2468 } 2469 mutex_exit(&stp->sd_lock); 2470 2471 bp->b_datap->db_type = M_IOCNAK; 2472 /* 2473 * Protect against the driver passing up 2474 * messages after it has done a qprocsoff. 
2475 */ 2476 if (_OTHERQ(q)->q_next == NULL) 2477 freemsg(bp); 2478 else 2479 qreply(q, bp); 2480 return (0); 2481 2482 default: 2483 #ifdef DEBUG 2484 cmn_err(CE_WARN, 2485 "bad message type %x received at stream head\n", 2486 bp->b_datap->db_type); 2487 #endif 2488 freemsg(bp); 2489 return (0); 2490 } 2491 2492 /* NOTREACHED */ 2493 } 2494 2495 /* 2496 * Check if the stream pointed to by `stp' can be written to, and return an 2497 * error code if not. If `eiohup' is set, then return EIO if STRHUP is set. 2498 * If `sigpipeok' is set and the SW_SIGPIPE option is enabled on the stream, 2499 * then always return EPIPE and send a SIGPIPE to the invoking thread. 2500 */ 2501 static int 2502 strwriteable(struct stdata *stp, boolean_t eiohup, boolean_t sigpipeok) 2503 { 2504 int error; 2505 2506 ASSERT(MUTEX_HELD(&stp->sd_lock)); 2507 2508 /* 2509 * For modem support, POSIX states that on writes, EIO should 2510 * be returned if the stream has been hung up. 2511 */ 2512 if (eiohup && (stp->sd_flag & (STPLEX|STRHUP)) == STRHUP) 2513 error = EIO; 2514 else 2515 error = strgeterr(stp, STRHUP|STPLEX|STWRERR, 0); 2516 2517 if (error != 0) { 2518 if (!(stp->sd_flag & STPLEX) && 2519 (stp->sd_wput_opt & SW_SIGPIPE) && sigpipeok) { 2520 tsignal(curthread, SIGPIPE); 2521 error = EPIPE; 2522 } 2523 } 2524 2525 return (error); 2526 } 2527 2528 /* 2529 * Copyin and send data down a stream. 2530 * The caller will allocate and copyin any control part that precedes the 2531 * message and pass that in as mctl. 2532 * 2533 * Caller should *not* hold sd_lock. 2534 * When EWOULDBLOCK is returned the caller has to redo the canputnext 2535 * under sd_lock in order to avoid missing a backenabling wakeup. 2536 * 2537 * Use iosize = -1 to not send any M_DATA. iosize = 0 sends zero-length M_DATA. 2538 * 2539 * Set MSG_IGNFLOW in flags to ignore flow control for hipri messages. 2540 * For sync streams we can only ignore flow control by reverting to using 2541 * putnext. 
2542 * 2543 * If sd_maxblk is less than *iosize this routine might return without 2544 * transferring all of *iosize. In all cases, on return *iosize will contain 2545 * the amount of data that was transferred. 2546 */ 2547 static int 2548 strput(struct stdata *stp, mblk_t *mctl, struct uio *uiop, ssize_t *iosize, 2549 int b_flag, int pri, int flags) 2550 { 2551 struiod_t uiod; 2552 mblk_t *mp; 2553 queue_t *wqp = stp->sd_wrq; 2554 int error = 0; 2555 ssize_t count = *iosize; 2556 2557 ASSERT(MUTEX_NOT_HELD(&stp->sd_lock)); 2558 2559 if (uiop != NULL && count >= 0) 2560 flags |= stp->sd_struiowrq ? STRUIO_POSTPONE : 0; 2561 2562 if (!(flags & STRUIO_POSTPONE)) { 2563 /* 2564 * Use regular canputnext, strmakedata, putnext sequence. 2565 */ 2566 if (pri == 0) { 2567 if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) { 2568 freemsg(mctl); 2569 return (EWOULDBLOCK); 2570 } 2571 } else { 2572 if (!(flags & MSG_IGNFLOW) && !bcanputnext(wqp, pri)) { 2573 freemsg(mctl); 2574 return (EWOULDBLOCK); 2575 } 2576 } 2577 2578 if ((error = strmakedata(iosize, uiop, stp, flags, 2579 &mp)) != 0) { 2580 freemsg(mctl); 2581 /* 2582 * need to change return code to ENOMEM 2583 * so that this is not confused with 2584 * flow control, EAGAIN. 2585 */ 2586 2587 if (error == EAGAIN) 2588 return (ENOMEM); 2589 else 2590 return (error); 2591 } 2592 if (mctl != NULL) { 2593 if (mctl->b_cont == NULL) 2594 mctl->b_cont = mp; 2595 else if (mp != NULL) 2596 linkb(mctl, mp); 2597 mp = mctl; 2598 } else if (mp == NULL) 2599 return (0); 2600 2601 mp->b_flag |= b_flag; 2602 mp->b_band = (uchar_t)pri; 2603 2604 if (flags & MSG_IGNFLOW) { 2605 /* 2606 * XXX Hack: Don't get stuck running service 2607 * procedures. This is needed for sockfs when 2608 * sending the unbind message out of the rput 2609 * procedure - we don't want a put procedure 2610 * to run service procedures. 
2611 */ 2612 putnext(wqp, mp); 2613 } else { 2614 stream_willservice(stp); 2615 putnext(wqp, mp); 2616 stream_runservice(stp); 2617 } 2618 return (0); 2619 } 2620 /* 2621 * Stream supports rwnext() for the write side. 2622 */ 2623 if ((error = strmakedata(iosize, uiop, stp, flags, &mp)) != 0) { 2624 freemsg(mctl); 2625 /* 2626 * map EAGAIN to ENOMEM since EAGAIN means "flow controlled". 2627 */ 2628 return (error == EAGAIN ? ENOMEM : error); 2629 } 2630 if (mctl != NULL) { 2631 if (mctl->b_cont == NULL) 2632 mctl->b_cont = mp; 2633 else if (mp != NULL) 2634 linkb(mctl, mp); 2635 mp = mctl; 2636 } else if (mp == NULL) { 2637 return (0); 2638 } 2639 2640 mp->b_flag |= b_flag; 2641 mp->b_band = (uchar_t)pri; 2642 2643 (void) uiodup(uiop, &uiod.d_uio, uiod.d_iov, 2644 sizeof (uiod.d_iov) / sizeof (*uiod.d_iov)); 2645 uiod.d_uio.uio_offset = 0; 2646 uiod.d_mp = mp; 2647 error = rwnext(wqp, &uiod); 2648 if (! uiod.d_mp) { 2649 uioskip(uiop, *iosize); 2650 return (error); 2651 } 2652 ASSERT(mp == uiod.d_mp); 2653 if (error == EINVAL) { 2654 /* 2655 * The stream plumbing must have changed while 2656 * we were away, so just turn off rwnext()s. 2657 */ 2658 error = 0; 2659 } else if (error == EBUSY || error == EWOULDBLOCK) { 2660 /* 2661 * Couldn't enter a perimeter or took a page fault, 2662 * so fall-back to putnext(). 
2663 */ 2664 error = 0; 2665 } else { 2666 freemsg(mp); 2667 return (error); 2668 } 2669 /* Have to check canput before consuming data from the uio */ 2670 if (pri == 0) { 2671 if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) { 2672 freemsg(mp); 2673 return (EWOULDBLOCK); 2674 } 2675 } else { 2676 if (!bcanputnext(wqp, pri) && !(flags & MSG_IGNFLOW)) { 2677 freemsg(mp); 2678 return (EWOULDBLOCK); 2679 } 2680 } 2681 ASSERT(mp == uiod.d_mp); 2682 /* Copyin data from the uio */ 2683 if ((error = struioget(wqp, mp, &uiod, 0)) != 0) { 2684 freemsg(mp); 2685 return (error); 2686 } 2687 uioskip(uiop, *iosize); 2688 if (flags & MSG_IGNFLOW) { 2689 /* 2690 * XXX Hack: Don't get stuck running service procedures. 2691 * This is needed for sockfs when sending the unbind message 2692 * out of the rput procedure - we don't want a put procedure 2693 * to run service procedures. 2694 */ 2695 putnext(wqp, mp); 2696 } else { 2697 stream_willservice(stp); 2698 putnext(wqp, mp); 2699 stream_runservice(stp); 2700 } 2701 return (0); 2702 } 2703 2704 /* 2705 * Write attempts to break the write request into messages conforming 2706 * with the minimum and maximum packet sizes set downstream. 2707 * 2708 * Write will not block if downstream queue is full and 2709 * O_NDELAY is set, otherwise it will block waiting for the queue to get room. 2710 * 2711 * A write of zero bytes gets packaged into a zero length message and sent 2712 * downstream like any other message. 2713 * 2714 * If buffers of the requested sizes are not available, the write will 2715 * sleep until the buffers become available. 2716 * 2717 * Write (if specified) will supply a write offset in a message if it 2718 * makes sense. This can be specified by downstream modules as part of 2719 * a M_SETOPTS message. Write will not supply the write offset if it 2720 * cannot supply any data in a buffer. In other words, write will never 2721 * send down an empty packet due to a write offset. 
2722 */ 2723 /* ARGSUSED2 */ 2724 int 2725 strwrite(struct vnode *vp, struct uio *uiop, cred_t *crp) 2726 { 2727 return (strwrite_common(vp, uiop, crp, 0)); 2728 } 2729 2730 /* ARGSUSED2 */ 2731 int 2732 strwrite_common(struct vnode *vp, struct uio *uiop, cred_t *crp, int wflag) 2733 { 2734 struct stdata *stp; 2735 struct queue *wqp; 2736 ssize_t rmin, rmax; 2737 ssize_t iosize; 2738 int waitflag; 2739 int tempmode; 2740 int error = 0; 2741 int b_flag; 2742 2743 ASSERT(vp->v_stream); 2744 stp = vp->v_stream; 2745 2746 mutex_enter(&stp->sd_lock); 2747 2748 if ((error = i_straccess(stp, JCWRITE)) != 0) { 2749 mutex_exit(&stp->sd_lock); 2750 return (error); 2751 } 2752 2753 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { 2754 error = strwriteable(stp, B_TRUE, B_TRUE); 2755 if (error != 0) { 2756 mutex_exit(&stp->sd_lock); 2757 return (error); 2758 } 2759 } 2760 2761 mutex_exit(&stp->sd_lock); 2762 2763 wqp = stp->sd_wrq; 2764 2765 /* get these values from them cached in the stream head */ 2766 rmin = stp->sd_qn_minpsz; 2767 rmax = stp->sd_qn_maxpsz; 2768 2769 /* 2770 * Check the min/max packet size constraints. If min packet size 2771 * is non-zero, the write cannot be split into multiple messages 2772 * and still guarantee the size constraints. 2773 */ 2774 TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_IN, "strwrite in:q %p", wqp); 2775 2776 ASSERT((rmax >= 0) || (rmax == INFPSZ)); 2777 if (rmax == 0) { 2778 return (0); 2779 } 2780 if (rmin > 0) { 2781 if (uiop->uio_resid < rmin) { 2782 TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT, 2783 "strwrite out:q %p out %d error %d", 2784 wqp, 0, ERANGE); 2785 return (ERANGE); 2786 } 2787 if ((rmax != INFPSZ) && (uiop->uio_resid > rmax)) { 2788 TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT, 2789 "strwrite out:q %p out %d error %d", 2790 wqp, 1, ERANGE); 2791 return (ERANGE); 2792 } 2793 } 2794 2795 /* 2796 * Do until count satisfied or error. 
2797 */ 2798 waitflag = WRITEWAIT | wflag; 2799 if (stp->sd_flag & OLDNDELAY) 2800 tempmode = uiop->uio_fmode & ~FNDELAY; 2801 else 2802 tempmode = uiop->uio_fmode; 2803 2804 if (rmax == INFPSZ) 2805 rmax = uiop->uio_resid; 2806 2807 /* 2808 * Note that tempmode does not get used in strput/strmakedata 2809 * but only in strwaitq. The other routines use uio_fmode 2810 * unmodified. 2811 */ 2812 2813 /* LINTED: constant in conditional context */ 2814 while (1) { /* breaks when uio_resid reaches zero */ 2815 /* 2816 * Determine the size of the next message to be 2817 * packaged. May have to break write into several 2818 * messages based on max packet size. 2819 */ 2820 iosize = MIN(uiop->uio_resid, rmax); 2821 2822 /* 2823 * Put block downstream when flow control allows it. 2824 */ 2825 if ((stp->sd_flag & STRDELIM) && (uiop->uio_resid == iosize)) 2826 b_flag = MSGDELIM; 2827 else 2828 b_flag = 0; 2829 2830 for (;;) { 2831 int done = 0; 2832 2833 error = strput(stp, NULL, uiop, &iosize, b_flag, 0, 0); 2834 if (error == 0) 2835 break; 2836 if (error != EWOULDBLOCK) 2837 goto out; 2838 2839 mutex_enter(&stp->sd_lock); 2840 /* 2841 * Check for a missed wakeup. 2842 * Needed since strput did not hold sd_lock across 2843 * the canputnext. 
2844 */ 2845 if (canputnext(wqp)) { 2846 /* Try again */ 2847 mutex_exit(&stp->sd_lock); 2848 continue; 2849 } 2850 TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAIT, 2851 "strwrite wait:q %p wait", wqp); 2852 if ((error = strwaitq(stp, waitflag, (ssize_t)0, 2853 tempmode, -1, &done)) != 0 || done) { 2854 mutex_exit(&stp->sd_lock); 2855 if ((vp->v_type == VFIFO) && 2856 (uiop->uio_fmode & FNDELAY) && 2857 (error == EAGAIN)) 2858 error = 0; 2859 goto out; 2860 } 2861 TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAKE, 2862 "strwrite wake:q %p awakes", wqp); 2863 if ((error = i_straccess(stp, JCWRITE)) != 0) { 2864 mutex_exit(&stp->sd_lock); 2865 goto out; 2866 } 2867 mutex_exit(&stp->sd_lock); 2868 } 2869 waitflag |= NOINTR; 2870 TRACE_2(TR_FAC_STREAMS_FR, TR_STRWRITE_RESID, 2871 "strwrite resid:q %p uiop %p", wqp, uiop); 2872 if (uiop->uio_resid) { 2873 /* Recheck for errors - needed for sockets */ 2874 if ((stp->sd_wput_opt & SW_RECHECK_ERR) && 2875 (stp->sd_flag & (STWRERR|STRHUP|STPLEX))) { 2876 mutex_enter(&stp->sd_lock); 2877 error = strwriteable(stp, B_FALSE, B_TRUE); 2878 mutex_exit(&stp->sd_lock); 2879 if (error != 0) 2880 return (error); 2881 } 2882 continue; 2883 } 2884 break; 2885 } 2886 out: 2887 /* 2888 * For historical reasons, applications expect EAGAIN when a data 2889 * mblk_t cannot be allocated, so change ENOMEM back to EAGAIN. 2890 */ 2891 if (error == ENOMEM) 2892 error = EAGAIN; 2893 TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT, 2894 "strwrite out:q %p out %d error %d", wqp, 2, error); 2895 return (error); 2896 } 2897 2898 /* 2899 * Stream head write service routine. 2900 * Its job is to wake up any sleeping writers when a queue 2901 * downstream needs data (part of the flow control in putq and getq). 2902 * It also must wake anyone sleeping on a poll(). 2903 * For stream head right below mux module, it must also invoke put procedure 2904 * of next downstream module. 
2905 */ 2906 int 2907 strwsrv(queue_t *q) 2908 { 2909 struct stdata *stp; 2910 queue_t *tq; 2911 qband_t *qbp; 2912 int i; 2913 qband_t *myqbp; 2914 int isevent; 2915 unsigned char qbf[NBAND]; /* band flushing backenable flags */ 2916 2917 TRACE_1(TR_FAC_STREAMS_FR, 2918 TR_STRWSRV, "strwsrv:q %p", q); 2919 stp = (struct stdata *)q->q_ptr; 2920 ASSERT(qclaimed(q)); 2921 mutex_enter(&stp->sd_lock); 2922 ASSERT(!(stp->sd_flag & STPLEX)); 2923 2924 if (stp->sd_flag & WSLEEP) { 2925 stp->sd_flag &= ~WSLEEP; 2926 cv_broadcast(&q->q_wait); 2927 } 2928 mutex_exit(&stp->sd_lock); 2929 2930 /* The other end of a stream pipe went away. */ 2931 if ((tq = q->q_next) == NULL) { 2932 return (0); 2933 } 2934 2935 /* Find the next module forward that has a service procedure */ 2936 claimstr(q); 2937 tq = q->q_nfsrv; 2938 ASSERT(tq != NULL); 2939 2940 if ((q->q_flag & QBACK)) { 2941 if ((tq->q_flag & QFULL)) { 2942 mutex_enter(QLOCK(tq)); 2943 if (!(tq->q_flag & QFULL)) { 2944 mutex_exit(QLOCK(tq)); 2945 goto wakeup; 2946 } 2947 /* 2948 * The queue must have become full again. Set QWANTW 2949 * again so strwsrv will be back enabled when 2950 * the queue becomes non-full next time. 2951 */ 2952 tq->q_flag |= QWANTW; 2953 mutex_exit(QLOCK(tq)); 2954 } else { 2955 wakeup: 2956 pollwakeup(&stp->sd_pollist, POLLWRNORM); 2957 mutex_enter(&stp->sd_lock); 2958 if (stp->sd_sigflags & S_WRNORM) 2959 strsendsig(stp->sd_siglist, S_WRNORM, 0, 0); 2960 mutex_exit(&stp->sd_lock); 2961 } 2962 } 2963 2964 isevent = 0; 2965 i = 1; 2966 bzero((caddr_t)qbf, NBAND); 2967 mutex_enter(QLOCK(tq)); 2968 if ((myqbp = q->q_bandp) != NULL) 2969 for (qbp = tq->q_bandp; qbp && myqbp; qbp = qbp->qb_next) { 2970 ASSERT(myqbp); 2971 if ((myqbp->qb_flag & QB_BACK)) { 2972 if (qbp->qb_flag & QB_FULL) { 2973 /* 2974 * The band must have become full again. 2975 * Set QB_WANTW again so strwsrv will 2976 * be back enabled when the band becomes 2977 * non-full next time. 
2978 */ 2979 qbp->qb_flag |= QB_WANTW; 2980 } else { 2981 isevent = 1; 2982 qbf[i] = 1; 2983 } 2984 } 2985 myqbp = myqbp->qb_next; 2986 i++; 2987 } 2988 mutex_exit(QLOCK(tq)); 2989 2990 if (isevent) { 2991 for (i = tq->q_nband; i; i--) { 2992 if (qbf[i]) { 2993 pollwakeup(&stp->sd_pollist, POLLWRBAND); 2994 mutex_enter(&stp->sd_lock); 2995 if (stp->sd_sigflags & S_WRBAND) 2996 strsendsig(stp->sd_siglist, S_WRBAND, 2997 (uchar_t)i, 0); 2998 mutex_exit(&stp->sd_lock); 2999 } 3000 } 3001 } 3002 3003 releasestr(q); 3004 return (0); 3005 } 3006 3007 /* 3008 * Special case of strcopyin/strcopyout for copying 3009 * struct strioctl that can deal with both data 3010 * models. 3011 */ 3012 3013 #ifdef _LP64 3014 3015 static int 3016 strcopyin_strioctl(void *from, void *to, int flag, int copyflag) 3017 { 3018 struct strioctl32 strioc32; 3019 struct strioctl *striocp; 3020 3021 if (copyflag & U_TO_K) { 3022 ASSERT((copyflag & K_TO_K) == 0); 3023 3024 if ((flag & FMODELS) == DATAMODEL_ILP32) { 3025 if (copyin(from, &strioc32, sizeof (strioc32))) 3026 return (EFAULT); 3027 3028 striocp = (struct strioctl *)to; 3029 striocp->ic_cmd = strioc32.ic_cmd; 3030 striocp->ic_timout = strioc32.ic_timout; 3031 striocp->ic_len = strioc32.ic_len; 3032 striocp->ic_dp = (char *)(uintptr_t)strioc32.ic_dp; 3033 3034 } else { /* NATIVE data model */ 3035 if (copyin(from, to, sizeof (struct strioctl))) { 3036 return (EFAULT); 3037 } else { 3038 return (0); 3039 } 3040 } 3041 } else { 3042 ASSERT(copyflag & K_TO_K); 3043 bcopy(from, to, sizeof (struct strioctl)); 3044 } 3045 return (0); 3046 } 3047 3048 static int 3049 strcopyout_strioctl(void *from, void *to, int flag, int copyflag) 3050 { 3051 struct strioctl32 strioc32; 3052 struct strioctl *striocp; 3053 3054 if (copyflag & U_TO_K) { 3055 ASSERT((copyflag & K_TO_K) == 0); 3056 3057 if ((flag & FMODELS) == DATAMODEL_ILP32) { 3058 striocp = (struct strioctl *)from; 3059 strioc32.ic_cmd = striocp->ic_cmd; 3060 strioc32.ic_timout = 
striocp->ic_timout; 3061 strioc32.ic_len = striocp->ic_len; 3062 strioc32.ic_dp = (caddr32_t)(uintptr_t)striocp->ic_dp; 3063 ASSERT((char *)(uintptr_t)strioc32.ic_dp == 3064 striocp->ic_dp); 3065 3066 if (copyout(&strioc32, to, sizeof (strioc32))) 3067 return (EFAULT); 3068 3069 } else { /* NATIVE data model */ 3070 if (copyout(from, to, sizeof (struct strioctl))) { 3071 return (EFAULT); 3072 } else { 3073 return (0); 3074 } 3075 } 3076 } else { 3077 ASSERT(copyflag & K_TO_K); 3078 bcopy(from, to, sizeof (struct strioctl)); 3079 } 3080 return (0); 3081 } 3082 3083 #else /* ! _LP64 */ 3084 3085 /* ARGSUSED2 */ 3086 static int 3087 strcopyin_strioctl(void *from, void *to, int flag, int copyflag) 3088 { 3089 return (strcopyin(from, to, sizeof (struct strioctl), copyflag)); 3090 } 3091 3092 /* ARGSUSED2 */ 3093 static int 3094 strcopyout_strioctl(void *from, void *to, int flag, int copyflag) 3095 { 3096 return (strcopyout(from, to, sizeof (struct strioctl), copyflag)); 3097 } 3098 3099 #endif /* _LP64 */ 3100 3101 /* 3102 * Determine type of job control semantics expected by user. The 3103 * possibilities are: 3104 * JCREAD - Behaves like read() on fd; send SIGTTIN 3105 * JCWRITE - Behaves like write() on fd; send SIGTTOU if TOSTOP set 3106 * JCSETP - Sets a value in the stream; send SIGTTOU, ignore TOSTOP 3107 * JCGETP - Gets a value in the stream; no signals. 3108 * See straccess in strsubr.c for usage of these values. 3109 * 3110 * This routine also returns -1 for I_STR as a special case; the 3111 * caller must call again with the real ioctl number for 3112 * classification. 
3113 */ 3114 static int 3115 job_control_type(int cmd) 3116 { 3117 switch (cmd) { 3118 case I_STR: 3119 return (-1); 3120 3121 case I_RECVFD: 3122 case I_E_RECVFD: 3123 return (JCREAD); 3124 3125 case I_FDINSERT: 3126 case I_SENDFD: 3127 return (JCWRITE); 3128 3129 case TCSETA: 3130 case TCSETAW: 3131 case TCSETAF: 3132 case TCSBRK: 3133 case TCXONC: 3134 case TCFLSH: 3135 case TCDSET: /* Obsolete */ 3136 case TIOCSWINSZ: 3137 case TCSETS: 3138 case TCSETSW: 3139 case TCSETSF: 3140 case TIOCSETD: 3141 case TIOCHPCL: 3142 case TIOCSETP: 3143 case TIOCSETN: 3144 case TIOCEXCL: 3145 case TIOCNXCL: 3146 case TIOCFLUSH: 3147 case TIOCSETC: 3148 case TIOCLBIS: 3149 case TIOCLBIC: 3150 case TIOCLSET: 3151 case TIOCSBRK: 3152 case TIOCCBRK: 3153 case TIOCSDTR: 3154 case TIOCCDTR: 3155 case TIOCSLTC: 3156 case TIOCSTOP: 3157 case TIOCSTART: 3158 case TIOCSTI: 3159 case TIOCSPGRP: 3160 case TIOCMSET: 3161 case TIOCMBIS: 3162 case TIOCMBIC: 3163 case TIOCREMOTE: 3164 case TIOCSIGNAL: 3165 case LDSETT: 3166 case LDSMAP: /* Obsolete */ 3167 case DIOCSETP: 3168 case I_FLUSH: 3169 case I_SRDOPT: 3170 case I_SETSIG: 3171 case I_SWROPT: 3172 case I_FLUSHBAND: 3173 case I_SETCLTIME: 3174 case I_SERROPT: 3175 case I_ESETSIG: 3176 case FIONBIO: 3177 case FIOASYNC: 3178 case FIOSETOWN: 3179 case JBOOT: /* Obsolete */ 3180 case JTERM: /* Obsolete */ 3181 case JTIMOM: /* Obsolete */ 3182 case JZOMBOOT: /* Obsolete */ 3183 case JAGENT: /* Obsolete */ 3184 case JTRUN: /* Obsolete */ 3185 case JXTPROTO: /* Obsolete */ 3186 case TIOCSETLD: 3187 return (JCSETP); 3188 } 3189 3190 return (JCGETP); 3191 } 3192 3193 /* 3194 * ioctl for streams 3195 */ 3196 int 3197 strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, 3198 cred_t *crp, int *rvalp) 3199 { 3200 struct stdata *stp; 3201 struct strcmd *scp; 3202 struct strioctl strioc; 3203 struct uio uio; 3204 struct iovec iov; 3205 int access; 3206 mblk_t *mp; 3207 int error = 0; 3208 int done = 0; 3209 ssize_t rmin, rmax; 3210 
queue_t *wrq; 3211 queue_t *rdq; 3212 boolean_t kioctl = B_FALSE; 3213 uint32_t auditing = AU_AUDITING(); 3214 3215 if (flag & FKIOCTL) { 3216 copyflag = K_TO_K; 3217 kioctl = B_TRUE; 3218 } 3219 ASSERT(vp->v_stream); 3220 ASSERT(copyflag == U_TO_K || copyflag == K_TO_K); 3221 stp = vp->v_stream; 3222 3223 TRACE_3(TR_FAC_STREAMS_FR, TR_IOCTL_ENTER, 3224 "strioctl:stp %p cmd %X arg %lX", stp, cmd, arg); 3225 3226 if (auditing) 3227 audit_strioctl(vp, cmd, arg, flag, copyflag, crp, rvalp); 3228 3229 /* 3230 * If the copy is kernel to kernel, make sure that the FNATIVE 3231 * flag is set. After this it would be a serious error to have 3232 * no model flag. 3233 */ 3234 if (copyflag == K_TO_K) 3235 flag = (flag & ~FMODELS) | FNATIVE; 3236 3237 ASSERT((flag & FMODELS) != 0); 3238 3239 wrq = stp->sd_wrq; 3240 rdq = _RD(wrq); 3241 3242 access = job_control_type(cmd); 3243 3244 /* We should never see these here, should be handled by iwscn */ 3245 if (cmd == SRIOCSREDIR || cmd == SRIOCISREDIR) 3246 return (EINVAL); 3247 3248 mutex_enter(&stp->sd_lock); 3249 if ((access != -1) && ((error = i_straccess(stp, access)) != 0)) { 3250 mutex_exit(&stp->sd_lock); 3251 return (error); 3252 } 3253 mutex_exit(&stp->sd_lock); 3254 3255 /* 3256 * Check for sgttyb-related ioctls first, and complain as 3257 * necessary. 
3258 */ 3259 switch (cmd) { 3260 case TIOCGETP: 3261 case TIOCSETP: 3262 case TIOCSETN: 3263 if (sgttyb_handling >= 2 && !sgttyb_complaint) { 3264 sgttyb_complaint = B_TRUE; 3265 cmn_err(CE_NOTE, 3266 "application used obsolete TIOC[GS]ET"); 3267 } 3268 if (sgttyb_handling >= 3) { 3269 tsignal(curthread, SIGSYS); 3270 return (EIO); 3271 } 3272 break; 3273 } 3274 3275 mutex_enter(&stp->sd_lock); 3276 3277 switch (cmd) { 3278 case I_RECVFD: 3279 case I_E_RECVFD: 3280 case I_PEEK: 3281 case I_NREAD: 3282 case FIONREAD: 3283 case FIORDCHK: 3284 case I_ATMARK: 3285 case FIONBIO: 3286 case FIOASYNC: 3287 if (stp->sd_flag & (STRDERR|STPLEX)) { 3288 error = strgeterr(stp, STRDERR|STPLEX, 0); 3289 if (error != 0) { 3290 mutex_exit(&stp->sd_lock); 3291 return (error); 3292 } 3293 } 3294 break; 3295 3296 default: 3297 if (stp->sd_flag & (STRDERR|STWRERR|STPLEX)) { 3298 error = strgeterr(stp, STRDERR|STWRERR|STPLEX, 0); 3299 if (error != 0) { 3300 mutex_exit(&stp->sd_lock); 3301 return (error); 3302 } 3303 } 3304 } 3305 3306 mutex_exit(&stp->sd_lock); 3307 3308 switch (cmd) { 3309 default: 3310 /* 3311 * The stream head has hardcoded knowledge of a 3312 * miscellaneous collection of terminal-, keyboard- and 3313 * mouse-related ioctls, enumerated below. This hardcoded 3314 * knowledge allows the stream head to automatically 3315 * convert transparent ioctl requests made by userland 3316 * programs into I_STR ioctls which many old STREAMS 3317 * modules and drivers require. 3318 * 3319 * No new ioctls should ever be added to this list. 3320 * Instead, the STREAMS module or driver should be written 3321 * to either handle transparent ioctls or require any 3322 * userland programs to use I_STR ioctls (by returning 3323 * EINVAL to any transparent ioctl requests). 
3324 * 3325 * More importantly, removing ioctls from this list should 3326 * be done with the utmost care, since our STREAMS modules 3327 * and drivers *count* on the stream head performing this 3328 * conversion, and thus may panic while processing 3329 * transparent ioctl request for one of these ioctls (keep 3330 * in mind that third party modules and drivers may have 3331 * similar problems). 3332 */ 3333 if (((cmd & IOCTYPE) == LDIOC) || 3334 ((cmd & IOCTYPE) == tIOC) || 3335 ((cmd & IOCTYPE) == TIOC) || 3336 ((cmd & IOCTYPE) == KIOC) || 3337 ((cmd & IOCTYPE) == MSIOC) || 3338 ((cmd & IOCTYPE) == VUIOC)) { 3339 /* 3340 * The ioctl is a tty ioctl - set up strioc buffer 3341 * and call strdoioctl() to do the work. 3342 */ 3343 if (stp->sd_flag & STRHUP) 3344 return (ENXIO); 3345 strioc.ic_cmd = cmd; 3346 strioc.ic_timout = INFTIM; 3347 3348 switch (cmd) { 3349 3350 case TCXONC: 3351 case TCSBRK: 3352 case TCFLSH: 3353 case TCDSET: 3354 { 3355 int native_arg = (int)arg; 3356 strioc.ic_len = sizeof (int); 3357 strioc.ic_dp = (char *)&native_arg; 3358 return (strdoioctl(stp, &strioc, flag, 3359 K_TO_K, crp, rvalp)); 3360 } 3361 3362 case TCSETA: 3363 case TCSETAW: 3364 case TCSETAF: 3365 strioc.ic_len = sizeof (struct termio); 3366 strioc.ic_dp = (char *)arg; 3367 return (strdoioctl(stp, &strioc, flag, 3368 copyflag, crp, rvalp)); 3369 3370 case TCSETS: 3371 case TCSETSW: 3372 case TCSETSF: 3373 strioc.ic_len = sizeof (struct termios); 3374 strioc.ic_dp = (char *)arg; 3375 return (strdoioctl(stp, &strioc, flag, 3376 copyflag, crp, rvalp)); 3377 3378 case LDSETT: 3379 strioc.ic_len = sizeof (struct termcb); 3380 strioc.ic_dp = (char *)arg; 3381 return (strdoioctl(stp, &strioc, flag, 3382 copyflag, crp, rvalp)); 3383 3384 case TIOCSETP: 3385 strioc.ic_len = sizeof (struct sgttyb); 3386 strioc.ic_dp = (char *)arg; 3387 return (strdoioctl(stp, &strioc, flag, 3388 copyflag, crp, rvalp)); 3389 3390 case TIOCSTI: 3391 if ((flag & FREAD) == 0 && 3392 secpolicy_sti(crp) != 
0) { 3393 return (EPERM); 3394 } 3395 mutex_enter(&stp->sd_lock); 3396 mutex_enter(&curproc->p_splock); 3397 if (stp->sd_sidp != curproc->p_sessp->s_sidp && 3398 secpolicy_sti(crp) != 0) { 3399 mutex_exit(&curproc->p_splock); 3400 mutex_exit(&stp->sd_lock); 3401 return (EACCES); 3402 } 3403 mutex_exit(&curproc->p_splock); 3404 mutex_exit(&stp->sd_lock); 3405 3406 strioc.ic_len = sizeof (char); 3407 strioc.ic_dp = (char *)arg; 3408 return (strdoioctl(stp, &strioc, flag, 3409 copyflag, crp, rvalp)); 3410 3411 case TIOCSWINSZ: 3412 strioc.ic_len = sizeof (struct winsize); 3413 strioc.ic_dp = (char *)arg; 3414 return (strdoioctl(stp, &strioc, flag, 3415 copyflag, crp, rvalp)); 3416 3417 case TIOCSSIZE: 3418 strioc.ic_len = sizeof (struct ttysize); 3419 strioc.ic_dp = (char *)arg; 3420 return (strdoioctl(stp, &strioc, flag, 3421 copyflag, crp, rvalp)); 3422 3423 case TIOCSSOFTCAR: 3424 case KIOCTRANS: 3425 case KIOCTRANSABLE: 3426 case KIOCCMD: 3427 case KIOCSDIRECT: 3428 case KIOCSCOMPAT: 3429 case KIOCSKABORTEN: 3430 case KIOCSRPTDELAY: 3431 case KIOCSRPTRATE: 3432 case VUIDSFORMAT: 3433 case TIOCSPPS: 3434 strioc.ic_len = sizeof (int); 3435 strioc.ic_dp = (char *)arg; 3436 return (strdoioctl(stp, &strioc, flag, 3437 copyflag, crp, rvalp)); 3438 3439 case KIOCSETKEY: 3440 case KIOCGETKEY: 3441 strioc.ic_len = sizeof (struct kiockey); 3442 strioc.ic_dp = (char *)arg; 3443 return (strdoioctl(stp, &strioc, flag, 3444 copyflag, crp, rvalp)); 3445 3446 case KIOCSKEY: 3447 case KIOCGKEY: 3448 strioc.ic_len = sizeof (struct kiockeymap); 3449 strioc.ic_dp = (char *)arg; 3450 return (strdoioctl(stp, &strioc, flag, 3451 copyflag, crp, rvalp)); 3452 3453 case KIOCSLED: 3454 /* arg is a pointer to char */ 3455 strioc.ic_len = sizeof (char); 3456 strioc.ic_dp = (char *)arg; 3457 return (strdoioctl(stp, &strioc, flag, 3458 copyflag, crp, rvalp)); 3459 3460 case MSIOSETPARMS: 3461 strioc.ic_len = sizeof (Ms_parms); 3462 strioc.ic_dp = (char *)arg; 3463 return (strdoioctl(stp, 
&strioc, flag, 3464 copyflag, crp, rvalp)); 3465 3466 case VUIDSADDR: 3467 case VUIDGADDR: 3468 strioc.ic_len = sizeof (struct vuid_addr_probe); 3469 strioc.ic_dp = (char *)arg; 3470 return (strdoioctl(stp, &strioc, flag, 3471 copyflag, crp, rvalp)); 3472 3473 /* 3474 * These M_IOCTL's don't require any data to be sent 3475 * downstream, and the driver will allocate and link 3476 * on its own mblk_t upon M_IOCACK -- thus we set 3477 * ic_len to zero and set ic_dp to arg so we know 3478 * where to copyout to later. 3479 */ 3480 case TIOCGSOFTCAR: 3481 case TIOCGWINSZ: 3482 case TIOCGSIZE: 3483 case KIOCGTRANS: 3484 case KIOCGTRANSABLE: 3485 case KIOCTYPE: 3486 case KIOCGDIRECT: 3487 case KIOCGCOMPAT: 3488 case KIOCLAYOUT: 3489 case KIOCGLED: 3490 case MSIOGETPARMS: 3491 case MSIOBUTTONS: 3492 case VUIDGFORMAT: 3493 case TIOCGPPS: 3494 case TIOCGPPSEV: 3495 case TCGETA: 3496 case TCGETS: 3497 case LDGETT: 3498 case TIOCGETP: 3499 case KIOCGRPTDELAY: 3500 case KIOCGRPTRATE: 3501 strioc.ic_len = 0; 3502 strioc.ic_dp = (char *)arg; 3503 return (strdoioctl(stp, &strioc, flag, 3504 copyflag, crp, rvalp)); 3505 } 3506 } 3507 3508 /* 3509 * Unknown cmd - send it down as a transparent ioctl. 3510 */ 3511 strioc.ic_cmd = cmd; 3512 strioc.ic_timout = INFTIM; 3513 strioc.ic_len = TRANSPARENT; 3514 strioc.ic_dp = (char *)&arg; 3515 3516 return (strdoioctl(stp, &strioc, flag, copyflag, crp, rvalp)); 3517 3518 case I_STR: 3519 /* 3520 * Stream ioctl. Read in an strioctl buffer from the user 3521 * along with any data specified and send it downstream. 3522 * Strdoioctl will wait allow only one ioctl message at 3523 * a time, and waits for the acknowledgement. 
3524 */ 3525 3526 if (stp->sd_flag & STRHUP) 3527 return (ENXIO); 3528 3529 error = strcopyin_strioctl((void *)arg, &strioc, flag, 3530 copyflag); 3531 if (error != 0) 3532 return (error); 3533 3534 if ((strioc.ic_len < 0) || (strioc.ic_timout < -1)) 3535 return (EINVAL); 3536 3537 access = job_control_type(strioc.ic_cmd); 3538 mutex_enter(&stp->sd_lock); 3539 if ((access != -1) && 3540 ((error = i_straccess(stp, access)) != 0)) { 3541 mutex_exit(&stp->sd_lock); 3542 return (error); 3543 } 3544 mutex_exit(&stp->sd_lock); 3545 3546 /* 3547 * The I_STR facility provides a trap door for malicious 3548 * code to send down bogus streamio(7I) ioctl commands to 3549 * unsuspecting STREAMS modules and drivers which expect to 3550 * only get these messages from the stream head. 3551 * Explicitly prohibit any streamio ioctls which can be 3552 * passed downstream by the stream head. Note that we do 3553 * not block all streamio ioctls because the ioctl 3554 * numberspace is not well managed and thus it's possible 3555 * that a module or driver's ioctl numbers may accidentally 3556 * collide with them. 3557 */ 3558 switch (strioc.ic_cmd) { 3559 case I_LINK: 3560 case I_PLINK: 3561 case I_UNLINK: 3562 case I_PUNLINK: 3563 case _I_GETPEERCRED: 3564 case _I_PLINK_LH: 3565 return (EINVAL); 3566 } 3567 3568 error = strdoioctl(stp, &strioc, flag, copyflag, crp, rvalp); 3569 if (error == 0) { 3570 error = strcopyout_strioctl(&strioc, (void *)arg, 3571 flag, copyflag); 3572 } 3573 return (error); 3574 3575 case _I_CMD: 3576 /* 3577 * Like I_STR, but without using M_IOC* messages and without 3578 * copyins/copyouts beyond the passed-in argument. 
3579 */ 3580 if (stp->sd_flag & STRHUP) 3581 return (ENXIO); 3582 3583 if ((scp = kmem_alloc(sizeof (strcmd_t), KM_NOSLEEP)) == NULL) 3584 return (ENOMEM); 3585 3586 if (copyin((void *)arg, scp, sizeof (strcmd_t))) { 3587 kmem_free(scp, sizeof (strcmd_t)); 3588 return (EFAULT); 3589 } 3590 3591 access = job_control_type(scp->sc_cmd); 3592 mutex_enter(&stp->sd_lock); 3593 if (access != -1 && (error = i_straccess(stp, access)) != 0) { 3594 mutex_exit(&stp->sd_lock); 3595 kmem_free(scp, sizeof (strcmd_t)); 3596 return (error); 3597 } 3598 mutex_exit(&stp->sd_lock); 3599 3600 *rvalp = 0; 3601 if ((error = strdocmd(stp, scp, crp)) == 0) { 3602 if (copyout(scp, (void *)arg, sizeof (strcmd_t))) 3603 error = EFAULT; 3604 } 3605 kmem_free(scp, sizeof (strcmd_t)); 3606 return (error); 3607 3608 case I_NREAD: 3609 /* 3610 * Return number of bytes of data in first message 3611 * in queue in "arg" and return the number of messages 3612 * in queue in return value. 3613 */ 3614 { 3615 size_t size; 3616 int retval; 3617 int count = 0; 3618 3619 mutex_enter(QLOCK(rdq)); 3620 3621 size = msgdsize(rdq->q_first); 3622 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next) 3623 count++; 3624 3625 mutex_exit(QLOCK(rdq)); 3626 if (stp->sd_struiordq) { 3627 infod_t infod; 3628 3629 infod.d_cmd = INFOD_COUNT; 3630 infod.d_count = 0; 3631 if (count == 0) { 3632 infod.d_cmd |= INFOD_FIRSTBYTES; 3633 infod.d_bytes = 0; 3634 } 3635 infod.d_res = 0; 3636 (void) infonext(rdq, &infod); 3637 count += infod.d_count; 3638 if (infod.d_res & INFOD_FIRSTBYTES) 3639 size = infod.d_bytes; 3640 } 3641 3642 /* 3643 * Drop down from size_t to the "int" required by the 3644 * interface. Cap at INT_MAX. 3645 */ 3646 retval = MIN(size, INT_MAX); 3647 error = strcopyout(&retval, (void *)arg, sizeof (retval), 3648 copyflag); 3649 if (!error) 3650 *rvalp = count; 3651 return (error); 3652 } 3653 3654 case FIONREAD: 3655 /* 3656 * Return number of bytes of data in all data messages 3657 * in queue in "arg". 
3658 */ 3659 { 3660 size_t size = 0; 3661 int retval; 3662 3663 mutex_enter(QLOCK(rdq)); 3664 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next) 3665 size += msgdsize(mp); 3666 mutex_exit(QLOCK(rdq)); 3667 3668 if (stp->sd_struiordq) { 3669 infod_t infod; 3670 3671 infod.d_cmd = INFOD_BYTES; 3672 infod.d_res = 0; 3673 infod.d_bytes = 0; 3674 (void) infonext(rdq, &infod); 3675 size += infod.d_bytes; 3676 } 3677 3678 /* 3679 * Drop down from size_t to the "int" required by the 3680 * interface. Cap at INT_MAX. 3681 */ 3682 retval = MIN(size, INT_MAX); 3683 error = strcopyout(&retval, (void *)arg, sizeof (retval), 3684 copyflag); 3685 3686 *rvalp = 0; 3687 return (error); 3688 } 3689 case FIORDCHK: 3690 /* 3691 * FIORDCHK does not use arg value (like FIONREAD), 3692 * instead a count is returned. I_NREAD value may 3693 * not be accurate but safe. The real thing to do is 3694 * to add the msgdsizes of all data messages until 3695 * a non-data message. 3696 */ 3697 { 3698 size_t size = 0; 3699 3700 mutex_enter(QLOCK(rdq)); 3701 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next) 3702 size += msgdsize(mp); 3703 mutex_exit(QLOCK(rdq)); 3704 3705 if (stp->sd_struiordq) { 3706 infod_t infod; 3707 3708 infod.d_cmd = INFOD_BYTES; 3709 infod.d_res = 0; 3710 infod.d_bytes = 0; 3711 (void) infonext(rdq, &infod); 3712 size += infod.d_bytes; 3713 } 3714 3715 /* 3716 * Since ioctl returns an int, and memory sizes under 3717 * LP64 may not fit, we return INT_MAX if the count was 3718 * actually greater. 3719 */ 3720 *rvalp = MIN(size, INT_MAX); 3721 return (0); 3722 } 3723 3724 case I_FIND: 3725 /* 3726 * Get module name. 3727 */ 3728 { 3729 char mname[FMNAMESZ + 1]; 3730 queue_t *q; 3731 3732 error = (copyflag & U_TO_K ? copyinstr : copystr)((void *)arg, 3733 mname, FMNAMESZ + 1, NULL); 3734 if (error) 3735 return ((error == ENAMETOOLONG) ? EINVAL : EFAULT); 3736 3737 /* 3738 * Return EINVAL if we're handed a bogus module name. 
3739 */ 3740 if (fmodsw_find(mname, FMODSW_LOAD) == NULL) { 3741 TRACE_0(TR_FAC_STREAMS_FR, 3742 TR_I_CANT_FIND, "couldn't I_FIND"); 3743 return (EINVAL); 3744 } 3745 3746 *rvalp = 0; 3747 3748 /* Look downstream to see if module is there. */ 3749 claimstr(stp->sd_wrq); 3750 for (q = stp->sd_wrq->q_next; q; q = q->q_next) { 3751 if (q->q_flag & QREADR) { 3752 q = NULL; 3753 break; 3754 } 3755 if (strcmp(mname, Q2NAME(q)) == 0) 3756 break; 3757 } 3758 releasestr(stp->sd_wrq); 3759 3760 *rvalp = (q ? 1 : 0); 3761 return (error); 3762 } 3763 3764 case I_PUSH: 3765 case __I_PUSH_NOCTTY: 3766 /* 3767 * Push a module. 3768 * For the case __I_PUSH_NOCTTY push a module but 3769 * do not allocate controlling tty. See bugid 4025044 3770 */ 3771 3772 { 3773 char mname[FMNAMESZ + 1]; 3774 fmodsw_impl_t *fp; 3775 dev_t dummydev; 3776 3777 if (stp->sd_flag & STRHUP) 3778 return (ENXIO); 3779 3780 /* 3781 * Get module name and look up in fmodsw. 3782 */ 3783 error = (copyflag & U_TO_K ? copyinstr : copystr)((void *)arg, 3784 mname, FMNAMESZ + 1, NULL); 3785 if (error) 3786 return ((error == ENAMETOOLONG) ? EINVAL : EFAULT); 3787 3788 if ((fp = fmodsw_find(mname, FMODSW_HOLD | FMODSW_LOAD)) == 3789 NULL) 3790 return (EINVAL); 3791 3792 TRACE_2(TR_FAC_STREAMS_FR, TR_I_PUSH, 3793 "I_PUSH:fp %p stp %p", fp, stp); 3794 3795 if (error = strstartplumb(stp, flag, cmd)) { 3796 fmodsw_rele(fp); 3797 return (error); 3798 } 3799 3800 /* 3801 * See if any more modules can be pushed on this stream. 3802 * Note that this check must be done after strstartplumb() 3803 * since otherwise multiple threads issuing I_PUSHes on 3804 * the same stream will be able to exceed nstrpush. 3805 */ 3806 mutex_enter(&stp->sd_lock); 3807 if (stp->sd_pushcnt >= nstrpush) { 3808 fmodsw_rele(fp); 3809 strendplumb(stp); 3810 mutex_exit(&stp->sd_lock); 3811 return (EINVAL); 3812 } 3813 mutex_exit(&stp->sd_lock); 3814 3815 /* 3816 * Push new module and call its open routine 3817 * via qattach(). 
Modules don't change device 3818 * numbers, so just ignore dummydev here. 3819 */ 3820 dummydev = vp->v_rdev; 3821 if ((error = qattach(rdq, &dummydev, 0, crp, fp, 3822 B_FALSE)) == 0) { 3823 if (vp->v_type == VCHR && /* sorry, no pipes allowed */ 3824 (cmd == I_PUSH) && (stp->sd_flag & STRISTTY)) { 3825 /* 3826 * try to allocate it as a controlling terminal 3827 */ 3828 (void) strctty(stp); 3829 } 3830 } 3831 3832 mutex_enter(&stp->sd_lock); 3833 3834 /* 3835 * As a performance concern we are caching the values of 3836 * q_minpsz and q_maxpsz of the module below the stream 3837 * head in the stream head. 3838 */ 3839 mutex_enter(QLOCK(stp->sd_wrq->q_next)); 3840 rmin = stp->sd_wrq->q_next->q_minpsz; 3841 rmax = stp->sd_wrq->q_next->q_maxpsz; 3842 mutex_exit(QLOCK(stp->sd_wrq->q_next)); 3843 3844 /* Do this processing here as a performance concern */ 3845 if (strmsgsz != 0) { 3846 if (rmax == INFPSZ) 3847 rmax = strmsgsz; 3848 else { 3849 if (vp->v_type == VFIFO) 3850 rmax = MIN(PIPE_BUF, rmax); 3851 else rmax = MIN(strmsgsz, rmax); 3852 } 3853 } 3854 3855 mutex_enter(QLOCK(wrq)); 3856 stp->sd_qn_minpsz = rmin; 3857 stp->sd_qn_maxpsz = rmax; 3858 mutex_exit(QLOCK(wrq)); 3859 3860 strendplumb(stp); 3861 mutex_exit(&stp->sd_lock); 3862 return (error); 3863 } 3864 3865 case I_POP: 3866 { 3867 queue_t *q; 3868 3869 if (stp->sd_flag & STRHUP) 3870 return (ENXIO); 3871 if (!wrq->q_next) /* for broken pipes */ 3872 return (EINVAL); 3873 3874 if (error = strstartplumb(stp, flag, cmd)) 3875 return (error); 3876 3877 /* 3878 * If there is an anchor on this stream and popping 3879 * the current module would attempt to pop through the 3880 * anchor, then disallow the pop unless we have sufficient 3881 * privileges; take the cheapest (non-locking) check 3882 * first. 
3883 */ 3884 if (secpolicy_ip_config(crp, B_TRUE) != 0 || 3885 (stp->sd_anchorzone != crgetzoneid(crp))) { 3886 mutex_enter(&stp->sd_lock); 3887 /* 3888 * Anchors only apply if there's at least one 3889 * module on the stream (sd_pushcnt > 0). 3890 */ 3891 if (stp->sd_pushcnt > 0 && 3892 stp->sd_pushcnt == stp->sd_anchor && 3893 stp->sd_vnode->v_type != VFIFO) { 3894 strendplumb(stp); 3895 mutex_exit(&stp->sd_lock); 3896 if (stp->sd_anchorzone != crgetzoneid(crp)) 3897 return (EINVAL); 3898 /* Audit and report error */ 3899 return (secpolicy_ip_config(crp, B_FALSE)); 3900 } 3901 mutex_exit(&stp->sd_lock); 3902 } 3903 3904 q = wrq->q_next; 3905 TRACE_2(TR_FAC_STREAMS_FR, TR_I_POP, 3906 "I_POP:%p from %p", q, stp); 3907 if (q->q_next == NULL || (q->q_flag & (QREADR|QISDRV))) { 3908 error = EINVAL; 3909 } else { 3910 qdetach(_RD(q), 1, flag, crp, B_FALSE); 3911 error = 0; 3912 } 3913 mutex_enter(&stp->sd_lock); 3914 3915 /* 3916 * As a performance concern we are caching the values of 3917 * q_minpsz and q_maxpsz of the module below the stream 3918 * head in the stream head. 3919 */ 3920 mutex_enter(QLOCK(wrq->q_next)); 3921 rmin = wrq->q_next->q_minpsz; 3922 rmax = wrq->q_next->q_maxpsz; 3923 mutex_exit(QLOCK(wrq->q_next)); 3924 3925 /* Do this processing here as a performance concern */ 3926 if (strmsgsz != 0) { 3927 if (rmax == INFPSZ) 3928 rmax = strmsgsz; 3929 else { 3930 if (vp->v_type == VFIFO) 3931 rmax = MIN(PIPE_BUF, rmax); 3932 else rmax = MIN(strmsgsz, rmax); 3933 } 3934 } 3935 3936 mutex_enter(QLOCK(wrq)); 3937 stp->sd_qn_minpsz = rmin; 3938 stp->sd_qn_maxpsz = rmax; 3939 mutex_exit(QLOCK(wrq)); 3940 3941 /* If we popped through the anchor, then reset the anchor. 
*/ 3942 if (stp->sd_pushcnt < stp->sd_anchor) { 3943 stp->sd_anchor = 0; 3944 stp->sd_anchorzone = 0; 3945 } 3946 strendplumb(stp); 3947 mutex_exit(&stp->sd_lock); 3948 return (error); 3949 } 3950 3951 case _I_MUXID2FD: 3952 { 3953 /* 3954 * Create a fd for a I_PLINK'ed lower stream with a given 3955 * muxid. With the fd, application can send down ioctls, 3956 * like I_LIST, to the previously I_PLINK'ed stream. Note 3957 * that after getting the fd, the application has to do an 3958 * I_PUNLINK on the muxid before it can do any operation 3959 * on the lower stream. This is required by spec1170. 3960 * 3961 * The fd used to do this ioctl should point to the same 3962 * controlling device used to do the I_PLINK. If it uses 3963 * a different stream or an invalid muxid, I_MUXID2FD will 3964 * fail. The error code is set to EINVAL. 3965 * 3966 * The intended use of this interface is the following. 3967 * An application I_PLINK'ed a stream and exits. The fd 3968 * to the lower stream is gone. Another application 3969 * wants to get a fd to the lower stream, it uses I_MUXID2FD. 3970 */ 3971 int muxid = (int)arg; 3972 int fd; 3973 linkinfo_t *linkp; 3974 struct file *fp; 3975 netstack_t *ns; 3976 str_stack_t *ss; 3977 3978 /* 3979 * Do not allow the wildcard muxid. This ioctl is not 3980 * intended to find arbitrary link. 
3981 */ 3982 if (muxid == 0) { 3983 return (EINVAL); 3984 } 3985 3986 ns = netstack_find_by_cred(crp); 3987 ASSERT(ns != NULL); 3988 ss = ns->netstack_str; 3989 ASSERT(ss != NULL); 3990 3991 mutex_enter(&muxifier); 3992 linkp = findlinks(vp->v_stream, muxid, LINKPERSIST, ss); 3993 if (linkp == NULL) { 3994 mutex_exit(&muxifier); 3995 netstack_rele(ss->ss_netstack); 3996 return (EINVAL); 3997 } 3998 3999 if ((fd = ufalloc(0)) == -1) { 4000 mutex_exit(&muxifier); 4001 netstack_rele(ss->ss_netstack); 4002 return (EMFILE); 4003 } 4004 fp = linkp->li_fpdown; 4005 mutex_enter(&fp->f_tlock); 4006 fp->f_count++; 4007 mutex_exit(&fp->f_tlock); 4008 mutex_exit(&muxifier); 4009 setf(fd, fp); 4010 *rvalp = fd; 4011 netstack_rele(ss->ss_netstack); 4012 return (0); 4013 } 4014 4015 case _I_INSERT: 4016 { 4017 /* 4018 * To insert a module to a given position in a stream. 4019 * In the first release, only allow privileged user 4020 * to use this ioctl. Furthermore, the insert is only allowed 4021 * below an anchor if the zoneid is the same as the zoneid 4022 * which created the anchor. 4023 * 4024 * Note that we do not plan to support this ioctl 4025 * on pipes in the first release. We want to learn more 4026 * about the implications of these ioctls before extending 4027 * their support. And we do not think these features are 4028 * valuable for pipes. 
4029 */ 4030 STRUCT_DECL(strmodconf, strmodinsert); 4031 char mod_name[FMNAMESZ + 1]; 4032 fmodsw_impl_t *fp; 4033 dev_t dummydev; 4034 queue_t *tmp_wrq; 4035 int pos; 4036 boolean_t is_insert; 4037 4038 STRUCT_INIT(strmodinsert, flag); 4039 if (stp->sd_flag & STRHUP) 4040 return (ENXIO); 4041 if (STRMATED(stp)) 4042 return (EINVAL); 4043 if ((error = secpolicy_net_config(crp, B_FALSE)) != 0) 4044 return (error); 4045 if (stp->sd_anchor != 0 && 4046 stp->sd_anchorzone != crgetzoneid(crp)) 4047 return (EINVAL); 4048 4049 error = strcopyin((void *)arg, STRUCT_BUF(strmodinsert), 4050 STRUCT_SIZE(strmodinsert), copyflag); 4051 if (error) 4052 return (error); 4053 4054 /* 4055 * Get module name and look up in fmodsw. 4056 */ 4057 error = (copyflag & U_TO_K ? copyinstr : 4058 copystr)(STRUCT_FGETP(strmodinsert, mod_name), 4059 mod_name, FMNAMESZ + 1, NULL); 4060 if (error) 4061 return ((error == ENAMETOOLONG) ? EINVAL : EFAULT); 4062 4063 if ((fp = fmodsw_find(mod_name, FMODSW_HOLD | FMODSW_LOAD)) == 4064 NULL) 4065 return (EINVAL); 4066 4067 if (error = strstartplumb(stp, flag, cmd)) { 4068 fmodsw_rele(fp); 4069 return (error); 4070 } 4071 4072 /* 4073 * Is this _I_INSERT just like an I_PUSH? We need to know 4074 * this because we do some optimizations if this is a 4075 * module being pushed. 4076 */ 4077 pos = STRUCT_FGET(strmodinsert, pos); 4078 is_insert = (pos != 0); 4079 4080 /* 4081 * Make sure pos is valid. Even though it is not an I_PUSH, 4082 * we impose the same limit on the number of modules in a 4083 * stream. 4084 */ 4085 mutex_enter(&stp->sd_lock); 4086 if (stp->sd_pushcnt >= nstrpush || pos < 0 || 4087 pos > stp->sd_pushcnt) { 4088 fmodsw_rele(fp); 4089 strendplumb(stp); 4090 mutex_exit(&stp->sd_lock); 4091 return (EINVAL); 4092 } 4093 if (stp->sd_anchor != 0) { 4094 /* 4095 * Is this insert below the anchor? 4096 * Pushcnt hasn't been increased yet hence 4097 * we test for greater than here, and greater or 4098 * equal after qattach. 
4099 */ 4100 if (pos > (stp->sd_pushcnt - stp->sd_anchor) && 4101 stp->sd_anchorzone != crgetzoneid(crp)) { 4102 fmodsw_rele(fp); 4103 strendplumb(stp); 4104 mutex_exit(&stp->sd_lock); 4105 return (EPERM); 4106 } 4107 } 4108 4109 mutex_exit(&stp->sd_lock); 4110 4111 /* 4112 * First find the correct position this module to 4113 * be inserted. We don't need to call claimstr() 4114 * as the stream should not be changing at this point. 4115 * 4116 * Insert new module and call its open routine 4117 * via qattach(). Modules don't change device 4118 * numbers, so just ignore dummydev here. 4119 */ 4120 for (tmp_wrq = stp->sd_wrq; pos > 0; 4121 tmp_wrq = tmp_wrq->q_next, pos--) { 4122 ASSERT(SAMESTR(tmp_wrq)); 4123 } 4124 dummydev = vp->v_rdev; 4125 if ((error = qattach(_RD(tmp_wrq), &dummydev, 0, crp, 4126 fp, is_insert)) != 0) { 4127 mutex_enter(&stp->sd_lock); 4128 strendplumb(stp); 4129 mutex_exit(&stp->sd_lock); 4130 return (error); 4131 } 4132 4133 mutex_enter(&stp->sd_lock); 4134 4135 /* 4136 * As a performance concern we are caching the values of 4137 * q_minpsz and q_maxpsz of the module below the stream 4138 * head in the stream head. 4139 */ 4140 if (!is_insert) { 4141 mutex_enter(QLOCK(stp->sd_wrq->q_next)); 4142 rmin = stp->sd_wrq->q_next->q_minpsz; 4143 rmax = stp->sd_wrq->q_next->q_maxpsz; 4144 mutex_exit(QLOCK(stp->sd_wrq->q_next)); 4145 4146 /* Do this processing here as a performance concern */ 4147 if (strmsgsz != 0) { 4148 if (rmax == INFPSZ) { 4149 rmax = strmsgsz; 4150 } else { 4151 rmax = MIN(strmsgsz, rmax); 4152 } 4153 } 4154 4155 mutex_enter(QLOCK(wrq)); 4156 stp->sd_qn_minpsz = rmin; 4157 stp->sd_qn_maxpsz = rmax; 4158 mutex_exit(QLOCK(wrq)); 4159 } 4160 4161 /* 4162 * Need to update the anchor value if this module is 4163 * inserted below the anchor point. 
4164 */ 4165 if (stp->sd_anchor != 0) { 4166 pos = STRUCT_FGET(strmodinsert, pos); 4167 if (pos >= (stp->sd_pushcnt - stp->sd_anchor)) 4168 stp->sd_anchor++; 4169 } 4170 4171 strendplumb(stp); 4172 mutex_exit(&stp->sd_lock); 4173 return (0); 4174 } 4175 4176 case _I_REMOVE: 4177 { 4178 /* 4179 * To remove a module with a given name in a stream. The 4180 * caller of this ioctl needs to provide both the name and 4181 * the position of the module to be removed. This eliminates 4182 * the ambiguity of removal if a module is inserted/pushed 4183 * multiple times in a stream. In the first release, only 4184 * allow privileged user to use this ioctl. 4185 * Furthermore, the remove is only allowed 4186 * below an anchor if the zoneid is the same as the zoneid 4187 * which created the anchor. 4188 * 4189 * Note that we do not plan to support this ioctl 4190 * on pipes in the first release. We want to learn more 4191 * about the implications of these ioctls before extending 4192 * their support. And we do not think these features are 4193 * valuable for pipes. 4194 * 4195 * Also note that _I_REMOVE cannot be used to remove a 4196 * driver or the stream head. 4197 */ 4198 STRUCT_DECL(strmodconf, strmodremove); 4199 queue_t *q; 4200 int pos; 4201 char mod_name[FMNAMESZ + 1]; 4202 boolean_t is_remove; 4203 4204 STRUCT_INIT(strmodremove, flag); 4205 if (stp->sd_flag & STRHUP) 4206 return (ENXIO); 4207 if (STRMATED(stp)) 4208 return (EINVAL); 4209 if ((error = secpolicy_net_config(crp, B_FALSE)) != 0) 4210 return (error); 4211 if (stp->sd_anchor != 0 && 4212 stp->sd_anchorzone != crgetzoneid(crp)) 4213 return (EINVAL); 4214 4215 error = strcopyin((void *)arg, STRUCT_BUF(strmodremove), 4216 STRUCT_SIZE(strmodremove), copyflag); 4217 if (error) 4218 return (error); 4219 4220 error = (copyflag & U_TO_K ? copyinstr : 4221 copystr)(STRUCT_FGETP(strmodremove, mod_name), 4222 mod_name, FMNAMESZ + 1, NULL); 4223 if (error) 4224 return ((error == ENAMETOOLONG) ? 
EINVAL : EFAULT); 4225 4226 if ((error = strstartplumb(stp, flag, cmd)) != 0) 4227 return (error); 4228 4229 /* 4230 * Match the name of given module to the name of module at 4231 * the given position. 4232 */ 4233 pos = STRUCT_FGET(strmodremove, pos); 4234 4235 is_remove = (pos != 0); 4236 for (q = stp->sd_wrq->q_next; SAMESTR(q) && pos > 0; 4237 q = q->q_next, pos--) 4238 ; 4239 if (pos > 0 || !SAMESTR(q) || 4240 strcmp(Q2NAME(q), mod_name) != 0) { 4241 mutex_enter(&stp->sd_lock); 4242 strendplumb(stp); 4243 mutex_exit(&stp->sd_lock); 4244 return (EINVAL); 4245 } 4246 4247 /* 4248 * If the position is at or below an anchor, then the zoneid 4249 * must match the zoneid that created the anchor. 4250 */ 4251 if (stp->sd_anchor != 0) { 4252 pos = STRUCT_FGET(strmodremove, pos); 4253 if (pos >= (stp->sd_pushcnt - stp->sd_anchor) && 4254 stp->sd_anchorzone != crgetzoneid(crp)) { 4255 mutex_enter(&stp->sd_lock); 4256 strendplumb(stp); 4257 mutex_exit(&stp->sd_lock); 4258 return (EPERM); 4259 } 4260 } 4261 4262 4263 ASSERT(!(q->q_flag & QREADR)); 4264 qdetach(_RD(q), 1, flag, crp, is_remove); 4265 4266 mutex_enter(&stp->sd_lock); 4267 4268 /* 4269 * As a performance concern we are caching the values of 4270 * q_minpsz and q_maxpsz of the module below the stream 4271 * head in the stream head. 4272 */ 4273 if (!is_remove) { 4274 mutex_enter(QLOCK(wrq->q_next)); 4275 rmin = wrq->q_next->q_minpsz; 4276 rmax = wrq->q_next->q_maxpsz; 4277 mutex_exit(QLOCK(wrq->q_next)); 4278 4279 /* Do this processing here as a performance concern */ 4280 if (strmsgsz != 0) { 4281 if (rmax == INFPSZ) 4282 rmax = strmsgsz; 4283 else { 4284 if (vp->v_type == VFIFO) 4285 rmax = MIN(PIPE_BUF, rmax); 4286 else rmax = MIN(strmsgsz, rmax); 4287 } 4288 } 4289 4290 mutex_enter(QLOCK(wrq)); 4291 stp->sd_qn_minpsz = rmin; 4292 stp->sd_qn_maxpsz = rmax; 4293 mutex_exit(QLOCK(wrq)); 4294 } 4295 4296 /* 4297 * Need to update the anchor value if this module is removed 4298 * at or below the anchor point. 
If the removed module is at 4299 * the anchor point, remove the anchor for this stream if 4300 * there is no module above the anchor point. Otherwise, if 4301 * the removed module is below the anchor point, decrement the 4302 * anchor point by 1. 4303 */ 4304 if (stp->sd_anchor != 0) { 4305 pos = STRUCT_FGET(strmodremove, pos); 4306 if (pos == stp->sd_pushcnt - stp->sd_anchor + 1) 4307 stp->sd_anchor = 0; 4308 else if (pos > (stp->sd_pushcnt - stp->sd_anchor + 1)) 4309 stp->sd_anchor--; 4310 } 4311 4312 strendplumb(stp); 4313 mutex_exit(&stp->sd_lock); 4314 return (0); 4315 } 4316 4317 case I_ANCHOR: 4318 /* 4319 * Set the anchor position on the stream to reside at 4320 * the top module (in other words, the top module 4321 * cannot be popped). Anchors with a FIFO make no 4322 * obvious sense, so they're not allowed. 4323 */ 4324 mutex_enter(&stp->sd_lock); 4325 4326 if (stp->sd_vnode->v_type == VFIFO) { 4327 mutex_exit(&stp->sd_lock); 4328 return (EINVAL); 4329 } 4330 /* Only allow the same zoneid to update the anchor */ 4331 if (stp->sd_anchor != 0 && 4332 stp->sd_anchorzone != crgetzoneid(crp)) { 4333 mutex_exit(&stp->sd_lock); 4334 return (EINVAL); 4335 } 4336 stp->sd_anchor = stp->sd_pushcnt; 4337 stp->sd_anchorzone = crgetzoneid(crp); 4338 mutex_exit(&stp->sd_lock); 4339 return (0); 4340 4341 case I_LOOK: 4342 /* 4343 * Get name of first module downstream. 4344 * If no module, return an error. 4345 */ 4346 claimstr(wrq); 4347 if (_SAMESTR(wrq) && wrq->q_next->q_next != NULL) { 4348 char *name = Q2NAME(wrq->q_next); 4349 4350 error = strcopyout(name, (void *)arg, strlen(name) + 1, 4351 copyflag); 4352 releasestr(wrq); 4353 return (error); 4354 } 4355 releasestr(wrq); 4356 return (EINVAL); 4357 4358 case I_LINK: 4359 case I_PLINK: 4360 /* 4361 * Link a multiplexor. 4362 */ 4363 return (mlink(vp, cmd, (int)arg, crp, rvalp, 0)); 4364 4365 case _I_PLINK_LH: 4366 /* 4367 * Link a multiplexor: Call must originate from kernel. 
4368 */ 4369 if (kioctl) 4370 return (ldi_mlink_lh(vp, cmd, arg, crp, rvalp)); 4371 4372 return (EINVAL); 4373 case I_UNLINK: 4374 case I_PUNLINK: 4375 /* 4376 * Unlink a multiplexor. 4377 * If arg is -1, unlink all links for which this is the 4378 * controlling stream. Otherwise, arg is an index number 4379 * for a link to be removed. 4380 */ 4381 { 4382 struct linkinfo *linkp; 4383 int native_arg = (int)arg; 4384 int type; 4385 netstack_t *ns; 4386 str_stack_t *ss; 4387 4388 TRACE_1(TR_FAC_STREAMS_FR, 4389 TR_I_UNLINK, "I_UNLINK/I_PUNLINK:%p", stp); 4390 if (vp->v_type == VFIFO) { 4391 return (EINVAL); 4392 } 4393 if (cmd == I_UNLINK) 4394 type = LINKNORMAL; 4395 else /* I_PUNLINK */ 4396 type = LINKPERSIST; 4397 if (native_arg == 0) { 4398 return (EINVAL); 4399 } 4400 ns = netstack_find_by_cred(crp); 4401 ASSERT(ns != NULL); 4402 ss = ns->netstack_str; 4403 ASSERT(ss != NULL); 4404 4405 if (native_arg == MUXID_ALL) 4406 error = munlinkall(stp, type, crp, rvalp, ss); 4407 else { 4408 mutex_enter(&muxifier); 4409 if (!(linkp = findlinks(stp, (int)arg, type, ss))) { 4410 /* invalid user supplied index number */ 4411 mutex_exit(&muxifier); 4412 netstack_rele(ss->ss_netstack); 4413 return (EINVAL); 4414 } 4415 /* munlink drops the muxifier lock */ 4416 error = munlink(stp, linkp, type, crp, rvalp, ss); 4417 } 4418 netstack_rele(ss->ss_netstack); 4419 return (error); 4420 } 4421 4422 case I_FLUSH: 4423 /* 4424 * send a flush message downstream 4425 * flush message can indicate 4426 * FLUSHR - flush read queue 4427 * FLUSHW - flush write queue 4428 * FLUSHRW - flush read/write queue 4429 */ 4430 if (stp->sd_flag & STRHUP) 4431 return (ENXIO); 4432 if (arg & ~FLUSHRW) 4433 return (EINVAL); 4434 4435 for (;;) { 4436 if (putnextctl1(stp->sd_wrq, M_FLUSH, (int)arg)) { 4437 break; 4438 } 4439 if (error = strwaitbuf(1, BPRI_HI)) { 4440 return (error); 4441 } 4442 } 4443 4444 /* 4445 * Send down an unsupported ioctl and wait for the nack 4446 * in order to allow the M_FLUSH 
to propagate back 4447 * up to the stream head. 4448 * Replaces if (qready()) runqueues(); 4449 */ 4450 strioc.ic_cmd = -1; /* The unsupported ioctl */ 4451 strioc.ic_timout = 0; 4452 strioc.ic_len = 0; 4453 strioc.ic_dp = NULL; 4454 (void) strdoioctl(stp, &strioc, flag, K_TO_K, crp, rvalp); 4455 *rvalp = 0; 4456 return (0); 4457 4458 case I_FLUSHBAND: 4459 { 4460 struct bandinfo binfo; 4461 4462 error = strcopyin((void *)arg, &binfo, sizeof (binfo), 4463 copyflag); 4464 if (error) 4465 return (error); 4466 if (stp->sd_flag & STRHUP) 4467 return (ENXIO); 4468 if (binfo.bi_flag & ~FLUSHRW) 4469 return (EINVAL); 4470 while (!(mp = allocb(2, BPRI_HI))) { 4471 if (error = strwaitbuf(2, BPRI_HI)) 4472 return (error); 4473 } 4474 mp->b_datap->db_type = M_FLUSH; 4475 *mp->b_wptr++ = binfo.bi_flag | FLUSHBAND; 4476 *mp->b_wptr++ = binfo.bi_pri; 4477 putnext(stp->sd_wrq, mp); 4478 /* 4479 * Send down an unsupported ioctl and wait for the nack 4480 * in order to allow the M_FLUSH to propagate back 4481 * up to the stream head. 
4482 * Replaces if (qready()) runqueues(); 4483 */ 4484 strioc.ic_cmd = -1; /* The unsupported ioctl */ 4485 strioc.ic_timout = 0; 4486 strioc.ic_len = 0; 4487 strioc.ic_dp = NULL; 4488 (void) strdoioctl(stp, &strioc, flag, K_TO_K, crp, rvalp); 4489 *rvalp = 0; 4490 return (0); 4491 } 4492 4493 case I_SRDOPT: 4494 /* 4495 * Set read options 4496 * 4497 * RNORM - default stream mode 4498 * RMSGN - message no discard 4499 * RMSGD - message discard 4500 * RPROTNORM - fail read with EBADMSG for M_[PC]PROTOs 4501 * RPROTDAT - convert M_[PC]PROTOs to M_DATAs 4502 * RPROTDIS - discard M_[PC]PROTOs and retain M_DATAs 4503 */ 4504 if (arg & ~(RMODEMASK | RPROTMASK)) 4505 return (EINVAL); 4506 4507 if ((arg & (RMSGD|RMSGN)) == (RMSGD|RMSGN)) 4508 return (EINVAL); 4509 4510 mutex_enter(&stp->sd_lock); 4511 switch (arg & RMODEMASK) { 4512 case RNORM: 4513 stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS); 4514 break; 4515 case RMSGD: 4516 stp->sd_read_opt = (stp->sd_read_opt & ~RD_MSGNODIS) | 4517 RD_MSGDIS; 4518 break; 4519 case RMSGN: 4520 stp->sd_read_opt = (stp->sd_read_opt & ~RD_MSGDIS) | 4521 RD_MSGNODIS; 4522 break; 4523 } 4524 4525 switch (arg & RPROTMASK) { 4526 case RPROTNORM: 4527 stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS); 4528 break; 4529 4530 case RPROTDAT: 4531 stp->sd_read_opt = ((stp->sd_read_opt & ~RD_PROTDIS) | 4532 RD_PROTDAT); 4533 break; 4534 4535 case RPROTDIS: 4536 stp->sd_read_opt = ((stp->sd_read_opt & ~RD_PROTDAT) | 4537 RD_PROTDIS); 4538 break; 4539 } 4540 mutex_exit(&stp->sd_lock); 4541 return (0); 4542 4543 case I_GRDOPT: 4544 /* 4545 * Get read option and return the value 4546 * to spot pointed to by arg 4547 */ 4548 { 4549 int rdopt; 4550 4551 rdopt = ((stp->sd_read_opt & RD_MSGDIS) ? RMSGD : 4552 ((stp->sd_read_opt & RD_MSGNODIS) ? RMSGN : RNORM)); 4553 rdopt |= ((stp->sd_read_opt & RD_PROTDAT) ? RPROTDAT : 4554 ((stp->sd_read_opt & RD_PROTDIS) ? 
RPROTDIS : RPROTNORM)); 4555 4556 return (strcopyout(&rdopt, (void *)arg, sizeof (int), 4557 copyflag)); 4558 } 4559 4560 case I_SERROPT: 4561 /* 4562 * Set error options 4563 * 4564 * RERRNORM - persistent read errors 4565 * RERRNONPERSIST - non-persistent read errors 4566 * WERRNORM - persistent write errors 4567 * WERRNONPERSIST - non-persistent write errors 4568 */ 4569 if (arg & ~(RERRMASK | WERRMASK)) 4570 return (EINVAL); 4571 4572 mutex_enter(&stp->sd_lock); 4573 switch (arg & RERRMASK) { 4574 case RERRNORM: 4575 stp->sd_flag &= ~STRDERRNONPERSIST; 4576 break; 4577 case RERRNONPERSIST: 4578 stp->sd_flag |= STRDERRNONPERSIST; 4579 break; 4580 } 4581 switch (arg & WERRMASK) { 4582 case WERRNORM: 4583 stp->sd_flag &= ~STWRERRNONPERSIST; 4584 break; 4585 case WERRNONPERSIST: 4586 stp->sd_flag |= STWRERRNONPERSIST; 4587 break; 4588 } 4589 mutex_exit(&stp->sd_lock); 4590 return (0); 4591 4592 case I_GERROPT: 4593 /* 4594 * Get error option and return the value 4595 * to spot pointed to by arg 4596 */ 4597 { 4598 int erropt = 0; 4599 4600 erropt |= (stp->sd_flag & STRDERRNONPERSIST) ? RERRNONPERSIST : 4601 RERRNORM; 4602 erropt |= (stp->sd_flag & STWRERRNONPERSIST) ? WERRNONPERSIST : 4603 WERRNORM; 4604 return (strcopyout(&erropt, (void *)arg, sizeof (int), 4605 copyflag)); 4606 } 4607 4608 case I_SETSIG: 4609 /* 4610 * Register the calling proc to receive the SIGPOLL 4611 * signal based on the events given in arg. If 4612 * arg is zero, remove the proc from register list. 4613 */ 4614 { 4615 strsig_t *ssp, *pssp; 4616 struct pid *pidp; 4617 4618 pssp = NULL; 4619 pidp = curproc->p_pidp; 4620 /* 4621 * Hold sd_lock to prevent traversal of sd_siglist while 4622 * it is modified. 
4623 */ 4624 mutex_enter(&stp->sd_lock); 4625 for (ssp = stp->sd_siglist; ssp && (ssp->ss_pidp != pidp); 4626 pssp = ssp, ssp = ssp->ss_next) 4627 ; 4628 4629 if (arg) { 4630 if (arg & ~(S_INPUT|S_HIPRI|S_MSG|S_HANGUP|S_ERROR| 4631 S_RDNORM|S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)) { 4632 mutex_exit(&stp->sd_lock); 4633 return (EINVAL); 4634 } 4635 if ((arg & S_BANDURG) && !(arg & S_RDBAND)) { 4636 mutex_exit(&stp->sd_lock); 4637 return (EINVAL); 4638 } 4639 4640 /* 4641 * If proc not already registered, add it 4642 * to list. 4643 */ 4644 if (!ssp) { 4645 ssp = kmem_alloc(sizeof (strsig_t), KM_SLEEP); 4646 ssp->ss_pidp = pidp; 4647 ssp->ss_pid = pidp->pid_id; 4648 ssp->ss_next = NULL; 4649 if (pssp) 4650 pssp->ss_next = ssp; 4651 else 4652 stp->sd_siglist = ssp; 4653 mutex_enter(&pidlock); 4654 PID_HOLD(pidp); 4655 mutex_exit(&pidlock); 4656 } 4657 4658 /* 4659 * Set events. 4660 */ 4661 ssp->ss_events = (int)arg; 4662 } else { 4663 /* 4664 * Remove proc from register list. 4665 */ 4666 if (ssp) { 4667 mutex_enter(&pidlock); 4668 PID_RELE(pidp); 4669 mutex_exit(&pidlock); 4670 if (pssp) 4671 pssp->ss_next = ssp->ss_next; 4672 else 4673 stp->sd_siglist = ssp->ss_next; 4674 kmem_free(ssp, sizeof (strsig_t)); 4675 } else { 4676 mutex_exit(&stp->sd_lock); 4677 return (EINVAL); 4678 } 4679 } 4680 4681 /* 4682 * Recalculate OR of sig events. 4683 */ 4684 stp->sd_sigflags = 0; 4685 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 4686 stp->sd_sigflags |= ssp->ss_events; 4687 mutex_exit(&stp->sd_lock); 4688 return (0); 4689 } 4690 4691 case I_GETSIG: 4692 /* 4693 * Return (in arg) the current registration of events 4694 * for which the calling proc is to be signaled. 
4695 */ 4696 { 4697 struct strsig *ssp; 4698 struct pid *pidp; 4699 4700 pidp = curproc->p_pidp; 4701 mutex_enter(&stp->sd_lock); 4702 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 4703 if (ssp->ss_pidp == pidp) { 4704 error = strcopyout(&ssp->ss_events, (void *)arg, 4705 sizeof (int), copyflag); 4706 mutex_exit(&stp->sd_lock); 4707 return (error); 4708 } 4709 mutex_exit(&stp->sd_lock); 4710 return (EINVAL); 4711 } 4712 4713 case I_ESETSIG: 4714 /* 4715 * Register the ss_pid to receive the SIGPOLL 4716 * signal based on the events is ss_events arg. If 4717 * ss_events is zero, remove the proc from register list. 4718 */ 4719 { 4720 struct strsig *ssp, *pssp; 4721 struct proc *proc; 4722 struct pid *pidp; 4723 pid_t pid; 4724 struct strsigset ss; 4725 4726 error = strcopyin((void *)arg, &ss, sizeof (ss), copyflag); 4727 if (error) 4728 return (error); 4729 4730 pid = ss.ss_pid; 4731 4732 if (ss.ss_events != 0) { 4733 /* 4734 * Permissions check by sending signal 0. 4735 * Note that when kill fails it does a set_errno 4736 * causing the system call to fail. 4737 */ 4738 error = kill(pid, 0); 4739 if (error) { 4740 return (error); 4741 } 4742 } 4743 mutex_enter(&pidlock); 4744 if (pid == 0) 4745 proc = curproc; 4746 else if (pid < 0) 4747 proc = pgfind(-pid); 4748 else 4749 proc = prfind(pid); 4750 if (proc == NULL) { 4751 mutex_exit(&pidlock); 4752 return (ESRCH); 4753 } 4754 if (pid < 0) 4755 pidp = proc->p_pgidp; 4756 else 4757 pidp = proc->p_pidp; 4758 ASSERT(pidp); 4759 /* 4760 * Get a hold on the pid structure while referencing it. 4761 * There is a separate PID_HOLD should it be inserted 4762 * in the list below. 4763 */ 4764 PID_HOLD(pidp); 4765 mutex_exit(&pidlock); 4766 4767 pssp = NULL; 4768 /* 4769 * Hold sd_lock to prevent traversal of sd_siglist while 4770 * it is modified. 
4771 */ 4772 mutex_enter(&stp->sd_lock); 4773 for (ssp = stp->sd_siglist; ssp && (ssp->ss_pid != pid); 4774 pssp = ssp, ssp = ssp->ss_next) 4775 ; 4776 4777 if (ss.ss_events) { 4778 if (ss.ss_events & 4779 ~(S_INPUT|S_HIPRI|S_MSG|S_HANGUP|S_ERROR| 4780 S_RDNORM|S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)) { 4781 mutex_exit(&stp->sd_lock); 4782 mutex_enter(&pidlock); 4783 PID_RELE(pidp); 4784 mutex_exit(&pidlock); 4785 return (EINVAL); 4786 } 4787 if ((ss.ss_events & S_BANDURG) && 4788 !(ss.ss_events & S_RDBAND)) { 4789 mutex_exit(&stp->sd_lock); 4790 mutex_enter(&pidlock); 4791 PID_RELE(pidp); 4792 mutex_exit(&pidlock); 4793 return (EINVAL); 4794 } 4795 4796 /* 4797 * If proc not already registered, add it 4798 * to list. 4799 */ 4800 if (!ssp) { 4801 ssp = kmem_alloc(sizeof (strsig_t), KM_SLEEP); 4802 ssp->ss_pidp = pidp; 4803 ssp->ss_pid = pid; 4804 ssp->ss_next = NULL; 4805 if (pssp) 4806 pssp->ss_next = ssp; 4807 else 4808 stp->sd_siglist = ssp; 4809 mutex_enter(&pidlock); 4810 PID_HOLD(pidp); 4811 mutex_exit(&pidlock); 4812 } 4813 4814 /* 4815 * Set events. 4816 */ 4817 ssp->ss_events = ss.ss_events; 4818 } else { 4819 /* 4820 * Remove proc from register list. 4821 */ 4822 if (ssp) { 4823 mutex_enter(&pidlock); 4824 PID_RELE(pidp); 4825 mutex_exit(&pidlock); 4826 if (pssp) 4827 pssp->ss_next = ssp->ss_next; 4828 else 4829 stp->sd_siglist = ssp->ss_next; 4830 kmem_free(ssp, sizeof (strsig_t)); 4831 } else { 4832 mutex_exit(&stp->sd_lock); 4833 mutex_enter(&pidlock); 4834 PID_RELE(pidp); 4835 mutex_exit(&pidlock); 4836 return (EINVAL); 4837 } 4838 } 4839 4840 /* 4841 * Recalculate OR of sig events. 
4842 */ 4843 stp->sd_sigflags = 0; 4844 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 4845 stp->sd_sigflags |= ssp->ss_events; 4846 mutex_exit(&stp->sd_lock); 4847 mutex_enter(&pidlock); 4848 PID_RELE(pidp); 4849 mutex_exit(&pidlock); 4850 return (0); 4851 } 4852 4853 case I_EGETSIG: 4854 /* 4855 * Return (in arg) the current registration of events 4856 * for which the calling proc is to be signaled. 4857 */ 4858 { 4859 struct strsig *ssp; 4860 struct proc *proc; 4861 pid_t pid; 4862 struct pid *pidp; 4863 struct strsigset ss; 4864 4865 error = strcopyin((void *)arg, &ss, sizeof (ss), copyflag); 4866 if (error) 4867 return (error); 4868 4869 pid = ss.ss_pid; 4870 mutex_enter(&pidlock); 4871 if (pid == 0) 4872 proc = curproc; 4873 else if (pid < 0) 4874 proc = pgfind(-pid); 4875 else 4876 proc = prfind(pid); 4877 if (proc == NULL) { 4878 mutex_exit(&pidlock); 4879 return (ESRCH); 4880 } 4881 if (pid < 0) 4882 pidp = proc->p_pgidp; 4883 else 4884 pidp = proc->p_pidp; 4885 4886 /* Prevent the pidp from being reassigned */ 4887 PID_HOLD(pidp); 4888 mutex_exit(&pidlock); 4889 4890 mutex_enter(&stp->sd_lock); 4891 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 4892 if (ssp->ss_pid == pid) { 4893 ss.ss_pid = ssp->ss_pid; 4894 ss.ss_events = ssp->ss_events; 4895 error = strcopyout(&ss, (void *)arg, 4896 sizeof (struct strsigset), copyflag); 4897 mutex_exit(&stp->sd_lock); 4898 mutex_enter(&pidlock); 4899 PID_RELE(pidp); 4900 mutex_exit(&pidlock); 4901 return (error); 4902 } 4903 mutex_exit(&stp->sd_lock); 4904 mutex_enter(&pidlock); 4905 PID_RELE(pidp); 4906 mutex_exit(&pidlock); 4907 return (EINVAL); 4908 } 4909 4910 case I_PEEK: 4911 { 4912 STRUCT_DECL(strpeek, strpeek); 4913 size_t n; 4914 mblk_t *fmp, *tmp_mp = NULL; 4915 4916 STRUCT_INIT(strpeek, flag); 4917 4918 error = strcopyin((void *)arg, STRUCT_BUF(strpeek), 4919 STRUCT_SIZE(strpeek), copyflag); 4920 if (error) 4921 return (error); 4922 4923 mutex_enter(QLOCK(rdq)); 4924 /* 4925 * Skip the invalid 
messages 4926 */ 4927 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next) 4928 if (mp->b_datap->db_type != M_SIG) 4929 break; 4930 4931 /* 4932 * If user has requested to peek at a high priority message 4933 * and first message is not, return 0 4934 */ 4935 if (mp != NULL) { 4936 if ((STRUCT_FGET(strpeek, flags) & RS_HIPRI) && 4937 queclass(mp) == QNORM) { 4938 *rvalp = 0; 4939 mutex_exit(QLOCK(rdq)); 4940 return (0); 4941 } 4942 } else if (stp->sd_struiordq == NULL || 4943 (STRUCT_FGET(strpeek, flags) & RS_HIPRI)) { 4944 /* 4945 * No mblks to look at at the streamhead and 4946 * 1). This isn't a synch stream or 4947 * 2). This is a synch stream but caller wants high 4948 * priority messages which is not supported by 4949 * the synch stream. (it only supports QNORM) 4950 */ 4951 *rvalp = 0; 4952 mutex_exit(QLOCK(rdq)); 4953 return (0); 4954 } 4955 4956 fmp = mp; 4957 4958 if (mp && mp->b_datap->db_type == M_PASSFP) { 4959 mutex_exit(QLOCK(rdq)); 4960 return (EBADMSG); 4961 } 4962 4963 ASSERT(mp == NULL || mp->b_datap->db_type == M_PCPROTO || 4964 mp->b_datap->db_type == M_PROTO || 4965 mp->b_datap->db_type == M_DATA); 4966 4967 if (mp && mp->b_datap->db_type == M_PCPROTO) { 4968 STRUCT_FSET(strpeek, flags, RS_HIPRI); 4969 } else { 4970 STRUCT_FSET(strpeek, flags, 0); 4971 } 4972 4973 4974 if (mp && ((tmp_mp = dupmsg(mp)) == NULL)) { 4975 mutex_exit(QLOCK(rdq)); 4976 return (ENOSR); 4977 } 4978 mutex_exit(QLOCK(rdq)); 4979 4980 /* 4981 * set mp = tmp_mp, so that I_PEEK processing can continue. 4982 * tmp_mp is used to free the dup'd message. 4983 */ 4984 mp = tmp_mp; 4985 4986 uio.uio_fmode = 0; 4987 uio.uio_extflg = UIO_COPY_CACHED; 4988 uio.uio_segflg = (copyflag == U_TO_K) ? UIO_USERSPACE : 4989 UIO_SYSSPACE; 4990 uio.uio_limit = 0; 4991 /* 4992 * First process PROTO blocks, if any. 4993 * If user doesn't want to get ctl info by setting maxlen <= 0, 4994 * then set len to -1/0 and skip control blocks part. 
4995 */ 4996 if (STRUCT_FGET(strpeek, ctlbuf.maxlen) < 0) 4997 STRUCT_FSET(strpeek, ctlbuf.len, -1); 4998 else if (STRUCT_FGET(strpeek, ctlbuf.maxlen) == 0) 4999 STRUCT_FSET(strpeek, ctlbuf.len, 0); 5000 else { 5001 int ctl_part = 0; 5002 5003 iov.iov_base = STRUCT_FGETP(strpeek, ctlbuf.buf); 5004 iov.iov_len = STRUCT_FGET(strpeek, ctlbuf.maxlen); 5005 uio.uio_iov = &iov; 5006 uio.uio_resid = iov.iov_len; 5007 uio.uio_loffset = 0; 5008 uio.uio_iovcnt = 1; 5009 while (mp && mp->b_datap->db_type != M_DATA && 5010 uio.uio_resid >= 0) { 5011 ASSERT(STRUCT_FGET(strpeek, flags) == 0 ? 5012 mp->b_datap->db_type == M_PROTO : 5013 mp->b_datap->db_type == M_PCPROTO); 5014 5015 if ((n = MIN(uio.uio_resid, 5016 mp->b_wptr - mp->b_rptr)) != 0 && 5017 (error = uiomove((char *)mp->b_rptr, n, 5018 UIO_READ, &uio)) != 0) { 5019 freemsg(tmp_mp); 5020 return (error); 5021 } 5022 ctl_part = 1; 5023 mp = mp->b_cont; 5024 } 5025 /* No ctl message */ 5026 if (ctl_part == 0) 5027 STRUCT_FSET(strpeek, ctlbuf.len, -1); 5028 else 5029 STRUCT_FSET(strpeek, ctlbuf.len, 5030 STRUCT_FGET(strpeek, ctlbuf.maxlen) - 5031 uio.uio_resid); 5032 } 5033 5034 /* 5035 * Now process DATA blocks, if any. 5036 * If user doesn't want to get data info by setting maxlen <= 0, 5037 * then set len to -1/0 and skip data blocks part. 
5038 */ 5039 if (STRUCT_FGET(strpeek, databuf.maxlen) < 0) 5040 STRUCT_FSET(strpeek, databuf.len, -1); 5041 else if (STRUCT_FGET(strpeek, databuf.maxlen) == 0) 5042 STRUCT_FSET(strpeek, databuf.len, 0); 5043 else { 5044 int data_part = 0; 5045 5046 iov.iov_base = STRUCT_FGETP(strpeek, databuf.buf); 5047 iov.iov_len = STRUCT_FGET(strpeek, databuf.maxlen); 5048 uio.uio_iov = &iov; 5049 uio.uio_resid = iov.iov_len; 5050 uio.uio_loffset = 0; 5051 uio.uio_iovcnt = 1; 5052 while (mp && uio.uio_resid) { 5053 if (mp->b_datap->db_type == M_DATA) { 5054 if ((n = MIN(uio.uio_resid, 5055 mp->b_wptr - mp->b_rptr)) != 0 && 5056 (error = uiomove((char *)mp->b_rptr, 5057 n, UIO_READ, &uio)) != 0) { 5058 freemsg(tmp_mp); 5059 return (error); 5060 } 5061 data_part = 1; 5062 } 5063 ASSERT(data_part == 0 || 5064 mp->b_datap->db_type == M_DATA); 5065 mp = mp->b_cont; 5066 } 5067 /* No data message */ 5068 if (data_part == 0) 5069 STRUCT_FSET(strpeek, databuf.len, -1); 5070 else 5071 STRUCT_FSET(strpeek, databuf.len, 5072 STRUCT_FGET(strpeek, databuf.maxlen) - 5073 uio.uio_resid); 5074 } 5075 freemsg(tmp_mp); 5076 5077 /* 5078 * It is a synch stream and user wants to get 5079 * data (maxlen > 0). 5080 * uio setup is done by the codes that process DATA 5081 * blocks above. 5082 */ 5083 if ((fmp == NULL) && STRUCT_FGET(strpeek, databuf.maxlen) > 0) { 5084 infod_t infod; 5085 5086 infod.d_cmd = INFOD_COPYOUT; 5087 infod.d_res = 0; 5088 infod.d_uiop = &uio; 5089 error = infonext(rdq, &infod); 5090 if (error == EINVAL || error == EBUSY) 5091 error = 0; 5092 if (error) 5093 return (error); 5094 STRUCT_FSET(strpeek, databuf.len, STRUCT_FGET(strpeek, 5095 databuf.maxlen) - uio.uio_resid); 5096 if (STRUCT_FGET(strpeek, databuf.len) == 0) { 5097 /* 5098 * No data found by the infonext(). 
5099 */ 5100 STRUCT_FSET(strpeek, databuf.len, -1); 5101 } 5102 } 5103 error = strcopyout(STRUCT_BUF(strpeek), (void *)arg, 5104 STRUCT_SIZE(strpeek), copyflag); 5105 if (error) { 5106 return (error); 5107 } 5108 /* 5109 * If there is no message retrieved, set return code to 0 5110 * otherwise, set it to 1. 5111 */ 5112 if (STRUCT_FGET(strpeek, ctlbuf.len) == -1 && 5113 STRUCT_FGET(strpeek, databuf.len) == -1) 5114 *rvalp = 0; 5115 else 5116 *rvalp = 1; 5117 return (0); 5118 } 5119 5120 case I_FDINSERT: 5121 { 5122 STRUCT_DECL(strfdinsert, strfdinsert); 5123 struct file *resftp; 5124 struct stdata *resstp; 5125 t_uscalar_t ival; 5126 ssize_t msgsize; 5127 struct strbuf mctl; 5128 5129 STRUCT_INIT(strfdinsert, flag); 5130 if (stp->sd_flag & STRHUP) 5131 return (ENXIO); 5132 /* 5133 * STRDERR, STWRERR and STPLEX tested above. 5134 */ 5135 error = strcopyin((void *)arg, STRUCT_BUF(strfdinsert), 5136 STRUCT_SIZE(strfdinsert), copyflag); 5137 if (error) 5138 return (error); 5139 5140 if (STRUCT_FGET(strfdinsert, offset) < 0 || 5141 (STRUCT_FGET(strfdinsert, offset) % 5142 sizeof (t_uscalar_t)) != 0) 5143 return (EINVAL); 5144 if ((resftp = getf(STRUCT_FGET(strfdinsert, fildes))) != NULL) { 5145 if ((resstp = resftp->f_vnode->v_stream) == NULL) { 5146 releasef(STRUCT_FGET(strfdinsert, fildes)); 5147 return (EINVAL); 5148 } 5149 } else 5150 return (EINVAL); 5151 5152 mutex_enter(&resstp->sd_lock); 5153 if (resstp->sd_flag & (STRDERR|STWRERR|STRHUP|STPLEX)) { 5154 error = strgeterr(resstp, 5155 STRDERR|STWRERR|STRHUP|STPLEX, 0); 5156 if (error != 0) { 5157 mutex_exit(&resstp->sd_lock); 5158 releasef(STRUCT_FGET(strfdinsert, fildes)); 5159 return (error); 5160 } 5161 } 5162 mutex_exit(&resstp->sd_lock); 5163 5164 #ifdef _ILP32 5165 { 5166 queue_t *q; 5167 queue_t *mate = NULL; 5168 5169 /* get read queue of stream terminus */ 5170 claimstr(resstp->sd_wrq); 5171 for (q = resstp->sd_wrq->q_next; q->q_next != NULL; 5172 q = q->q_next) 5173 if (!STRMATED(resstp) && STREAM(q) != 
resstp && 5174 mate == NULL) { 5175 ASSERT(q->q_qinfo->qi_srvp); 5176 ASSERT(_OTHERQ(q)->q_qinfo->qi_srvp); 5177 claimstr(q); 5178 mate = q; 5179 } 5180 q = _RD(q); 5181 if (mate) 5182 releasestr(mate); 5183 releasestr(resstp->sd_wrq); 5184 ival = (t_uscalar_t)q; 5185 } 5186 #else 5187 ival = (t_uscalar_t)getminor(resftp->f_vnode->v_rdev); 5188 #endif /* _ILP32 */ 5189 5190 if (STRUCT_FGET(strfdinsert, ctlbuf.len) < 5191 STRUCT_FGET(strfdinsert, offset) + sizeof (t_uscalar_t)) { 5192 releasef(STRUCT_FGET(strfdinsert, fildes)); 5193 return (EINVAL); 5194 } 5195 5196 /* 5197 * Check for legal flag value. 5198 */ 5199 if (STRUCT_FGET(strfdinsert, flags) & ~RS_HIPRI) { 5200 releasef(STRUCT_FGET(strfdinsert, fildes)); 5201 return (EINVAL); 5202 } 5203 5204 /* get these values from those cached in the stream head */ 5205 mutex_enter(QLOCK(stp->sd_wrq)); 5206 rmin = stp->sd_qn_minpsz; 5207 rmax = stp->sd_qn_maxpsz; 5208 mutex_exit(QLOCK(stp->sd_wrq)); 5209 5210 /* 5211 * Make sure ctl and data sizes together fall within 5212 * the limits of the max and min receive packet sizes 5213 * and do not exceed system limit. A negative data 5214 * length means that no data part is to be sent. 
5215 */ 5216 ASSERT((rmax >= 0) || (rmax == INFPSZ)); 5217 if (rmax == 0) { 5218 releasef(STRUCT_FGET(strfdinsert, fildes)); 5219 return (ERANGE); 5220 } 5221 if ((msgsize = STRUCT_FGET(strfdinsert, databuf.len)) < 0) 5222 msgsize = 0; 5223 if ((msgsize < rmin) || 5224 ((msgsize > rmax) && (rmax != INFPSZ)) || 5225 (STRUCT_FGET(strfdinsert, ctlbuf.len) > strctlsz)) { 5226 releasef(STRUCT_FGET(strfdinsert, fildes)); 5227 return (ERANGE); 5228 } 5229 5230 mutex_enter(&stp->sd_lock); 5231 while (!(STRUCT_FGET(strfdinsert, flags) & RS_HIPRI) && 5232 !canputnext(stp->sd_wrq)) { 5233 if ((error = strwaitq(stp, WRITEWAIT, (ssize_t)0, 5234 flag, -1, &done)) != 0 || done) { 5235 mutex_exit(&stp->sd_lock); 5236 releasef(STRUCT_FGET(strfdinsert, fildes)); 5237 return (error); 5238 } 5239 if ((error = i_straccess(stp, access)) != 0) { 5240 mutex_exit(&stp->sd_lock); 5241 releasef( 5242 STRUCT_FGET(strfdinsert, fildes)); 5243 return (error); 5244 } 5245 } 5246 mutex_exit(&stp->sd_lock); 5247 5248 /* 5249 * Copy strfdinsert.ctlbuf into native form of 5250 * ctlbuf to pass down into strmakemsg(). 5251 */ 5252 mctl.maxlen = STRUCT_FGET(strfdinsert, ctlbuf.maxlen); 5253 mctl.len = STRUCT_FGET(strfdinsert, ctlbuf.len); 5254 mctl.buf = STRUCT_FGETP(strfdinsert, ctlbuf.buf); 5255 5256 iov.iov_base = STRUCT_FGETP(strfdinsert, databuf.buf); 5257 iov.iov_len = STRUCT_FGET(strfdinsert, databuf.len); 5258 uio.uio_iov = &iov; 5259 uio.uio_iovcnt = 1; 5260 uio.uio_loffset = 0; 5261 uio.uio_segflg = (copyflag == U_TO_K) ? 
UIO_USERSPACE : 5262 UIO_SYSSPACE; 5263 uio.uio_fmode = 0; 5264 uio.uio_extflg = UIO_COPY_CACHED; 5265 uio.uio_resid = iov.iov_len; 5266 if ((error = strmakemsg(&mctl, 5267 &msgsize, &uio, stp, 5268 STRUCT_FGET(strfdinsert, flags), &mp)) != 0 || !mp) { 5269 STRUCT_FSET(strfdinsert, databuf.len, msgsize); 5270 releasef(STRUCT_FGET(strfdinsert, fildes)); 5271 return (error); 5272 } 5273 5274 STRUCT_FSET(strfdinsert, databuf.len, msgsize); 5275 5276 /* 5277 * Place the possibly reencoded queue pointer 'offset' bytes 5278 * from the start of the control portion of the message. 5279 */ 5280 *((t_uscalar_t *)(mp->b_rptr + 5281 STRUCT_FGET(strfdinsert, offset))) = ival; 5282 5283 /* 5284 * Put message downstream. 5285 */ 5286 stream_willservice(stp); 5287 putnext(stp->sd_wrq, mp); 5288 stream_runservice(stp); 5289 releasef(STRUCT_FGET(strfdinsert, fildes)); 5290 return (error); 5291 } 5292 5293 case I_SENDFD: 5294 { 5295 struct file *fp; 5296 5297 if ((fp = getf((int)arg)) == NULL) 5298 return (EBADF); 5299 error = do_sendfp(stp, fp, crp); 5300 if (auditing) { 5301 audit_fdsend((int)arg, fp, error); 5302 } 5303 releasef((int)arg); 5304 return (error); 5305 } 5306 5307 case I_RECVFD: 5308 case I_E_RECVFD: 5309 { 5310 struct k_strrecvfd *srf; 5311 int i, fd; 5312 5313 mutex_enter(&stp->sd_lock); 5314 while (!(mp = getq(rdq))) { 5315 if (stp->sd_flag & (STRHUP|STREOF)) { 5316 mutex_exit(&stp->sd_lock); 5317 return (ENXIO); 5318 } 5319 if ((error = strwaitq(stp, GETWAIT, (ssize_t)0, 5320 flag, -1, &done)) != 0 || done) { 5321 mutex_exit(&stp->sd_lock); 5322 return (error); 5323 } 5324 if ((error = i_straccess(stp, access)) != 0) { 5325 mutex_exit(&stp->sd_lock); 5326 return (error); 5327 } 5328 } 5329 if (mp->b_datap->db_type != M_PASSFP) { 5330 putback(stp, rdq, mp, mp->b_band); 5331 mutex_exit(&stp->sd_lock); 5332 return (EBADMSG); 5333 } 5334 mutex_exit(&stp->sd_lock); 5335 5336 srf = (struct k_strrecvfd *)mp->b_rptr; 5337 if ((fd = ufalloc(0)) == -1) { 5338 
mutex_enter(&stp->sd_lock); 5339 putback(stp, rdq, mp, mp->b_band); 5340 mutex_exit(&stp->sd_lock); 5341 return (EMFILE); 5342 } 5343 if (cmd == I_RECVFD) { 5344 struct o_strrecvfd ostrfd; 5345 5346 /* check to see if uid/gid values are too large. */ 5347 5348 if (srf->uid > (o_uid_t)USHRT_MAX || 5349 srf->gid > (o_gid_t)USHRT_MAX) { 5350 mutex_enter(&stp->sd_lock); 5351 putback(stp, rdq, mp, mp->b_band); 5352 mutex_exit(&stp->sd_lock); 5353 setf(fd, NULL); /* release fd entry */ 5354 return (EOVERFLOW); 5355 } 5356 5357 ostrfd.fd = fd; 5358 ostrfd.uid = (o_uid_t)srf->uid; 5359 ostrfd.gid = (o_gid_t)srf->gid; 5360 5361 /* Null the filler bits */ 5362 for (i = 0; i < 8; i++) 5363 ostrfd.fill[i] = 0; 5364 5365 error = strcopyout(&ostrfd, (void *)arg, 5366 sizeof (struct o_strrecvfd), copyflag); 5367 } else { /* I_E_RECVFD */ 5368 struct strrecvfd strfd; 5369 5370 strfd.fd = fd; 5371 strfd.uid = srf->uid; 5372 strfd.gid = srf->gid; 5373 5374 /* null the filler bits */ 5375 for (i = 0; i < 8; i++) 5376 strfd.fill[i] = 0; 5377 5378 error = strcopyout(&strfd, (void *)arg, 5379 sizeof (struct strrecvfd), copyflag); 5380 } 5381 5382 if (error) { 5383 setf(fd, NULL); /* release fd entry */ 5384 mutex_enter(&stp->sd_lock); 5385 putback(stp, rdq, mp, mp->b_band); 5386 mutex_exit(&stp->sd_lock); 5387 return (error); 5388 } 5389 if (auditing) { 5390 audit_fdrecv(fd, srf->fp); 5391 } 5392 5393 /* 5394 * Always increment f_count since the freemsg() below will 5395 * always call free_passfp() which performs a closef(). 5396 */ 5397 mutex_enter(&srf->fp->f_tlock); 5398 srf->fp->f_count++; 5399 mutex_exit(&srf->fp->f_tlock); 5400 setf(fd, srf->fp); 5401 freemsg(mp); 5402 return (0); 5403 } 5404 5405 case I_SWROPT: 5406 /* 5407 * Set/clear the write options. arg is a bit 5408 * mask with any of the following bits set... 5409 * SNDZERO - send zero length message 5410 * SNDPIPE - send sigpipe to process if 5411 * sd_werror is set and process is 5412 * doing a write or putmsg. 
5413 * The new stream head write options should reflect 5414 * what is in arg. 5415 */ 5416 if (arg & ~(SNDZERO|SNDPIPE)) 5417 return (EINVAL); 5418 5419 mutex_enter(&stp->sd_lock); 5420 stp->sd_wput_opt &= ~(SW_SIGPIPE|SW_SNDZERO); 5421 if (arg & SNDZERO) 5422 stp->sd_wput_opt |= SW_SNDZERO; 5423 if (arg & SNDPIPE) 5424 stp->sd_wput_opt |= SW_SIGPIPE; 5425 mutex_exit(&stp->sd_lock); 5426 return (0); 5427 5428 case I_GWROPT: 5429 { 5430 int wropt = 0; 5431 5432 if (stp->sd_wput_opt & SW_SNDZERO) 5433 wropt |= SNDZERO; 5434 if (stp->sd_wput_opt & SW_SIGPIPE) 5435 wropt |= SNDPIPE; 5436 return (strcopyout(&wropt, (void *)arg, sizeof (wropt), 5437 copyflag)); 5438 } 5439 5440 case I_LIST: 5441 /* 5442 * Returns all the modules found on this stream, 5443 * upto the driver. If argument is NULL, return the 5444 * number of modules (including driver). If argument 5445 * is not NULL, copy the names into the structure 5446 * provided. 5447 */ 5448 5449 { 5450 queue_t *q; 5451 char *qname; 5452 int i, nmods; 5453 struct str_mlist *mlist; 5454 STRUCT_DECL(str_list, strlist); 5455 5456 if (arg == NULL) { /* Return number of modules plus driver */ 5457 if (stp->sd_vnode->v_type == VFIFO) 5458 *rvalp = stp->sd_pushcnt; 5459 else 5460 *rvalp = stp->sd_pushcnt + 1; 5461 return (0); 5462 } 5463 5464 STRUCT_INIT(strlist, flag); 5465 5466 error = strcopyin((void *)arg, STRUCT_BUF(strlist), 5467 STRUCT_SIZE(strlist), copyflag); 5468 if (error != 0) 5469 return (error); 5470 5471 mlist = STRUCT_FGETP(strlist, sl_modlist); 5472 nmods = STRUCT_FGET(strlist, sl_nmods); 5473 if (nmods <= 0) 5474 return (EINVAL); 5475 5476 claimstr(stp->sd_wrq); 5477 q = stp->sd_wrq; 5478 for (i = 0; i < nmods && _SAMESTR(q); i++, q = q->q_next) { 5479 qname = Q2NAME(q->q_next); 5480 error = strcopyout(qname, &mlist[i], strlen(qname) + 1, 5481 copyflag); 5482 if (error != 0) { 5483 releasestr(stp->sd_wrq); 5484 return (error); 5485 } 5486 } 5487 releasestr(stp->sd_wrq); 5488 return (strcopyout(&i, (void 
*)arg, sizeof (int), copyflag)); 5489 } 5490 5491 case I_CKBAND: 5492 { 5493 queue_t *q; 5494 qband_t *qbp; 5495 5496 if ((arg < 0) || (arg >= NBAND)) 5497 return (EINVAL); 5498 q = _RD(stp->sd_wrq); 5499 mutex_enter(QLOCK(q)); 5500 if (arg > (int)q->q_nband) { 5501 *rvalp = 0; 5502 } else { 5503 if (arg == 0) { 5504 if (q->q_first) 5505 *rvalp = 1; 5506 else 5507 *rvalp = 0; 5508 } else { 5509 qbp = q->q_bandp; 5510 while (--arg > 0) 5511 qbp = qbp->qb_next; 5512 if (qbp->qb_first) 5513 *rvalp = 1; 5514 else 5515 *rvalp = 0; 5516 } 5517 } 5518 mutex_exit(QLOCK(q)); 5519 return (0); 5520 } 5521 5522 case I_GETBAND: 5523 { 5524 int intpri; 5525 queue_t *q; 5526 5527 q = _RD(stp->sd_wrq); 5528 mutex_enter(QLOCK(q)); 5529 mp = q->q_first; 5530 if (!mp) { 5531 mutex_exit(QLOCK(q)); 5532 return (ENODATA); 5533 } 5534 intpri = (int)mp->b_band; 5535 error = strcopyout(&intpri, (void *)arg, sizeof (int), 5536 copyflag); 5537 mutex_exit(QLOCK(q)); 5538 return (error); 5539 } 5540 5541 case I_ATMARK: 5542 { 5543 queue_t *q; 5544 5545 if (arg & ~(ANYMARK|LASTMARK)) 5546 return (EINVAL); 5547 q = _RD(stp->sd_wrq); 5548 mutex_enter(&stp->sd_lock); 5549 if ((stp->sd_flag & STRATMARK) && (arg == ANYMARK)) { 5550 *rvalp = 1; 5551 } else { 5552 mutex_enter(QLOCK(q)); 5553 mp = q->q_first; 5554 5555 if (mp == NULL) 5556 *rvalp = 0; 5557 else if ((arg == ANYMARK) && (mp->b_flag & MSGMARK)) 5558 *rvalp = 1; 5559 else if ((arg == LASTMARK) && (mp == stp->sd_mark)) 5560 *rvalp = 1; 5561 else 5562 *rvalp = 0; 5563 mutex_exit(QLOCK(q)); 5564 } 5565 mutex_exit(&stp->sd_lock); 5566 return (0); 5567 } 5568 5569 case I_CANPUT: 5570 { 5571 char band; 5572 5573 if ((arg < 0) || (arg >= NBAND)) 5574 return (EINVAL); 5575 band = (char)arg; 5576 *rvalp = bcanputnext(stp->sd_wrq, band); 5577 return (0); 5578 } 5579 5580 case I_SETCLTIME: 5581 { 5582 int closetime; 5583 5584 error = strcopyin((void *)arg, &closetime, sizeof (int), 5585 copyflag); 5586 if (error) 5587 return (error); 5588 if 
(closetime < 0) 5589 return (EINVAL); 5590 5591 stp->sd_closetime = closetime; 5592 return (0); 5593 } 5594 5595 case I_GETCLTIME: 5596 { 5597 int closetime; 5598 5599 closetime = stp->sd_closetime; 5600 return (strcopyout(&closetime, (void *)arg, sizeof (int), 5601 copyflag)); 5602 } 5603 5604 case TIOCGSID: 5605 { 5606 pid_t sid; 5607 5608 mutex_enter(&stp->sd_lock); 5609 if (stp->sd_sidp == NULL) { 5610 mutex_exit(&stp->sd_lock); 5611 return (ENOTTY); 5612 } 5613 sid = stp->sd_sidp->pid_id; 5614 mutex_exit(&stp->sd_lock); 5615 return (strcopyout(&sid, (void *)arg, sizeof (pid_t), 5616 copyflag)); 5617 } 5618 5619 case TIOCSPGRP: 5620 { 5621 pid_t pgrp; 5622 proc_t *q; 5623 pid_t sid, fg_pgid, bg_pgid; 5624 5625 if (error = strcopyin((void *)arg, &pgrp, sizeof (pid_t), 5626 copyflag)) 5627 return (error); 5628 mutex_enter(&stp->sd_lock); 5629 mutex_enter(&pidlock); 5630 if (stp->sd_sidp != ttoproc(curthread)->p_sessp->s_sidp) { 5631 mutex_exit(&pidlock); 5632 mutex_exit(&stp->sd_lock); 5633 return (ENOTTY); 5634 } 5635 if (pgrp == stp->sd_pgidp->pid_id) { 5636 mutex_exit(&pidlock); 5637 mutex_exit(&stp->sd_lock); 5638 return (0); 5639 } 5640 if (pgrp <= 0 || pgrp >= maxpid) { 5641 mutex_exit(&pidlock); 5642 mutex_exit(&stp->sd_lock); 5643 return (EINVAL); 5644 } 5645 if ((q = pgfind(pgrp)) == NULL || 5646 q->p_sessp != ttoproc(curthread)->p_sessp) { 5647 mutex_exit(&pidlock); 5648 mutex_exit(&stp->sd_lock); 5649 return (EPERM); 5650 } 5651 sid = stp->sd_sidp->pid_id; 5652 fg_pgid = q->p_pgrp; 5653 bg_pgid = stp->sd_pgidp->pid_id; 5654 CL_SET_PROCESS_GROUP(curthread, sid, bg_pgid, fg_pgid); 5655 PID_RELE(stp->sd_pgidp); 5656 ctty_clear_sighuped(); 5657 stp->sd_pgidp = q->p_pgidp; 5658 PID_HOLD(stp->sd_pgidp); 5659 mutex_exit(&pidlock); 5660 mutex_exit(&stp->sd_lock); 5661 return (0); 5662 } 5663 5664 case TIOCGPGRP: 5665 { 5666 pid_t pgrp; 5667 5668 mutex_enter(&stp->sd_lock); 5669 if (stp->sd_sidp == NULL) { 5670 mutex_exit(&stp->sd_lock); 5671 return (ENOTTY); 
5672 } 5673 pgrp = stp->sd_pgidp->pid_id; 5674 mutex_exit(&stp->sd_lock); 5675 return (strcopyout(&pgrp, (void *)arg, sizeof (pid_t), 5676 copyflag)); 5677 } 5678 5679 case TIOCSCTTY: 5680 { 5681 return (strctty(stp)); 5682 } 5683 5684 case TIOCNOTTY: 5685 { 5686 /* freectty() always assumes curproc. */ 5687 if (freectty(B_FALSE) != 0) 5688 return (0); 5689 return (ENOTTY); 5690 } 5691 5692 case FIONBIO: 5693 case FIOASYNC: 5694 return (0); /* handled by the upper layer */ 5695 } 5696 } 5697 5698 /* 5699 * Custom free routine used for M_PASSFP messages. 5700 */ 5701 static void 5702 free_passfp(struct k_strrecvfd *srf) 5703 { 5704 (void) closef(srf->fp); 5705 kmem_free(srf, sizeof (struct k_strrecvfd) + sizeof (frtn_t)); 5706 } 5707 5708 /* ARGSUSED */ 5709 int 5710 do_sendfp(struct stdata *stp, struct file *fp, struct cred *cr) 5711 { 5712 queue_t *qp, *nextqp; 5713 struct k_strrecvfd *srf; 5714 mblk_t *mp; 5715 frtn_t *frtnp; 5716 size_t bufsize; 5717 queue_t *mate = NULL; 5718 syncq_t *sq = NULL; 5719 int retval = 0; 5720 5721 if (stp->sd_flag & STRHUP) 5722 return (ENXIO); 5723 5724 claimstr(stp->sd_wrq); 5725 5726 /* Fastpath, we have a pipe, and we are already mated, use it. */ 5727 if (STRMATED(stp)) { 5728 qp = _RD(stp->sd_mate->sd_wrq); 5729 claimstr(qp); 5730 mate = qp; 5731 } else { /* Not already mated. */ 5732 5733 /* 5734 * Walk the stream to the end of this one. 5735 * assumes that the claimstr() will prevent 5736 * plumbing between the stream head and the 5737 * driver from changing 5738 */ 5739 qp = stp->sd_wrq; 5740 5741 /* 5742 * Loop until we reach the end of this stream. 5743 * On completion, qp points to the write queue 5744 * at the end of the stream, or the read queue 5745 * at the stream head if this is a fifo. 5746 */ 5747 while (((qp = qp->q_next) != NULL) && _SAMESTR(qp)) 5748 ; 5749 5750 /* 5751 * Just in case we get a q_next which is NULL, but 5752 * not at the end of the stream. 
This is actually 5753 * broken, so we set an assert to catch it in 5754 * debug, and set an error and return if not debug. 5755 */ 5756 ASSERT(qp); 5757 if (qp == NULL) { 5758 releasestr(stp->sd_wrq); 5759 return (EINVAL); 5760 } 5761 5762 /* 5763 * Enter the syncq for the driver, so (hopefully) 5764 * the queue values will not change on us. 5765 * XXXX - This will only prevent the race IFF only 5766 * the write side modifies the q_next member, and 5767 * the put procedure is protected by at least 5768 * MT_PERQ. 5769 */ 5770 if ((sq = qp->q_syncq) != NULL) 5771 entersq(sq, SQ_PUT); 5772 5773 /* Now get the q_next value from this qp. */ 5774 nextqp = qp->q_next; 5775 5776 /* 5777 * If nextqp exists and the other stream is different 5778 * from this one claim the stream, set the mate, and 5779 * get the read queue at the stream head of the other 5780 * stream. Assumes that nextqp was at least valid when 5781 * we got it. Hopefully the entersq of the driver 5782 * will prevent it from changing on us. 5783 */ 5784 if ((nextqp != NULL) && (STREAM(nextqp) != stp)) { 5785 ASSERT(qp->q_qinfo->qi_srvp); 5786 ASSERT(_OTHERQ(qp)->q_qinfo->qi_srvp); 5787 ASSERT(_OTHERQ(qp->q_next)->q_qinfo->qi_srvp); 5788 claimstr(nextqp); 5789 5790 /* Make sure we still have a q_next */ 5791 if (nextqp != qp->q_next) { 5792 releasestr(stp->sd_wrq); 5793 releasestr(nextqp); 5794 return (EINVAL); 5795 } 5796 5797 qp = _RD(STREAM(nextqp)->sd_wrq); 5798 mate = qp; 5799 } 5800 /* If we entered the synq above, leave it. 
*/ 5801 if (sq != NULL) 5802 leavesq(sq, SQ_PUT); 5803 } /* STRMATED(STP) */ 5804 5805 /* XXX prevents substitution of the ops vector */ 5806 if (qp->q_qinfo != &strdata && qp->q_qinfo != &fifo_strdata) { 5807 retval = EINVAL; 5808 goto out; 5809 } 5810 5811 if (qp->q_flag & QFULL) { 5812 retval = EAGAIN; 5813 goto out; 5814 } 5815 5816 /* 5817 * Since M_PASSFP messages include a file descriptor, we use 5818 * esballoc() and specify a custom free routine (free_passfp()) that 5819 * will close the descriptor as part of freeing the message. For 5820 * convenience, we stash the frtn_t right after the data block. 5821 */ 5822 bufsize = sizeof (struct k_strrecvfd) + sizeof (frtn_t); 5823 srf = kmem_alloc(bufsize, KM_NOSLEEP); 5824 if (srf == NULL) { 5825 retval = EAGAIN; 5826 goto out; 5827 } 5828 5829 frtnp = (frtn_t *)(srf + 1); 5830 frtnp->free_arg = (caddr_t)srf; 5831 frtnp->free_func = free_passfp; 5832 5833 mp = esballoc((uchar_t *)srf, bufsize, BPRI_MED, frtnp); 5834 if (mp == NULL) { 5835 kmem_free(srf, bufsize); 5836 retval = EAGAIN; 5837 goto out; 5838 } 5839 mp->b_wptr += sizeof (struct k_strrecvfd); 5840 mp->b_datap->db_type = M_PASSFP; 5841 5842 srf->fp = fp; 5843 srf->uid = crgetuid(curthread->t_cred); 5844 srf->gid = crgetgid(curthread->t_cred); 5845 mutex_enter(&fp->f_tlock); 5846 fp->f_count++; 5847 mutex_exit(&fp->f_tlock); 5848 5849 put(qp, mp); 5850 out: 5851 releasestr(stp->sd_wrq); 5852 if (mate) 5853 releasestr(mate); 5854 return (retval); 5855 } 5856 5857 /* 5858 * Send an ioctl message downstream and wait for acknowledgement. 5859 * flags may be set to either U_TO_K or K_TO_K and a combination 5860 * of STR_NOERROR or STR_NOSIG 5861 * STR_NOSIG: Signals are essentially ignored or held and have 5862 * no effect for the duration of the call. 5863 * STR_NOERROR: Ignores stream head read, write and hup errors. 
5864 * Additionally, if an existing ioctl times out, it is assumed 5865 * lost and and this ioctl will continue as if the previous ioctl had 5866 * finished. ETIME may be returned if this ioctl times out (i.e. 5867 * ic_timout is not INFTIM). Non-stream head errors may be returned if 5868 * the ioc_error indicates that the driver/module had problems, 5869 * an EFAULT was found when accessing user data, a lack of 5870 * resources, etc. 5871 */ 5872 int 5873 strdoioctl( 5874 struct stdata *stp, 5875 struct strioctl *strioc, 5876 int fflags, /* file flags with model info */ 5877 int flag, 5878 cred_t *crp, 5879 int *rvalp) 5880 { 5881 mblk_t *bp; 5882 struct iocblk *iocbp; 5883 struct copyreq *reqp; 5884 struct copyresp *resp; 5885 int id; 5886 int transparent = 0; 5887 int error = 0; 5888 int len = 0; 5889 caddr_t taddr; 5890 int copyflag = (flag & (U_TO_K | K_TO_K)); 5891 int sigflag = (flag & STR_NOSIG); 5892 int errs; 5893 uint_t waitflags; 5894 boolean_t set_iocwaitne = B_FALSE; 5895 5896 ASSERT(copyflag == U_TO_K || copyflag == K_TO_K); 5897 ASSERT((fflags & FMODELS) != 0); 5898 5899 TRACE_2(TR_FAC_STREAMS_FR, 5900 TR_STRDOIOCTL, 5901 "strdoioctl:stp %p strioc %p", stp, strioc); 5902 if (strioc->ic_len == TRANSPARENT) { /* send arg in M_DATA block */ 5903 transparent = 1; 5904 strioc->ic_len = sizeof (intptr_t); 5905 } 5906 5907 if (strioc->ic_len < 0 || (strmsgsz > 0 && strioc->ic_len > strmsgsz)) 5908 return (EINVAL); 5909 5910 if ((bp = allocb_cred_wait(sizeof (union ioctypes), sigflag, &error, 5911 crp, curproc->p_pid)) == NULL) 5912 return (error); 5913 5914 bzero(bp->b_wptr, sizeof (union ioctypes)); 5915 5916 iocbp = (struct iocblk *)bp->b_wptr; 5917 iocbp->ioc_count = strioc->ic_len; 5918 iocbp->ioc_cmd = strioc->ic_cmd; 5919 iocbp->ioc_flag = (fflags & FMODELS); 5920 5921 crhold(crp); 5922 iocbp->ioc_cr = crp; 5923 DB_TYPE(bp) = M_IOCTL; 5924 bp->b_wptr += sizeof (struct iocblk); 5925 5926 if (flag & STR_NOERROR) 5927 errs = STPLEX; 5928 else 5929 errs 
= STRHUP|STRDERR|STWRERR|STPLEX; 5930 5931 /* 5932 * If there is data to copy into ioctl block, do so. 5933 */ 5934 if (iocbp->ioc_count > 0) { 5935 if (transparent) 5936 /* 5937 * Note: STR_NOERROR does not have an effect 5938 * in putiocd() 5939 */ 5940 id = K_TO_K | sigflag; 5941 else 5942 id = flag; 5943 if ((error = putiocd(bp, strioc->ic_dp, id, crp)) != 0) { 5944 freemsg(bp); 5945 crfree(crp); 5946 return (error); 5947 } 5948 5949 /* 5950 * We could have slept copying in user pages. 5951 * Recheck the stream head state (the other end 5952 * of a pipe could have gone away). 5953 */ 5954 if (stp->sd_flag & errs) { 5955 mutex_enter(&stp->sd_lock); 5956 error = strgeterr(stp, errs, 0); 5957 mutex_exit(&stp->sd_lock); 5958 if (error != 0) { 5959 freemsg(bp); 5960 crfree(crp); 5961 return (error); 5962 } 5963 } 5964 } 5965 if (transparent) 5966 iocbp->ioc_count = TRANSPARENT; 5967 5968 /* 5969 * Block for up to STRTIMOUT milliseconds if there is an outstanding 5970 * ioctl for this stream already running. All processes 5971 * sleeping here will be awakened as a result of an ACK 5972 * or NAK being received for the outstanding ioctl, or 5973 * as a result of the timer expiring on the outstanding 5974 * ioctl (a failure), or as a result of any waiting 5975 * process's timer expiring (also a failure). 5976 */ 5977 5978 error = 0; 5979 mutex_enter(&stp->sd_lock); 5980 while ((stp->sd_flag & IOCWAIT) || 5981 (!set_iocwaitne && (stp->sd_flag & IOCWAITNE))) { 5982 clock_t cv_rval; 5983 5984 TRACE_0(TR_FAC_STREAMS_FR, 5985 TR_STRDOIOCTL_WAIT, 5986 "strdoioctl sleeps - IOCWAIT"); 5987 cv_rval = str_cv_wait(&stp->sd_iocmonitor, &stp->sd_lock, 5988 STRTIMOUT, sigflag); 5989 if (cv_rval <= 0) { 5990 if (cv_rval == 0) { 5991 error = EINTR; 5992 } else { 5993 if (flag & STR_NOERROR) { 5994 /* 5995 * Terminating current ioctl in 5996 * progress -- assume it got lost and 5997 * wake up the other thread so that the 5998 * operation completes. 
5999 */ 6000 if (!(stp->sd_flag & IOCWAITNE)) { 6001 set_iocwaitne = B_TRUE; 6002 stp->sd_flag |= IOCWAITNE; 6003 cv_broadcast(&stp->sd_monitor); 6004 } 6005 /* 6006 * Otherwise, there's a running 6007 * STR_NOERROR -- we have no choice 6008 * here but to wait forever (or until 6009 * interrupted). 6010 */ 6011 } else { 6012 /* 6013 * pending ioctl has caused 6014 * us to time out 6015 */ 6016 error = ETIME; 6017 } 6018 } 6019 } else if ((stp->sd_flag & errs)) { 6020 error = strgeterr(stp, errs, 0); 6021 } 6022 if (error) { 6023 mutex_exit(&stp->sd_lock); 6024 freemsg(bp); 6025 crfree(crp); 6026 return (error); 6027 } 6028 } 6029 6030 /* 6031 * Have control of ioctl mechanism. 6032 * Send down ioctl packet and wait for response. 6033 */ 6034 if (stp->sd_iocblk != (mblk_t *)-1) { 6035 freemsg(stp->sd_iocblk); 6036 } 6037 stp->sd_iocblk = NULL; 6038 6039 /* 6040 * If this is marked with 'noerror' (internal; mostly 6041 * I_{P,}{UN,}LINK), then make sure nobody else is able to get 6042 * in here by setting IOCWAITNE. 6043 */ 6044 waitflags = IOCWAIT; 6045 if (flag & STR_NOERROR) 6046 waitflags |= IOCWAITNE; 6047 6048 stp->sd_flag |= waitflags; 6049 6050 /* 6051 * Assign sequence number. 6052 */ 6053 iocbp->ioc_id = stp->sd_iocid = getiocseqno(); 6054 6055 mutex_exit(&stp->sd_lock); 6056 6057 TRACE_1(TR_FAC_STREAMS_FR, 6058 TR_STRDOIOCTL_PUT, "strdoioctl put: stp %p", stp); 6059 stream_willservice(stp); 6060 putnext(stp->sd_wrq, bp); 6061 stream_runservice(stp); 6062 6063 /* 6064 * Timed wait for acknowledgment. The wait time is limited by the 6065 * timeout value, which must be a positive integer (number of 6066 * milliseconds) to wait, or 0 (use default value of STRTIMOUT 6067 * milliseconds), or -1 (wait forever). This will be awakened 6068 * either by an ACK/NAK message arriving, the timer expiring, or 6069 * the timer expiring on another ioctl waiting for control of the 6070 * mechanism. 
6071 */ 6072 waitioc: 6073 mutex_enter(&stp->sd_lock); 6074 6075 6076 /* 6077 * If the reply has already arrived, don't sleep. If awakened from 6078 * the sleep, fail only if the reply has not arrived by then. 6079 * Otherwise, process the reply. 6080 */ 6081 while (!stp->sd_iocblk) { 6082 clock_t cv_rval; 6083 6084 if (stp->sd_flag & errs) { 6085 error = strgeterr(stp, errs, 0); 6086 if (error != 0) { 6087 stp->sd_flag &= ~waitflags; 6088 cv_broadcast(&stp->sd_iocmonitor); 6089 mutex_exit(&stp->sd_lock); 6090 crfree(crp); 6091 return (error); 6092 } 6093 } 6094 6095 TRACE_0(TR_FAC_STREAMS_FR, 6096 TR_STRDOIOCTL_WAIT2, 6097 "strdoioctl sleeps awaiting reply"); 6098 ASSERT(error == 0); 6099 6100 cv_rval = str_cv_wait(&stp->sd_monitor, &stp->sd_lock, 6101 (strioc->ic_timout ? 6102 strioc->ic_timout * 1000 : STRTIMOUT), sigflag); 6103 6104 /* 6105 * There are four possible cases here: interrupt, timeout, 6106 * wakeup by IOCWAITNE (above), or wakeup by strrput_nondata (a 6107 * valid M_IOCTL reply). 6108 * 6109 * If we've been awakened by a STR_NOERROR ioctl on some other 6110 * thread, then sd_iocblk will still be NULL, and IOCWAITNE 6111 * will be set. Pretend as if we just timed out. Note that 6112 * this other thread waited at least STRTIMOUT before trying to 6113 * awaken our thread, so this is indistinguishable (even for 6114 * INFTIM) from the case where we failed with ETIME waiting on 6115 * IOCWAIT in the prior loop. 6116 */ 6117 if (cv_rval > 0 && !(flag & STR_NOERROR) && 6118 stp->sd_iocblk == NULL && (stp->sd_flag & IOCWAITNE)) { 6119 cv_rval = -1; 6120 } 6121 6122 /* 6123 * note: STR_NOERROR does not protect 6124 * us here.. use ic_timout < 0 6125 */ 6126 if (cv_rval <= 0) { 6127 if (cv_rval == 0) { 6128 error = EINTR; 6129 } else { 6130 error = ETIME; 6131 } 6132 /* 6133 * A message could have come in after we were scheduled 6134 * but before we were actually run. 
6135 */ 6136 bp = stp->sd_iocblk; 6137 stp->sd_iocblk = NULL; 6138 if (bp != NULL) { 6139 if ((bp->b_datap->db_type == M_COPYIN) || 6140 (bp->b_datap->db_type == M_COPYOUT)) { 6141 mutex_exit(&stp->sd_lock); 6142 if (bp->b_cont) { 6143 freemsg(bp->b_cont); 6144 bp->b_cont = NULL; 6145 } 6146 bp->b_datap->db_type = M_IOCDATA; 6147 bp->b_wptr = bp->b_rptr + 6148 sizeof (struct copyresp); 6149 resp = (struct copyresp *)bp->b_rptr; 6150 resp->cp_rval = 6151 (caddr_t)1; /* failure */ 6152 stream_willservice(stp); 6153 putnext(stp->sd_wrq, bp); 6154 stream_runservice(stp); 6155 mutex_enter(&stp->sd_lock); 6156 } else { 6157 freemsg(bp); 6158 } 6159 } 6160 stp->sd_flag &= ~waitflags; 6161 cv_broadcast(&stp->sd_iocmonitor); 6162 mutex_exit(&stp->sd_lock); 6163 crfree(crp); 6164 return (error); 6165 } 6166 } 6167 bp = stp->sd_iocblk; 6168 /* 6169 * Note: it is strictly impossible to get here with sd_iocblk set to 6170 * -1. This is because the initial loop above doesn't allow any new 6171 * ioctls into the fray until all others have passed this point. 6172 */ 6173 ASSERT(bp != NULL && bp != (mblk_t *)-1); 6174 TRACE_1(TR_FAC_STREAMS_FR, 6175 TR_STRDOIOCTL_ACK, "strdoioctl got reply: bp %p", bp); 6176 if ((bp->b_datap->db_type == M_IOCACK) || 6177 (bp->b_datap->db_type == M_IOCNAK)) { 6178 /* for detection of duplicate ioctl replies */ 6179 stp->sd_iocblk = (mblk_t *)-1; 6180 stp->sd_flag &= ~waitflags; 6181 cv_broadcast(&stp->sd_iocmonitor); 6182 mutex_exit(&stp->sd_lock); 6183 } else { 6184 /* 6185 * flags not cleared here because we're still doing 6186 * copy in/out for ioctl. 6187 */ 6188 stp->sd_iocblk = NULL; 6189 mutex_exit(&stp->sd_lock); 6190 } 6191 6192 6193 /* 6194 * Have received acknowledgment. 6195 */ 6196 6197 switch (bp->b_datap->db_type) { 6198 case M_IOCACK: 6199 /* 6200 * Positive ack. 6201 */ 6202 iocbp = (struct iocblk *)bp->b_rptr; 6203 6204 /* 6205 * Set error if indicated. 
6206 */ 6207 if (iocbp->ioc_error) { 6208 error = iocbp->ioc_error; 6209 break; 6210 } 6211 6212 /* 6213 * Set return value. 6214 */ 6215 *rvalp = iocbp->ioc_rval; 6216 6217 /* 6218 * Data may have been returned in ACK message (ioc_count > 0). 6219 * If so, copy it out to the user's buffer. 6220 */ 6221 if (iocbp->ioc_count && !transparent) { 6222 if (error = getiocd(bp, strioc->ic_dp, copyflag)) 6223 break; 6224 } 6225 if (!transparent) { 6226 if (len) /* an M_COPYOUT was used with I_STR */ 6227 strioc->ic_len = len; 6228 else 6229 strioc->ic_len = (int)iocbp->ioc_count; 6230 } 6231 break; 6232 6233 case M_IOCNAK: 6234 /* 6235 * Negative ack. 6236 * 6237 * The only thing to do is set error as specified 6238 * in neg ack packet. 6239 */ 6240 iocbp = (struct iocblk *)bp->b_rptr; 6241 6242 error = (iocbp->ioc_error ? iocbp->ioc_error : EINVAL); 6243 break; 6244 6245 case M_COPYIN: 6246 /* 6247 * Driver or module has requested user ioctl data. 6248 */ 6249 reqp = (struct copyreq *)bp->b_rptr; 6250 6251 /* 6252 * M_COPYIN should *never* have a message attached, though 6253 * it's harmless if it does -- thus, panic on a DEBUG 6254 * kernel and just free it on a non-DEBUG build. 
6255 */ 6256 ASSERT(bp->b_cont == NULL); 6257 if (bp->b_cont != NULL) { 6258 freemsg(bp->b_cont); 6259 bp->b_cont = NULL; 6260 } 6261 6262 error = putiocd(bp, reqp->cq_addr, flag, crp); 6263 if (error && bp->b_cont) { 6264 freemsg(bp->b_cont); 6265 bp->b_cont = NULL; 6266 } 6267 6268 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp); 6269 bp->b_datap->db_type = M_IOCDATA; 6270 6271 mblk_setcred(bp, crp, curproc->p_pid); 6272 resp = (struct copyresp *)bp->b_rptr; 6273 resp->cp_rval = (caddr_t)(uintptr_t)error; 6274 resp->cp_flag = (fflags & FMODELS); 6275 6276 stream_willservice(stp); 6277 putnext(stp->sd_wrq, bp); 6278 stream_runservice(stp); 6279 6280 if (error) { 6281 mutex_enter(&stp->sd_lock); 6282 stp->sd_flag &= ~waitflags; 6283 cv_broadcast(&stp->sd_iocmonitor); 6284 mutex_exit(&stp->sd_lock); 6285 crfree(crp); 6286 return (error); 6287 } 6288 6289 goto waitioc; 6290 6291 case M_COPYOUT: 6292 /* 6293 * Driver or module has ioctl data for a user. 6294 */ 6295 reqp = (struct copyreq *)bp->b_rptr; 6296 ASSERT(bp->b_cont != NULL); 6297 6298 /* 6299 * Always (transparent or non-transparent ) 6300 * use the address specified in the request 6301 */ 6302 taddr = reqp->cq_addr; 6303 if (!transparent) 6304 len = (int)reqp->cq_size; 6305 6306 /* copyout data to the provided address */ 6307 error = getiocd(bp, taddr, copyflag); 6308 6309 freemsg(bp->b_cont); 6310 bp->b_cont = NULL; 6311 6312 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp); 6313 bp->b_datap->db_type = M_IOCDATA; 6314 6315 mblk_setcred(bp, crp, curproc->p_pid); 6316 resp = (struct copyresp *)bp->b_rptr; 6317 resp->cp_rval = (caddr_t)(uintptr_t)error; 6318 resp->cp_flag = (fflags & FMODELS); 6319 6320 stream_willservice(stp); 6321 putnext(stp->sd_wrq, bp); 6322 stream_runservice(stp); 6323 6324 if (error) { 6325 mutex_enter(&stp->sd_lock); 6326 stp->sd_flag &= ~waitflags; 6327 cv_broadcast(&stp->sd_iocmonitor); 6328 mutex_exit(&stp->sd_lock); 6329 crfree(crp); 6330 return (error); 6331 } 6332 goto 
waitioc; 6333 6334 default: 6335 ASSERT(0); 6336 mutex_enter(&stp->sd_lock); 6337 stp->sd_flag &= ~waitflags; 6338 cv_broadcast(&stp->sd_iocmonitor); 6339 mutex_exit(&stp->sd_lock); 6340 break; 6341 } 6342 6343 freemsg(bp); 6344 crfree(crp); 6345 return (error); 6346 } 6347 6348 /* 6349 * Send an M_CMD message downstream and wait for a reply. This is a ptools 6350 * special used to retrieve information from modules/drivers a stream without 6351 * being subjected to flow control or interfering with pending messages on the 6352 * stream (e.g. an ioctl in flight). 6353 */ 6354 int 6355 strdocmd(struct stdata *stp, struct strcmd *scp, cred_t *crp) 6356 { 6357 mblk_t *mp; 6358 struct cmdblk *cmdp; 6359 int error = 0; 6360 int errs = STRHUP|STRDERR|STWRERR|STPLEX; 6361 clock_t rval, timeout = STRTIMOUT; 6362 6363 if (scp->sc_len < 0 || scp->sc_len > sizeof (scp->sc_buf) || 6364 scp->sc_timeout < -1) 6365 return (EINVAL); 6366 6367 if (scp->sc_timeout > 0) 6368 timeout = scp->sc_timeout * MILLISEC; 6369 6370 if ((mp = allocb_cred(sizeof (struct cmdblk), crp, 6371 curproc->p_pid)) == NULL) 6372 return (ENOMEM); 6373 6374 crhold(crp); 6375 6376 cmdp = (struct cmdblk *)mp->b_wptr; 6377 cmdp->cb_cr = crp; 6378 cmdp->cb_cmd = scp->sc_cmd; 6379 cmdp->cb_len = scp->sc_len; 6380 cmdp->cb_error = 0; 6381 mp->b_wptr += sizeof (struct cmdblk); 6382 6383 DB_TYPE(mp) = M_CMD; 6384 DB_CPID(mp) = curproc->p_pid; 6385 6386 /* 6387 * Copy in the payload. 
6388 */ 6389 if (cmdp->cb_len > 0) { 6390 mp->b_cont = allocb_cred(sizeof (scp->sc_buf), crp, 6391 curproc->p_pid); 6392 if (mp->b_cont == NULL) { 6393 error = ENOMEM; 6394 goto out; 6395 } 6396 6397 /* cb_len comes from sc_len, which has already been checked */ 6398 ASSERT(cmdp->cb_len <= sizeof (scp->sc_buf)); 6399 (void) bcopy(scp->sc_buf, mp->b_cont->b_wptr, cmdp->cb_len); 6400 mp->b_cont->b_wptr += cmdp->cb_len; 6401 DB_CPID(mp->b_cont) = curproc->p_pid; 6402 } 6403 6404 /* 6405 * Since this mechanism is strictly for ptools, and since only one 6406 * process can be grabbed at a time, we simply fail if there's 6407 * currently an operation pending. 6408 */ 6409 mutex_enter(&stp->sd_lock); 6410 if (stp->sd_flag & STRCMDWAIT) { 6411 mutex_exit(&stp->sd_lock); 6412 error = EBUSY; 6413 goto out; 6414 } 6415 stp->sd_flag |= STRCMDWAIT; 6416 ASSERT(stp->sd_cmdblk == NULL); 6417 mutex_exit(&stp->sd_lock); 6418 6419 putnext(stp->sd_wrq, mp); 6420 mp = NULL; 6421 6422 /* 6423 * Timed wait for acknowledgment. If the reply has already arrived, 6424 * don't sleep. If awakened from the sleep, fail only if the reply 6425 * has not arrived by then. Otherwise, process the reply. 6426 */ 6427 mutex_enter(&stp->sd_lock); 6428 while (stp->sd_cmdblk == NULL) { 6429 if (stp->sd_flag & errs) { 6430 if ((error = strgeterr(stp, errs, 0)) != 0) 6431 goto waitout; 6432 } 6433 6434 rval = str_cv_wait(&stp->sd_monitor, &stp->sd_lock, timeout, 0); 6435 if (stp->sd_cmdblk != NULL) 6436 break; 6437 6438 if (rval <= 0) { 6439 error = (rval == 0) ? EINTR : ETIME; 6440 goto waitout; 6441 } 6442 } 6443 6444 /* 6445 * We received a reply. 
6446 */ 6447 mp = stp->sd_cmdblk; 6448 stp->sd_cmdblk = NULL; 6449 ASSERT(mp != NULL && DB_TYPE(mp) == M_CMD); 6450 ASSERT(stp->sd_flag & STRCMDWAIT); 6451 stp->sd_flag &= ~STRCMDWAIT; 6452 mutex_exit(&stp->sd_lock); 6453 6454 cmdp = (struct cmdblk *)mp->b_rptr; 6455 if ((error = cmdp->cb_error) != 0) 6456 goto out; 6457 6458 /* 6459 * Data may have been returned in the reply (cb_len > 0). 6460 * If so, copy it out to the user's buffer. 6461 */ 6462 if (cmdp->cb_len > 0) { 6463 if (mp->b_cont == NULL || MBLKL(mp->b_cont) < cmdp->cb_len) { 6464 error = EPROTO; 6465 goto out; 6466 } 6467 6468 cmdp->cb_len = MIN(cmdp->cb_len, sizeof (scp->sc_buf)); 6469 (void) bcopy(mp->b_cont->b_rptr, scp->sc_buf, cmdp->cb_len); 6470 } 6471 scp->sc_len = cmdp->cb_len; 6472 out: 6473 freemsg(mp); 6474 crfree(crp); 6475 return (error); 6476 waitout: 6477 ASSERT(stp->sd_cmdblk == NULL); 6478 stp->sd_flag &= ~STRCMDWAIT; 6479 mutex_exit(&stp->sd_lock); 6480 crfree(crp); 6481 return (error); 6482 } 6483 6484 /* 6485 * For the SunOS keyboard driver. 6486 * Return the next available "ioctl" sequence number. 6487 * Exported, so that streams modules can send "ioctl" messages 6488 * downstream from their open routine. 6489 */ 6490 int 6491 getiocseqno(void) 6492 { 6493 int i; 6494 6495 mutex_enter(&strresources); 6496 i = ++ioc_id; 6497 mutex_exit(&strresources); 6498 return (i); 6499 } 6500 6501 /* 6502 * Get the next message from the read queue. If the message is 6503 * priority, STRPRI will have been set by strrput(). This flag 6504 * should be reset only when the entire message at the front of the 6505 * queue as been consumed. 6506 * 6507 * NOTE: strgetmsg and kstrgetmsg have much of the logic in common. 
6508 */ 6509 int 6510 strgetmsg( 6511 struct vnode *vp, 6512 struct strbuf *mctl, 6513 struct strbuf *mdata, 6514 unsigned char *prip, 6515 int *flagsp, 6516 int fmode, 6517 rval_t *rvp) 6518 { 6519 struct stdata *stp; 6520 mblk_t *bp, *nbp; 6521 mblk_t *savemp = NULL; 6522 mblk_t *savemptail = NULL; 6523 uint_t old_sd_flag; 6524 int flg; 6525 int more = 0; 6526 int error = 0; 6527 char first = 1; 6528 uint_t mark; /* Contains MSG*MARK and _LASTMARK */ 6529 #define _LASTMARK 0x8000 /* Distinct from MSG*MARK */ 6530 unsigned char pri = 0; 6531 queue_t *q; 6532 int pr = 0; /* Partial read successful */ 6533 struct uio uios; 6534 struct uio *uiop = &uios; 6535 struct iovec iovs; 6536 unsigned char type; 6537 6538 TRACE_1(TR_FAC_STREAMS_FR, TR_STRGETMSG_ENTER, 6539 "strgetmsg:%p", vp); 6540 6541 ASSERT(vp->v_stream); 6542 stp = vp->v_stream; 6543 rvp->r_val1 = 0; 6544 6545 mutex_enter(&stp->sd_lock); 6546 6547 if ((error = i_straccess(stp, JCREAD)) != 0) { 6548 mutex_exit(&stp->sd_lock); 6549 return (error); 6550 } 6551 6552 if (stp->sd_flag & (STRDERR|STPLEX)) { 6553 error = strgeterr(stp, STRDERR|STPLEX, 0); 6554 if (error != 0) { 6555 mutex_exit(&stp->sd_lock); 6556 return (error); 6557 } 6558 } 6559 mutex_exit(&stp->sd_lock); 6560 6561 switch (*flagsp) { 6562 case MSG_HIPRI: 6563 if (*prip != 0) 6564 return (EINVAL); 6565 break; 6566 6567 case MSG_ANY: 6568 case MSG_BAND: 6569 break; 6570 6571 default: 6572 return (EINVAL); 6573 } 6574 /* 6575 * Setup uio and iov for data part 6576 */ 6577 iovs.iov_base = mdata->buf; 6578 iovs.iov_len = mdata->maxlen; 6579 uios.uio_iov = &iovs; 6580 uios.uio_iovcnt = 1; 6581 uios.uio_loffset = 0; 6582 uios.uio_segflg = UIO_USERSPACE; 6583 uios.uio_fmode = 0; 6584 uios.uio_extflg = UIO_COPY_CACHED; 6585 uios.uio_resid = mdata->maxlen; 6586 uios.uio_offset = 0; 6587 6588 q = _RD(stp->sd_wrq); 6589 mutex_enter(&stp->sd_lock); 6590 old_sd_flag = stp->sd_flag; 6591 mark = 0; 6592 for (;;) { 6593 int done = 0; 6594 mblk_t *q_first = 
q->q_first; 6595 6596 /* 6597 * Get the next message of appropriate priority 6598 * from the stream head. If the caller is interested 6599 * in band or hipri messages, then they should already 6600 * be enqueued at the stream head. On the other hand 6601 * if the caller wants normal (band 0) messages, they 6602 * might be deferred in a synchronous stream and they 6603 * will need to be pulled up. 6604 * 6605 * After we have dequeued a message, we might find that 6606 * it was a deferred M_SIG that was enqueued at the 6607 * stream head. It must now be posted as part of the 6608 * read by calling strsignal_nolock(). 6609 * 6610 * Also note that strrput does not enqueue an M_PCSIG, 6611 * and there cannot be more than one hipri message, 6612 * so there was no need to have the M_PCSIG case. 6613 * 6614 * At some time it might be nice to try and wrap the 6615 * functionality of kstrgetmsg() and strgetmsg() into 6616 * a common routine so to reduce the amount of replicated 6617 * code (since they are extremely similar). 6618 */ 6619 if (!(*flagsp & (MSG_HIPRI|MSG_BAND))) { 6620 /* Asking for normal, band0 data */ 6621 bp = strget(stp, q, uiop, first, &error); 6622 ASSERT(MUTEX_HELD(&stp->sd_lock)); 6623 if (bp != NULL) { 6624 if (DB_TYPE(bp) == M_SIG) { 6625 strsignal_nolock(stp, *bp->b_rptr, 6626 bp->b_band); 6627 freemsg(bp); 6628 continue; 6629 } else { 6630 break; 6631 } 6632 } 6633 if (error != 0) 6634 goto getmout; 6635 6636 /* 6637 * We can't depend on the value of STRPRI here because 6638 * the stream head may be in transit. 
Therefore, we 6639 * must look at the type of the first message to 6640 * determine if a high priority messages is waiting 6641 */ 6642 } else if ((*flagsp & MSG_HIPRI) && q_first != NULL && 6643 DB_TYPE(q_first) >= QPCTL && 6644 (bp = getq_noenab(q, 0)) != NULL) { 6645 /* Asked for HIPRI and got one */ 6646 ASSERT(DB_TYPE(bp) >= QPCTL); 6647 break; 6648 } else if ((*flagsp & MSG_BAND) && q_first != NULL && 6649 ((q_first->b_band >= *prip) || DB_TYPE(q_first) >= QPCTL) && 6650 (bp = getq_noenab(q, 0)) != NULL) { 6651 /* 6652 * Asked for at least band "prip" and got either at 6653 * least that band or a hipri message. 6654 */ 6655 ASSERT(bp->b_band >= *prip || DB_TYPE(bp) >= QPCTL); 6656 if (DB_TYPE(bp) == M_SIG) { 6657 strsignal_nolock(stp, *bp->b_rptr, bp->b_band); 6658 freemsg(bp); 6659 continue; 6660 } else { 6661 break; 6662 } 6663 } 6664 6665 /* No data. Time to sleep? */ 6666 qbackenable(q, 0); 6667 6668 /* 6669 * If STRHUP or STREOF, return 0 length control and data. 6670 * If resid is 0, then a read(fd,buf,0) was done. Do not 6671 * sleep to satisfy this request because by default we have 6672 * zero bytes to return. 
6673 */ 6674 if ((stp->sd_flag & (STRHUP|STREOF)) || (mctl->maxlen == 0 && 6675 mdata->maxlen == 0)) { 6676 mctl->len = mdata->len = 0; 6677 *flagsp = 0; 6678 mutex_exit(&stp->sd_lock); 6679 return (0); 6680 } 6681 TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_WAIT, 6682 "strgetmsg calls strwaitq:%p, %p", 6683 vp, uiop); 6684 if (((error = strwaitq(stp, GETWAIT, (ssize_t)0, fmode, -1, 6685 &done)) != 0) || done) { 6686 TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_DONE, 6687 "strgetmsg error or done:%p, %p", 6688 vp, uiop); 6689 mutex_exit(&stp->sd_lock); 6690 return (error); 6691 } 6692 TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_AWAKE, 6693 "strgetmsg awakes:%p, %p", vp, uiop); 6694 if ((error = i_straccess(stp, JCREAD)) != 0) { 6695 mutex_exit(&stp->sd_lock); 6696 return (error); 6697 } 6698 first = 0; 6699 } 6700 ASSERT(bp != NULL); 6701 /* 6702 * Extract any mark information. If the message is not completely 6703 * consumed this information will be put in the mblk 6704 * that is putback. 6705 * If MSGMARKNEXT is set and the message is completely consumed 6706 * the STRATMARK flag will be set below. Likewise, if 6707 * MSGNOTMARKNEXT is set and the message is 6708 * completely consumed STRNOTATMARK will be set. 6709 */ 6710 mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT); 6711 ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) != 6712 (MSGMARKNEXT|MSGNOTMARKNEXT)); 6713 if (mark != 0 && bp == stp->sd_mark) { 6714 mark |= _LASTMARK; 6715 stp->sd_mark = NULL; 6716 } 6717 /* 6718 * keep track of the original message type and priority 6719 */ 6720 pri = bp->b_band; 6721 type = bp->b_datap->db_type; 6722 if (type == M_PASSFP) { 6723 if ((mark & _LASTMARK) && (stp->sd_mark == NULL)) 6724 stp->sd_mark = bp; 6725 bp->b_flag |= mark & ~_LASTMARK; 6726 putback(stp, q, bp, pri); 6727 qbackenable(q, pri); 6728 mutex_exit(&stp->sd_lock); 6729 return (EBADMSG); 6730 } 6731 ASSERT(type != M_SIG); 6732 6733 /* 6734 * Set this flag so strrput will not generate signals. 
Need to 6735 * make sure this flag is cleared before leaving this routine 6736 * else signals will stop being sent. 6737 */ 6738 stp->sd_flag |= STRGETINPROG; 6739 mutex_exit(&stp->sd_lock); 6740 6741 if (STREAM_NEEDSERVICE(stp)) 6742 stream_runservice(stp); 6743 6744 /* 6745 * Set HIPRI flag if message is priority. 6746 */ 6747 if (type >= QPCTL) 6748 flg = MSG_HIPRI; 6749 else 6750 flg = MSG_BAND; 6751 6752 /* 6753 * First process PROTO or PCPROTO blocks, if any. 6754 */ 6755 if (mctl->maxlen >= 0 && type != M_DATA) { 6756 size_t n, bcnt; 6757 char *ubuf; 6758 6759 bcnt = mctl->maxlen; 6760 ubuf = mctl->buf; 6761 while (bp != NULL && bp->b_datap->db_type != M_DATA) { 6762 if ((n = MIN(bcnt, bp->b_wptr - bp->b_rptr)) != 0 && 6763 copyout(bp->b_rptr, ubuf, n)) { 6764 error = EFAULT; 6765 mutex_enter(&stp->sd_lock); 6766 /* 6767 * clear stream head pri flag based on 6768 * first message type 6769 */ 6770 if (type >= QPCTL) { 6771 ASSERT(type == M_PCPROTO); 6772 stp->sd_flag &= ~STRPRI; 6773 } 6774 more = 0; 6775 freemsg(bp); 6776 goto getmout; 6777 } 6778 ubuf += n; 6779 bp->b_rptr += n; 6780 if (bp->b_rptr >= bp->b_wptr) { 6781 nbp = bp; 6782 bp = bp->b_cont; 6783 freeb(nbp); 6784 } 6785 ASSERT(n <= bcnt); 6786 bcnt -= n; 6787 if (bcnt == 0) 6788 break; 6789 } 6790 mctl->len = mctl->maxlen - bcnt; 6791 } else 6792 mctl->len = -1; 6793 6794 if (bp && bp->b_datap->db_type != M_DATA) { 6795 /* 6796 * More PROTO blocks in msg. 6797 */ 6798 more |= MORECTL; 6799 savemp = bp; 6800 while (bp && bp->b_datap->db_type != M_DATA) { 6801 savemptail = bp; 6802 bp = bp->b_cont; 6803 } 6804 savemptail->b_cont = NULL; 6805 } 6806 6807 /* 6808 * Now process DATA blocks, if any. 6809 */ 6810 if (mdata->maxlen >= 0 && bp) { 6811 /* 6812 * struiocopyout will consume a potential zero-length 6813 * M_DATA even if uio_resid is zero. 
6814 */ 6815 size_t oldresid = uiop->uio_resid; 6816 6817 bp = struiocopyout(bp, uiop, &error); 6818 if (error != 0) { 6819 mutex_enter(&stp->sd_lock); 6820 /* 6821 * clear stream head hi pri flag based on 6822 * first message 6823 */ 6824 if (type >= QPCTL) { 6825 ASSERT(type == M_PCPROTO); 6826 stp->sd_flag &= ~STRPRI; 6827 } 6828 more = 0; 6829 freemsg(savemp); 6830 goto getmout; 6831 } 6832 /* 6833 * (pr == 1) indicates a partial read. 6834 */ 6835 if (oldresid > uiop->uio_resid) 6836 pr = 1; 6837 mdata->len = mdata->maxlen - uiop->uio_resid; 6838 } else 6839 mdata->len = -1; 6840 6841 if (bp) { /* more data blocks in msg */ 6842 more |= MOREDATA; 6843 if (savemp) 6844 savemptail->b_cont = bp; 6845 else 6846 savemp = bp; 6847 } 6848 6849 mutex_enter(&stp->sd_lock); 6850 if (savemp) { 6851 if (pr && (savemp->b_datap->db_type == M_DATA) && 6852 msgnodata(savemp)) { 6853 /* 6854 * Avoid queuing a zero-length tail part of 6855 * a message. pr=1 indicates that we read some of 6856 * the message. 6857 */ 6858 freemsg(savemp); 6859 more &= ~MOREDATA; 6860 /* 6861 * clear stream head hi pri flag based on 6862 * first message 6863 */ 6864 if (type >= QPCTL) { 6865 ASSERT(type == M_PCPROTO); 6866 stp->sd_flag &= ~STRPRI; 6867 } 6868 } else { 6869 savemp->b_band = pri; 6870 /* 6871 * If the first message was HIPRI and the one we're 6872 * putting back isn't, then clear STRPRI, otherwise 6873 * set STRPRI again. Note that we must set STRPRI 6874 * again since the flush logic in strrput_nondata() 6875 * may have cleared it while we had sd_lock dropped. 6876 */ 6877 if (type >= QPCTL) { 6878 ASSERT(type == M_PCPROTO); 6879 if (queclass(savemp) < QPCTL) 6880 stp->sd_flag &= ~STRPRI; 6881 else 6882 stp->sd_flag |= STRPRI; 6883 } else if (queclass(savemp) >= QPCTL) { 6884 /* 6885 * The first message was not a HIPRI message, 6886 * but the one we are about to putback is. 
6887 * For simplicitly, we do not allow for HIPRI 6888 * messages to be embedded in the message 6889 * body, so just force it to same type as 6890 * first message. 6891 */ 6892 ASSERT(type == M_DATA || type == M_PROTO); 6893 ASSERT(savemp->b_datap->db_type == M_PCPROTO); 6894 savemp->b_datap->db_type = type; 6895 } 6896 if (mark != 0) { 6897 savemp->b_flag |= mark & ~_LASTMARK; 6898 if ((mark & _LASTMARK) && 6899 (stp->sd_mark == NULL)) { 6900 /* 6901 * If another marked message arrived 6902 * while sd_lock was not held sd_mark 6903 * would be non-NULL. 6904 */ 6905 stp->sd_mark = savemp; 6906 } 6907 } 6908 putback(stp, q, savemp, pri); 6909 } 6910 } else { 6911 /* 6912 * The complete message was consumed. 6913 * 6914 * If another M_PCPROTO arrived while sd_lock was not held 6915 * it would have been discarded since STRPRI was still set. 6916 * 6917 * Move the MSG*MARKNEXT information 6918 * to the stream head just in case 6919 * the read queue becomes empty. 6920 * clear stream head hi pri flag based on 6921 * first message 6922 * 6923 * If the stream head was at the mark 6924 * (STRATMARK) before we dropped sd_lock above 6925 * and some data was consumed then we have 6926 * moved past the mark thus STRATMARK is 6927 * cleared. However, if a message arrived in 6928 * strrput during the copyout above causing 6929 * STRATMARK to be set we can not clear that 6930 * flag. 
6931 */ 6932 if (type >= QPCTL) { 6933 ASSERT(type == M_PCPROTO); 6934 stp->sd_flag &= ~STRPRI; 6935 } 6936 if (mark & (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) { 6937 if (mark & MSGMARKNEXT) { 6938 stp->sd_flag &= ~STRNOTATMARK; 6939 stp->sd_flag |= STRATMARK; 6940 } else if (mark & MSGNOTMARKNEXT) { 6941 stp->sd_flag &= ~STRATMARK; 6942 stp->sd_flag |= STRNOTATMARK; 6943 } else { 6944 stp->sd_flag &= ~(STRATMARK|STRNOTATMARK); 6945 } 6946 } else if (pr && (old_sd_flag & STRATMARK)) { 6947 stp->sd_flag &= ~STRATMARK; 6948 } 6949 } 6950 6951 *flagsp = flg; 6952 *prip = pri; 6953 6954 /* 6955 * Getmsg cleanup processing - if the state of the queue has changed 6956 * some signals may need to be sent and/or poll awakened. 6957 */ 6958 getmout: 6959 qbackenable(q, pri); 6960 6961 /* 6962 * We dropped the stream head lock above. Send all M_SIG messages 6963 * before processing stream head for SIGPOLL messages. 6964 */ 6965 ASSERT(MUTEX_HELD(&stp->sd_lock)); 6966 while ((bp = q->q_first) != NULL && 6967 (bp->b_datap->db_type == M_SIG)) { 6968 /* 6969 * sd_lock is held so the content of the read queue can not 6970 * change. 6971 */ 6972 bp = getq(q); 6973 ASSERT(bp != NULL && bp->b_datap->db_type == M_SIG); 6974 6975 strsignal_nolock(stp, *bp->b_rptr, bp->b_band); 6976 mutex_exit(&stp->sd_lock); 6977 freemsg(bp); 6978 if (STREAM_NEEDSERVICE(stp)) 6979 stream_runservice(stp); 6980 mutex_enter(&stp->sd_lock); 6981 } 6982 6983 /* 6984 * stream head cannot change while we make the determination 6985 * whether or not to send a signal. Drop the flag to allow strrput 6986 * to send firstmsgsigs again. 6987 */ 6988 stp->sd_flag &= ~STRGETINPROG; 6989 6990 /* 6991 * If the type of message at the front of the queue changed 6992 * due to the receive the appropriate signals and pollwakeup events 6993 * are generated. The type of changes are: 6994 * Processed a hipri message, q_first is not hipri. 6995 * Processed a band X message, and q_first is band Y. 
6996 * The generated signals and pollwakeups are identical to what 6997 * strrput() generates should the message that is now on q_first 6998 * arrive to an empty read queue. 6999 * 7000 * Note: only strrput will send a signal for a hipri message. 7001 */ 7002 if ((bp = q->q_first) != NULL && !(stp->sd_flag & STRPRI)) { 7003 strsigset_t signals = 0; 7004 strpollset_t pollwakeups = 0; 7005 7006 if (flg & MSG_HIPRI) { 7007 /* 7008 * Removed a hipri message. Regular data at 7009 * the front of the queue. 7010 */ 7011 if (bp->b_band == 0) { 7012 signals = S_INPUT | S_RDNORM; 7013 pollwakeups = POLLIN | POLLRDNORM; 7014 } else { 7015 signals = S_INPUT | S_RDBAND; 7016 pollwakeups = POLLIN | POLLRDBAND; 7017 } 7018 } else if (pri != bp->b_band) { 7019 /* 7020 * The band is different for the new q_first. 7021 */ 7022 if (bp->b_band == 0) { 7023 signals = S_RDNORM; 7024 pollwakeups = POLLIN | POLLRDNORM; 7025 } else { 7026 signals = S_RDBAND; 7027 pollwakeups = POLLIN | POLLRDBAND; 7028 } 7029 } 7030 7031 if (pollwakeups != 0) { 7032 if (pollwakeups == (POLLIN | POLLRDNORM)) { 7033 if (!(stp->sd_rput_opt & SR_POLLIN)) 7034 goto no_pollwake; 7035 stp->sd_rput_opt &= ~SR_POLLIN; 7036 } 7037 mutex_exit(&stp->sd_lock); 7038 pollwakeup(&stp->sd_pollist, pollwakeups); 7039 mutex_enter(&stp->sd_lock); 7040 } 7041 no_pollwake: 7042 7043 if (stp->sd_sigflags & signals) 7044 strsendsig(stp->sd_siglist, signals, bp->b_band, 0); 7045 } 7046 mutex_exit(&stp->sd_lock); 7047 7048 rvp->r_val1 = more; 7049 return (error); 7050 #undef _LASTMARK 7051 } 7052 7053 /* 7054 * Get the next message from the read queue. If the message is 7055 * priority, STRPRI will have been set by strrput(). This flag 7056 * should be reset only when the entire message at the front of the 7057 * queue as been consumed. 7058 * 7059 * If uiop is NULL all data is returned in mctlp. 7060 * Note that a NULL uiop implies that FNDELAY and FNONBLOCK are assumed 7061 * not enabled. 
7062 * The timeout parameter is in milliseconds; -1 for infinity. 7063 * This routine handles the consolidation private flags: 7064 * MSG_IGNERROR Ignore any stream head error except STPLEX. 7065 * MSG_DELAYERROR Defer the error check until the queue is empty. 7066 * MSG_HOLDSIG Hold signals while waiting for data. 7067 * MSG_IPEEK Only peek at messages. 7068 * MSG_DISCARDTAIL Discard the tail M_DATA part of the message 7069 * that doesn't fit. 7070 * MSG_NOMARK If the message is marked leave it on the queue. 7071 * 7072 * NOTE: strgetmsg and kstrgetmsg have much of the logic in common. 7073 */ 7074 int 7075 kstrgetmsg( 7076 struct vnode *vp, 7077 mblk_t **mctlp, 7078 struct uio *uiop, 7079 unsigned char *prip, 7080 int *flagsp, 7081 clock_t timout, 7082 rval_t *rvp) 7083 { 7084 struct stdata *stp; 7085 mblk_t *bp, *nbp; 7086 mblk_t *savemp = NULL; 7087 mblk_t *savemptail = NULL; 7088 int flags; 7089 uint_t old_sd_flag; 7090 int flg; 7091 int more = 0; 7092 int error = 0; 7093 char first = 1; 7094 uint_t mark; /* Contains MSG*MARK and _LASTMARK */ 7095 #define _LASTMARK 0x8000 /* Distinct from MSG*MARK */ 7096 unsigned char pri = 0; 7097 queue_t *q; 7098 int pr = 0; /* Partial read successful */ 7099 unsigned char type; 7100 7101 TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_ENTER, 7102 "kstrgetmsg:%p", vp); 7103 7104 ASSERT(vp->v_stream); 7105 stp = vp->v_stream; 7106 rvp->r_val1 = 0; 7107 7108 mutex_enter(&stp->sd_lock); 7109 7110 if ((error = i_straccess(stp, JCREAD)) != 0) { 7111 mutex_exit(&stp->sd_lock); 7112 return (error); 7113 } 7114 7115 flags = *flagsp; 7116 if (stp->sd_flag & (STRDERR|STPLEX)) { 7117 if ((stp->sd_flag & STPLEX) || 7118 (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == 0) { 7119 error = strgeterr(stp, STRDERR|STPLEX, 7120 (flags & MSG_IPEEK)); 7121 if (error != 0) { 7122 mutex_exit(&stp->sd_lock); 7123 return (error); 7124 } 7125 } 7126 } 7127 mutex_exit(&stp->sd_lock); 7128 7129 switch (flags & (MSG_HIPRI|MSG_ANY|MSG_BAND)) { 7130 case MSG_HIPRI: 
7131 if (*prip != 0) 7132 return (EINVAL); 7133 break; 7134 7135 case MSG_ANY: 7136 case MSG_BAND: 7137 break; 7138 7139 default: 7140 return (EINVAL); 7141 } 7142 7143 retry: 7144 q = _RD(stp->sd_wrq); 7145 mutex_enter(&stp->sd_lock); 7146 old_sd_flag = stp->sd_flag; 7147 mark = 0; 7148 for (;;) { 7149 int done = 0; 7150 int waitflag; 7151 int fmode; 7152 mblk_t *q_first = q->q_first; 7153 7154 /* 7155 * This section of the code operates just like the code 7156 * in strgetmsg(). There is a comment there about what 7157 * is going on here. 7158 */ 7159 if (!(flags & (MSG_HIPRI|MSG_BAND))) { 7160 /* Asking for normal, band0 data */ 7161 bp = strget(stp, q, uiop, first, &error); 7162 ASSERT(MUTEX_HELD(&stp->sd_lock)); 7163 if (bp != NULL) { 7164 if (DB_TYPE(bp) == M_SIG) { 7165 strsignal_nolock(stp, *bp->b_rptr, 7166 bp->b_band); 7167 freemsg(bp); 7168 continue; 7169 } else { 7170 break; 7171 } 7172 } 7173 if (error != 0) { 7174 goto getmout; 7175 } 7176 /* 7177 * We can't depend on the value of STRPRI here because 7178 * the stream head may be in transit. Therefore, we 7179 * must look at the type of the first message to 7180 * determine if a high priority messages is waiting 7181 */ 7182 } else if ((flags & MSG_HIPRI) && q_first != NULL && 7183 DB_TYPE(q_first) >= QPCTL && 7184 (bp = getq_noenab(q, 0)) != NULL) { 7185 ASSERT(DB_TYPE(bp) >= QPCTL); 7186 break; 7187 } else if ((flags & MSG_BAND) && q_first != NULL && 7188 ((q_first->b_band >= *prip) || DB_TYPE(q_first) >= QPCTL) && 7189 (bp = getq_noenab(q, 0)) != NULL) { 7190 /* 7191 * Asked for at least band "prip" and got either at 7192 * least that band or a hipri message. 7193 */ 7194 ASSERT(bp->b_band >= *prip || DB_TYPE(bp) >= QPCTL); 7195 if (DB_TYPE(bp) == M_SIG) { 7196 strsignal_nolock(stp, *bp->b_rptr, bp->b_band); 7197 freemsg(bp); 7198 continue; 7199 } else { 7200 break; 7201 } 7202 } 7203 7204 /* No data. Time to sleep? */ 7205 qbackenable(q, 0); 7206 7207 /* 7208 * Delayed error notification? 
7209 */ 7210 if ((stp->sd_flag & (STRDERR|STPLEX)) && 7211 (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == MSG_DELAYERROR) { 7212 error = strgeterr(stp, STRDERR|STPLEX, 7213 (flags & MSG_IPEEK)); 7214 if (error != 0) { 7215 mutex_exit(&stp->sd_lock); 7216 return (error); 7217 } 7218 } 7219 7220 /* 7221 * If STRHUP or STREOF, return 0 length control and data. 7222 * If a read(fd,buf,0) has been done, do not sleep, just 7223 * return. 7224 * 7225 * If mctlp == NULL and uiop == NULL, then the code will 7226 * do the strwaitq. This is an understood way of saying 7227 * sleep "polling" until a message is received. 7228 */ 7229 if ((stp->sd_flag & (STRHUP|STREOF)) || 7230 (uiop != NULL && uiop->uio_resid == 0)) { 7231 if (mctlp != NULL) 7232 *mctlp = NULL; 7233 *flagsp = 0; 7234 mutex_exit(&stp->sd_lock); 7235 return (0); 7236 } 7237 7238 waitflag = GETWAIT; 7239 if (flags & 7240 (MSG_HOLDSIG|MSG_IGNERROR|MSG_IPEEK|MSG_DELAYERROR)) { 7241 if (flags & MSG_HOLDSIG) 7242 waitflag |= STR_NOSIG; 7243 if (flags & MSG_IGNERROR) 7244 waitflag |= STR_NOERROR; 7245 if (flags & MSG_IPEEK) 7246 waitflag |= STR_PEEK; 7247 if (flags & MSG_DELAYERROR) 7248 waitflag |= STR_DELAYERR; 7249 } 7250 if (uiop != NULL) 7251 fmode = uiop->uio_fmode; 7252 else 7253 fmode = 0; 7254 7255 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_WAIT, 7256 "kstrgetmsg calls strwaitq:%p, %p", 7257 vp, uiop); 7258 if (((error = strwaitq(stp, waitflag, (ssize_t)0, 7259 fmode, timout, &done))) != 0 || done) { 7260 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_DONE, 7261 "kstrgetmsg error or done:%p, %p", 7262 vp, uiop); 7263 mutex_exit(&stp->sd_lock); 7264 return (error); 7265 } 7266 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_AWAKE, 7267 "kstrgetmsg awakes:%p, %p", vp, uiop); 7268 if ((error = i_straccess(stp, JCREAD)) != 0) { 7269 mutex_exit(&stp->sd_lock); 7270 return (error); 7271 } 7272 first = 0; 7273 } 7274 ASSERT(bp != NULL); 7275 /* 7276 * Extract any mark information. 
If the message is not completely 7277 * consumed this information will be put in the mblk 7278 * that is putback. 7279 * If MSGMARKNEXT is set and the message is completely consumed 7280 * the STRATMARK flag will be set below. Likewise, if 7281 * MSGNOTMARKNEXT is set and the message is 7282 * completely consumed STRNOTATMARK will be set. 7283 */ 7284 mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT); 7285 ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) != 7286 (MSGMARKNEXT|MSGNOTMARKNEXT)); 7287 pri = bp->b_band; 7288 if (mark != 0) { 7289 /* 7290 * If the caller doesn't want the mark return. 7291 * Used to implement MSG_WAITALL in sockets. 7292 */ 7293 if (flags & MSG_NOMARK) { 7294 putback(stp, q, bp, pri); 7295 qbackenable(q, pri); 7296 mutex_exit(&stp->sd_lock); 7297 return (EWOULDBLOCK); 7298 } 7299 if (bp == stp->sd_mark) { 7300 mark |= _LASTMARK; 7301 stp->sd_mark = NULL; 7302 } 7303 } 7304 7305 /* 7306 * keep track of the first message type 7307 */ 7308 type = bp->b_datap->db_type; 7309 7310 if (bp->b_datap->db_type == M_PASSFP) { 7311 if ((mark & _LASTMARK) && (stp->sd_mark == NULL)) 7312 stp->sd_mark = bp; 7313 bp->b_flag |= mark & ~_LASTMARK; 7314 putback(stp, q, bp, pri); 7315 qbackenable(q, pri); 7316 mutex_exit(&stp->sd_lock); 7317 return (EBADMSG); 7318 } 7319 ASSERT(type != M_SIG); 7320 7321 if (flags & MSG_IPEEK) { 7322 /* 7323 * Clear any struioflag - we do the uiomove over again 7324 * when peeking since it simplifies the code. 7325 * 7326 * Dup the message and put the original back on the queue. 7327 * If dupmsg() fails, try again with copymsg() to see if 7328 * there is indeed a shortage of memory. dupmsg() may fail 7329 * if db_ref in any of the messages reaches its limit. 7330 */ 7331 7332 if ((nbp = dupmsg(bp)) == NULL && (nbp = copymsg(bp)) == NULL) { 7333 /* 7334 * Restore the state of the stream head since we 7335 * need to drop sd_lock (strwaitbuf is sleeping). 
7336 */ 7337 size_t size = msgdsize(bp); 7338 7339 if ((mark & _LASTMARK) && (stp->sd_mark == NULL)) 7340 stp->sd_mark = bp; 7341 bp->b_flag |= mark & ~_LASTMARK; 7342 putback(stp, q, bp, pri); 7343 mutex_exit(&stp->sd_lock); 7344 error = strwaitbuf(size, BPRI_HI); 7345 if (error) { 7346 /* 7347 * There is no net change to the queue thus 7348 * no need to qbackenable. 7349 */ 7350 return (error); 7351 } 7352 goto retry; 7353 } 7354 7355 if ((mark & _LASTMARK) && (stp->sd_mark == NULL)) 7356 stp->sd_mark = bp; 7357 bp->b_flag |= mark & ~_LASTMARK; 7358 putback(stp, q, bp, pri); 7359 bp = nbp; 7360 } 7361 7362 /* 7363 * Set this flag so strrput will not generate signals. Need to 7364 * make sure this flag is cleared before leaving this routine 7365 * else signals will stop being sent. 7366 */ 7367 stp->sd_flag |= STRGETINPROG; 7368 mutex_exit(&stp->sd_lock); 7369 7370 if ((stp->sd_rputdatafunc != NULL) && (DB_TYPE(bp) == M_DATA)) { 7371 mblk_t *tmp, *prevmp; 7372 7373 /* 7374 * Put first non-data mblk back to stream head and 7375 * cut the mblk chain so sd_rputdatafunc only sees 7376 * M_DATA mblks. We can skip the first mblk since it 7377 * is M_DATA according to the condition above. 7378 */ 7379 for (prevmp = bp, tmp = bp->b_cont; tmp != NULL; 7380 prevmp = tmp, tmp = tmp->b_cont) { 7381 if (DB_TYPE(tmp) != M_DATA) { 7382 prevmp->b_cont = NULL; 7383 mutex_enter(&stp->sd_lock); 7384 putback(stp, q, tmp, tmp->b_band); 7385 mutex_exit(&stp->sd_lock); 7386 break; 7387 } 7388 } 7389 7390 bp = (stp->sd_rputdatafunc)(stp->sd_vnode, bp, 7391 NULL, NULL, NULL, NULL); 7392 7393 if (bp == NULL) 7394 goto retry; 7395 } 7396 7397 if (STREAM_NEEDSERVICE(stp)) 7398 stream_runservice(stp); 7399 7400 /* 7401 * Set HIPRI flag if message is priority. 7402 */ 7403 if (type >= QPCTL) 7404 flg = MSG_HIPRI; 7405 else 7406 flg = MSG_BAND; 7407 7408 /* 7409 * First process PROTO or PCPROTO blocks, if any. 
7410 */ 7411 if (mctlp != NULL && type != M_DATA) { 7412 mblk_t *nbp; 7413 7414 *mctlp = bp; 7415 while (bp->b_cont && bp->b_cont->b_datap->db_type != M_DATA) 7416 bp = bp->b_cont; 7417 nbp = bp->b_cont; 7418 bp->b_cont = NULL; 7419 bp = nbp; 7420 } 7421 7422 if (bp && bp->b_datap->db_type != M_DATA) { 7423 /* 7424 * More PROTO blocks in msg. Will only happen if mctlp is NULL. 7425 */ 7426 more |= MORECTL; 7427 savemp = bp; 7428 while (bp && bp->b_datap->db_type != M_DATA) { 7429 savemptail = bp; 7430 bp = bp->b_cont; 7431 } 7432 savemptail->b_cont = NULL; 7433 } 7434 7435 /* 7436 * Now process DATA blocks, if any. 7437 */ 7438 if (uiop == NULL) { 7439 /* Append data to tail of mctlp */ 7440 7441 if (mctlp != NULL) { 7442 mblk_t **mpp = mctlp; 7443 7444 while (*mpp != NULL) 7445 mpp = &((*mpp)->b_cont); 7446 *mpp = bp; 7447 bp = NULL; 7448 } 7449 } else if (uiop->uio_resid >= 0 && bp) { 7450 size_t oldresid = uiop->uio_resid; 7451 7452 /* 7453 * If a streams message is likely to consist 7454 * of many small mblks, it is pulled up into 7455 * one continuous chunk of memory. 7456 * The size of the first mblk may be bogus because 7457 * successive read() calls on the socket reduce 7458 * the size of this mblk until it is exhausted 7459 * and then the code walks on to the next. Thus 7460 * the size of the mblk may not be the original size 7461 * that was passed up, it's simply a remainder 7462 * and hence can be very small without any 7463 * implication that the packet is badly fragmented. 7464 * So the size of the possible second mblk is 7465 * used to spot a badly fragmented packet. 7466 * see longer comment at top of page 7467 * by mblk_pull_len declaration. 
7468 */ 7469 7470 if (bp->b_cont != NULL && MBLKL(bp->b_cont) < mblk_pull_len) { 7471 (void) pullupmsg(bp, -1); 7472 } 7473 7474 bp = struiocopyout(bp, uiop, &error); 7475 if (error != 0) { 7476 if (mctlp != NULL) { 7477 freemsg(*mctlp); 7478 *mctlp = NULL; 7479 } else 7480 freemsg(savemp); 7481 mutex_enter(&stp->sd_lock); 7482 /* 7483 * clear stream head hi pri flag based on 7484 * first message 7485 */ 7486 if (!(flags & MSG_IPEEK) && (type >= QPCTL)) { 7487 ASSERT(type == M_PCPROTO); 7488 stp->sd_flag &= ~STRPRI; 7489 } 7490 more = 0; 7491 goto getmout; 7492 } 7493 /* 7494 * (pr == 1) indicates a partial read. 7495 */ 7496 if (oldresid > uiop->uio_resid) 7497 pr = 1; 7498 } 7499 7500 if (bp) { /* more data blocks in msg */ 7501 more |= MOREDATA; 7502 if (savemp) 7503 savemptail->b_cont = bp; 7504 else 7505 savemp = bp; 7506 } 7507 7508 mutex_enter(&stp->sd_lock); 7509 if (savemp) { 7510 if (flags & (MSG_IPEEK|MSG_DISCARDTAIL)) { 7511 /* 7512 * When MSG_DISCARDTAIL is set or 7513 * when peeking discard any tail. When peeking this 7514 * is the tail of the dup that was copied out - the 7515 * message has already been putback on the queue. 7516 * Return MOREDATA to the caller even though the data 7517 * is discarded. This is used by sockets (to 7518 * set MSG_TRUNC). 7519 */ 7520 freemsg(savemp); 7521 if (!(flags & MSG_IPEEK) && (type >= QPCTL)) { 7522 ASSERT(type == M_PCPROTO); 7523 stp->sd_flag &= ~STRPRI; 7524 } 7525 } else if (pr && (savemp->b_datap->db_type == M_DATA) && 7526 msgnodata(savemp)) { 7527 /* 7528 * Avoid queuing a zero-length tail part of 7529 * a message. pr=1 indicates that we read some of 7530 * the message. 7531 */ 7532 freemsg(savemp); 7533 more &= ~MOREDATA; 7534 if (type >= QPCTL) { 7535 ASSERT(type == M_PCPROTO); 7536 stp->sd_flag &= ~STRPRI; 7537 } 7538 } else { 7539 savemp->b_band = pri; 7540 /* 7541 * If the first message was HIPRI and the one we're 7542 * putting back isn't, then clear STRPRI, otherwise 7543 * set STRPRI again. 
Note that we must set STRPRI 7544 * again since the flush logic in strrput_nondata() 7545 * may have cleared it while we had sd_lock dropped. 7546 */ 7547 7548 if (type >= QPCTL) { 7549 ASSERT(type == M_PCPROTO); 7550 if (queclass(savemp) < QPCTL) 7551 stp->sd_flag &= ~STRPRI; 7552 else 7553 stp->sd_flag |= STRPRI; 7554 } else if (queclass(savemp) >= QPCTL) { 7555 /* 7556 * The first message was not a HIPRI message, 7557 * but the one we are about to putback is. 7558 * For simplicitly, we do not allow for HIPRI 7559 * messages to be embedded in the message 7560 * body, so just force it to same type as 7561 * first message. 7562 */ 7563 ASSERT(type == M_DATA || type == M_PROTO); 7564 ASSERT(savemp->b_datap->db_type == M_PCPROTO); 7565 savemp->b_datap->db_type = type; 7566 } 7567 if (mark != 0) { 7568 if ((mark & _LASTMARK) && 7569 (stp->sd_mark == NULL)) { 7570 /* 7571 * If another marked message arrived 7572 * while sd_lock was not held sd_mark 7573 * would be non-NULL. 7574 */ 7575 stp->sd_mark = savemp; 7576 } 7577 savemp->b_flag |= mark & ~_LASTMARK; 7578 } 7579 putback(stp, q, savemp, pri); 7580 } 7581 } else if (!(flags & MSG_IPEEK)) { 7582 /* 7583 * The complete message was consumed. 7584 * 7585 * If another M_PCPROTO arrived while sd_lock was not held 7586 * it would have been discarded since STRPRI was still set. 7587 * 7588 * Move the MSG*MARKNEXT information 7589 * to the stream head just in case 7590 * the read queue becomes empty. 7591 * clear stream head hi pri flag based on 7592 * first message 7593 * 7594 * If the stream head was at the mark 7595 * (STRATMARK) before we dropped sd_lock above 7596 * and some data was consumed then we have 7597 * moved past the mark thus STRATMARK is 7598 * cleared. However, if a message arrived in 7599 * strrput during the copyout above causing 7600 * STRATMARK to be set we can not clear that 7601 * flag. 7602 * XXX A "perimeter" would help by single-threading strrput, 7603 * strread, strgetmsg and kstrgetmsg. 
7604 */ 7605 if (type >= QPCTL) { 7606 ASSERT(type == M_PCPROTO); 7607 stp->sd_flag &= ~STRPRI; 7608 } 7609 if (mark & (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) { 7610 if (mark & MSGMARKNEXT) { 7611 stp->sd_flag &= ~STRNOTATMARK; 7612 stp->sd_flag |= STRATMARK; 7613 } else if (mark & MSGNOTMARKNEXT) { 7614 stp->sd_flag &= ~STRATMARK; 7615 stp->sd_flag |= STRNOTATMARK; 7616 } else { 7617 stp->sd_flag &= ~(STRATMARK|STRNOTATMARK); 7618 } 7619 } else if (pr && (old_sd_flag & STRATMARK)) { 7620 stp->sd_flag &= ~STRATMARK; 7621 } 7622 } 7623 7624 *flagsp = flg; 7625 *prip = pri; 7626 7627 /* 7628 * Getmsg cleanup processing - if the state of the queue has changed 7629 * some signals may need to be sent and/or poll awakened. 7630 */ 7631 getmout: 7632 qbackenable(q, pri); 7633 7634 /* 7635 * We dropped the stream head lock above. Send all M_SIG messages 7636 * before processing stream head for SIGPOLL messages. 7637 */ 7638 ASSERT(MUTEX_HELD(&stp->sd_lock)); 7639 while ((bp = q->q_first) != NULL && 7640 (bp->b_datap->db_type == M_SIG)) { 7641 /* 7642 * sd_lock is held so the content of the read queue can not 7643 * change. 7644 */ 7645 bp = getq(q); 7646 ASSERT(bp != NULL && bp->b_datap->db_type == M_SIG); 7647 7648 strsignal_nolock(stp, *bp->b_rptr, bp->b_band); 7649 mutex_exit(&stp->sd_lock); 7650 freemsg(bp); 7651 if (STREAM_NEEDSERVICE(stp)) 7652 stream_runservice(stp); 7653 mutex_enter(&stp->sd_lock); 7654 } 7655 7656 /* 7657 * stream head cannot change while we make the determination 7658 * whether or not to send a signal. Drop the flag to allow strrput 7659 * to send firstmsgsigs again. 7660 */ 7661 stp->sd_flag &= ~STRGETINPROG; 7662 7663 /* 7664 * If the type of message at the front of the queue changed 7665 * due to the receive the appropriate signals and pollwakeup events 7666 * are generated. The type of changes are: 7667 * Processed a hipri message, q_first is not hipri. 7668 * Processed a band X message, and q_first is band Y. 
7669 * The generated signals and pollwakeups are identical to what 7670 * strrput() generates should the message that is now on q_first 7671 * arrive to an empty read queue. 7672 * 7673 * Note: only strrput will send a signal for a hipri message. 7674 */ 7675 if ((bp = q->q_first) != NULL && !(stp->sd_flag & STRPRI)) { 7676 strsigset_t signals = 0; 7677 strpollset_t pollwakeups = 0; 7678 7679 if (flg & MSG_HIPRI) { 7680 /* 7681 * Removed a hipri message. Regular data at 7682 * the front of the queue. 7683 */ 7684 if (bp->b_band == 0) { 7685 signals = S_INPUT | S_RDNORM; 7686 pollwakeups = POLLIN | POLLRDNORM; 7687 } else { 7688 signals = S_INPUT | S_RDBAND; 7689 pollwakeups = POLLIN | POLLRDBAND; 7690 } 7691 } else if (pri != bp->b_band) { 7692 /* 7693 * The band is different for the new q_first. 7694 */ 7695 if (bp->b_band == 0) { 7696 signals = S_RDNORM; 7697 pollwakeups = POLLIN | POLLRDNORM; 7698 } else { 7699 signals = S_RDBAND; 7700 pollwakeups = POLLIN | POLLRDBAND; 7701 } 7702 } 7703 7704 if (pollwakeups != 0) { 7705 if (pollwakeups == (POLLIN | POLLRDNORM)) { 7706 if (!(stp->sd_rput_opt & SR_POLLIN)) 7707 goto no_pollwake; 7708 stp->sd_rput_opt &= ~SR_POLLIN; 7709 } 7710 mutex_exit(&stp->sd_lock); 7711 pollwakeup(&stp->sd_pollist, pollwakeups); 7712 mutex_enter(&stp->sd_lock); 7713 } 7714 no_pollwake: 7715 7716 if (stp->sd_sigflags & signals) 7717 strsendsig(stp->sd_siglist, signals, bp->b_band, 0); 7718 } 7719 mutex_exit(&stp->sd_lock); 7720 7721 rvp->r_val1 = more; 7722 return (error); 7723 #undef _LASTMARK 7724 } 7725 7726 /* 7727 * Put a message downstream. 7728 * 7729 * NOTE: strputmsg and kstrputmsg have much of the logic in common. 
7730 */ 7731 int 7732 strputmsg( 7733 struct vnode *vp, 7734 struct strbuf *mctl, 7735 struct strbuf *mdata, 7736 unsigned char pri, 7737 int flag, 7738 int fmode) 7739 { 7740 struct stdata *stp; 7741 queue_t *wqp; 7742 mblk_t *mp; 7743 ssize_t msgsize; 7744 ssize_t rmin, rmax; 7745 int error; 7746 struct uio uios; 7747 struct uio *uiop = &uios; 7748 struct iovec iovs; 7749 int xpg4 = 0; 7750 7751 ASSERT(vp->v_stream); 7752 stp = vp->v_stream; 7753 wqp = stp->sd_wrq; 7754 7755 /* 7756 * If it is an XPG4 application, we need to send 7757 * SIGPIPE below 7758 */ 7759 7760 xpg4 = (flag & MSG_XPG4) ? 1 : 0; 7761 flag &= ~MSG_XPG4; 7762 7763 if (AU_AUDITING()) 7764 audit_strputmsg(vp, mctl, mdata, pri, flag, fmode); 7765 7766 mutex_enter(&stp->sd_lock); 7767 7768 if ((error = i_straccess(stp, JCWRITE)) != 0) { 7769 mutex_exit(&stp->sd_lock); 7770 return (error); 7771 } 7772 7773 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { 7774 error = strwriteable(stp, B_FALSE, xpg4); 7775 if (error != 0) { 7776 mutex_exit(&stp->sd_lock); 7777 return (error); 7778 } 7779 } 7780 7781 mutex_exit(&stp->sd_lock); 7782 7783 /* 7784 * Check for legal flag value. 7785 */ 7786 switch (flag) { 7787 case MSG_HIPRI: 7788 if ((mctl->len < 0) || (pri != 0)) 7789 return (EINVAL); 7790 break; 7791 case MSG_BAND: 7792 break; 7793 7794 default: 7795 return (EINVAL); 7796 } 7797 7798 TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_IN, 7799 "strputmsg in:stp %p", stp); 7800 7801 /* get these values from those cached in the stream head */ 7802 rmin = stp->sd_qn_minpsz; 7803 rmax = stp->sd_qn_maxpsz; 7804 7805 /* 7806 * Make sure ctl and data sizes together fall within the 7807 * limits of the max and min receive packet sizes and do 7808 * not exceed system limit. 7809 */ 7810 ASSERT((rmax >= 0) || (rmax == INFPSZ)); 7811 if (rmax == 0) { 7812 return (ERANGE); 7813 } 7814 /* 7815 * Use the MAXIMUM of sd_maxblk and q_maxpsz. 7816 * Needed to prevent partial failures in the strmakedata loop. 
7817 */ 7818 if (stp->sd_maxblk != INFPSZ && rmax != INFPSZ && rmax < stp->sd_maxblk) 7819 rmax = stp->sd_maxblk; 7820 7821 if ((msgsize = mdata->len) < 0) { 7822 msgsize = 0; 7823 rmin = 0; /* no range check for NULL data part */ 7824 } 7825 if ((msgsize < rmin) || 7826 ((msgsize > rmax) && (rmax != INFPSZ)) || 7827 (mctl->len > strctlsz)) { 7828 return (ERANGE); 7829 } 7830 7831 /* 7832 * Setup uio and iov for data part 7833 */ 7834 iovs.iov_base = mdata->buf; 7835 iovs.iov_len = msgsize; 7836 uios.uio_iov = &iovs; 7837 uios.uio_iovcnt = 1; 7838 uios.uio_loffset = 0; 7839 uios.uio_segflg = UIO_USERSPACE; 7840 uios.uio_fmode = fmode; 7841 uios.uio_extflg = UIO_COPY_DEFAULT; 7842 uios.uio_resid = msgsize; 7843 uios.uio_offset = 0; 7844 7845 /* Ignore flow control in strput for HIPRI */ 7846 if (flag & MSG_HIPRI) 7847 flag |= MSG_IGNFLOW; 7848 7849 for (;;) { 7850 int done = 0; 7851 7852 /* 7853 * strput will always free the ctl mblk - even when strput 7854 * fails. 7855 */ 7856 if ((error = strmakectl(mctl, flag, fmode, &mp)) != 0) { 7857 TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT, 7858 "strputmsg out:stp %p out %d error %d", 7859 stp, 1, error); 7860 return (error); 7861 } 7862 /* 7863 * Verify that the whole message can be transferred by 7864 * strput. 7865 */ 7866 ASSERT(stp->sd_maxblk == INFPSZ || 7867 stp->sd_maxblk >= mdata->len); 7868 7869 msgsize = mdata->len; 7870 error = strput(stp, mp, uiop, &msgsize, 0, pri, flag); 7871 mdata->len = msgsize; 7872 7873 if (error == 0) 7874 break; 7875 7876 if (error != EWOULDBLOCK) 7877 goto out; 7878 7879 mutex_enter(&stp->sd_lock); 7880 /* 7881 * Check for a missed wakeup. 7882 * Needed since strput did not hold sd_lock across 7883 * the canputnext. 
7884 */ 7885 if (bcanputnext(wqp, pri)) { 7886 /* Try again */ 7887 mutex_exit(&stp->sd_lock); 7888 continue; 7889 } 7890 TRACE_2(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAIT, 7891 "strputmsg wait:stp %p waits pri %d", stp, pri); 7892 if (((error = strwaitq(stp, WRITEWAIT, (ssize_t)0, fmode, -1, 7893 &done)) != 0) || done) { 7894 mutex_exit(&stp->sd_lock); 7895 TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT, 7896 "strputmsg out:q %p out %d error %d", 7897 stp, 0, error); 7898 return (error); 7899 } 7900 TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAKE, 7901 "strputmsg wake:stp %p wakes", stp); 7902 if ((error = i_straccess(stp, JCWRITE)) != 0) { 7903 mutex_exit(&stp->sd_lock); 7904 return (error); 7905 } 7906 mutex_exit(&stp->sd_lock); 7907 } 7908 out: 7909 /* 7910 * For historic reasons, applications expect EAGAIN 7911 * when data mblk could not be allocated. so change 7912 * ENOMEM back to EAGAIN 7913 */ 7914 if (error == ENOMEM) 7915 error = EAGAIN; 7916 TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT, 7917 "strputmsg out:stp %p out %d error %d", stp, 2, error); 7918 return (error); 7919 } 7920 7921 /* 7922 * Put a message downstream. 7923 * Can send only an M_PROTO/M_PCPROTO by passing in a NULL uiop. 7924 * The fmode flag (NDELAY, NONBLOCK) is the or of the flags in the uio 7925 * and the fmode parameter. 7926 * 7927 * This routine handles the consolidation private flags: 7928 * MSG_IGNERROR Ignore any stream head error except STPLEX. 7929 * MSG_HOLDSIG Hold signals while waiting for data. 7930 * MSG_IGNFLOW Don't check streams flow control. 7931 * 7932 * NOTE: strputmsg and kstrputmsg have much of the logic in common. 
7933 */ 7934 int 7935 kstrputmsg( 7936 struct vnode *vp, 7937 mblk_t *mctl, 7938 struct uio *uiop, 7939 ssize_t msgsize, 7940 unsigned char pri, 7941 int flag, 7942 int fmode) 7943 { 7944 struct stdata *stp; 7945 queue_t *wqp; 7946 ssize_t rmin, rmax; 7947 int error; 7948 7949 ASSERT(vp->v_stream); 7950 stp = vp->v_stream; 7951 wqp = stp->sd_wrq; 7952 if (AU_AUDITING()) 7953 audit_strputmsg(vp, NULL, NULL, pri, flag, fmode); 7954 if (mctl == NULL) 7955 return (EINVAL); 7956 7957 mutex_enter(&stp->sd_lock); 7958 7959 if ((error = i_straccess(stp, JCWRITE)) != 0) { 7960 mutex_exit(&stp->sd_lock); 7961 freemsg(mctl); 7962 return (error); 7963 } 7964 7965 if ((stp->sd_flag & STPLEX) || !(flag & MSG_IGNERROR)) { 7966 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { 7967 error = strwriteable(stp, B_FALSE, B_TRUE); 7968 if (error != 0) { 7969 mutex_exit(&stp->sd_lock); 7970 freemsg(mctl); 7971 return (error); 7972 } 7973 } 7974 } 7975 7976 mutex_exit(&stp->sd_lock); 7977 7978 /* 7979 * Check for legal flag value. 7980 */ 7981 switch (flag & (MSG_HIPRI|MSG_BAND|MSG_ANY)) { 7982 case MSG_HIPRI: 7983 if (pri != 0) { 7984 freemsg(mctl); 7985 return (EINVAL); 7986 } 7987 break; 7988 case MSG_BAND: 7989 break; 7990 default: 7991 freemsg(mctl); 7992 return (EINVAL); 7993 } 7994 7995 TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_IN, 7996 "kstrputmsg in:stp %p", stp); 7997 7998 /* get these values from those cached in the stream head */ 7999 rmin = stp->sd_qn_minpsz; 8000 rmax = stp->sd_qn_maxpsz; 8001 8002 /* 8003 * Make sure ctl and data sizes together fall within the 8004 * limits of the max and min receive packet sizes and do 8005 * not exceed system limit. 8006 */ 8007 ASSERT((rmax >= 0) || (rmax == INFPSZ)); 8008 if (rmax == 0) { 8009 freemsg(mctl); 8010 return (ERANGE); 8011 } 8012 /* 8013 * Use the MAXIMUM of sd_maxblk and q_maxpsz. 8014 * Needed to prevent partial failures in the strmakedata loop. 
8015 */ 8016 if (stp->sd_maxblk != INFPSZ && rmax != INFPSZ && rmax < stp->sd_maxblk) 8017 rmax = stp->sd_maxblk; 8018 8019 if (uiop == NULL) { 8020 msgsize = -1; 8021 rmin = -1; /* no range check for NULL data part */ 8022 } else { 8023 /* Use uio flags as well as the fmode parameter flags */ 8024 fmode |= uiop->uio_fmode; 8025 8026 if ((msgsize < rmin) || 8027 ((msgsize > rmax) && (rmax != INFPSZ))) { 8028 freemsg(mctl); 8029 return (ERANGE); 8030 } 8031 } 8032 8033 /* Ignore flow control in strput for HIPRI */ 8034 if (flag & MSG_HIPRI) 8035 flag |= MSG_IGNFLOW; 8036 8037 for (;;) { 8038 int done = 0; 8039 int waitflag; 8040 mblk_t *mp; 8041 8042 /* 8043 * strput will always free the ctl mblk - even when strput 8044 * fails. If MSG_IGNFLOW is set then any error returned 8045 * will cause us to break the loop, so we don't need a copy 8046 * of the message. If MSG_IGNFLOW is not set, then we can 8047 * get hit by flow control and be forced to try again. In 8048 * this case we need to have a copy of the message. We 8049 * do this using copymsg since the message may get modified 8050 * by something below us. 8051 * 8052 * We've observed that many TPI providers do not check db_ref 8053 * on the control messages but blindly reuse them for the 8054 * T_OK_ACK/T_ERROR_ACK. Thus using copymsg is more 8055 * friendly to such providers than using dupmsg. Also, note 8056 * that sockfs uses MSG_IGNFLOW for all TPI control messages. 8057 * Only data messages are subject to flow control, hence 8058 * subject to this copymsg. 8059 */ 8060 if (flag & MSG_IGNFLOW) { 8061 mp = mctl; 8062 mctl = NULL; 8063 } else { 8064 do { 8065 /* 8066 * If a message has a free pointer, the message 8067 * must be dupmsg to maintain this pointer. 8068 * Code using this facility must be sure 8069 * that modules below will not change the 8070 * contents of the dblk without checking db_ref 8071 * first. If db_ref is > 1, then the module 8072 * needs to do a copymsg first. 
Otherwise, 8073 * the contents of the dblk may become 8074 * inconsistent because the freesmg/freeb below 8075 * may end up calling atomic_add_32_nv. 8076 * The atomic_add_32_nv in freeb (accessing 8077 * all of db_ref, db_type, db_flags, and 8078 * db_struioflag) does not prevent other threads 8079 * from concurrently trying to modify e.g. 8080 * db_type. 8081 */ 8082 if (mctl->b_datap->db_frtnp != NULL) 8083 mp = dupmsg(mctl); 8084 else 8085 mp = copymsg(mctl); 8086 8087 if (mp != NULL) 8088 break; 8089 8090 error = strwaitbuf(msgdsize(mctl), BPRI_MED); 8091 if (error) { 8092 freemsg(mctl); 8093 return (error); 8094 } 8095 } while (mp == NULL); 8096 } 8097 /* 8098 * Verify that all of msgsize can be transferred by 8099 * strput. 8100 */ 8101 ASSERT(stp->sd_maxblk == INFPSZ || stp->sd_maxblk >= msgsize); 8102 error = strput(stp, mp, uiop, &msgsize, 0, pri, flag); 8103 if (error == 0) 8104 break; 8105 8106 if (error != EWOULDBLOCK) 8107 goto out; 8108 8109 /* 8110 * IF MSG_IGNFLOW is set we should have broken out of loop 8111 * above. 8112 */ 8113 ASSERT(!(flag & MSG_IGNFLOW)); 8114 mutex_enter(&stp->sd_lock); 8115 /* 8116 * Check for a missed wakeup. 8117 * Needed since strput did not hold sd_lock across 8118 * the canputnext. 
8119 */ 8120 if (bcanputnext(wqp, pri)) { 8121 /* Try again */ 8122 mutex_exit(&stp->sd_lock); 8123 continue; 8124 } 8125 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAIT, 8126 "kstrputmsg wait:stp %p waits pri %d", stp, pri); 8127 8128 waitflag = WRITEWAIT; 8129 if (flag & (MSG_HOLDSIG|MSG_IGNERROR)) { 8130 if (flag & MSG_HOLDSIG) 8131 waitflag |= STR_NOSIG; 8132 if (flag & MSG_IGNERROR) 8133 waitflag |= STR_NOERROR; 8134 } 8135 if (((error = strwaitq(stp, waitflag, 8136 (ssize_t)0, fmode, -1, &done)) != 0) || done) { 8137 mutex_exit(&stp->sd_lock); 8138 TRACE_3(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_OUT, 8139 "kstrputmsg out:stp %p out %d error %d", 8140 stp, 0, error); 8141 freemsg(mctl); 8142 return (error); 8143 } 8144 TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAKE, 8145 "kstrputmsg wake:stp %p wakes", stp); 8146 if ((error = i_straccess(stp, JCWRITE)) != 0) { 8147 mutex_exit(&stp->sd_lock); 8148 freemsg(mctl); 8149 return (error); 8150 } 8151 mutex_exit(&stp->sd_lock); 8152 } 8153 out: 8154 freemsg(mctl); 8155 /* 8156 * For historic reasons, applications expect EAGAIN 8157 * when data mblk could not be allocated. so change 8158 * ENOMEM back to EAGAIN 8159 */ 8160 if (error == ENOMEM) 8161 error = EAGAIN; 8162 TRACE_3(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_OUT, 8163 "kstrputmsg out:stp %p out %d error %d", stp, 2, error); 8164 return (error); 8165 } 8166 8167 /* 8168 * Determines whether the necessary conditions are set on a stream 8169 * for it to be readable, writeable, or have exceptions. 8170 * 8171 * strpoll handles the consolidation private events: 8172 * POLLNOERR Do not return POLLERR even if there are stream 8173 * head errors. 8174 * Used by sockfs. 8175 * POLLRDDATA Do not return POLLIN unless at least one message on 8176 * the queue contains one or more M_DATA mblks. Thus 8177 * when this flag is set a queue with only 8178 * M_PROTO/M_PCPROTO mblks does not return POLLIN. 8179 * Used by sockfs to ignore T_EXDATA_IND messages. 
8180 * 8181 * Note: POLLRDDATA assumes that synch streams only return messages with 8182 * an M_DATA attached (i.e. not messages consisting of only 8183 * an M_PROTO/M_PCPROTO part). 8184 */ 8185 int 8186 strpoll( 8187 struct stdata *stp, 8188 short events_arg, 8189 int anyyet, 8190 short *reventsp, 8191 struct pollhead **phpp) 8192 { 8193 int events = (ushort_t)events_arg; 8194 int retevents = 0; 8195 mblk_t *mp; 8196 qband_t *qbp; 8197 long sd_flags = stp->sd_flag; 8198 int headlocked = 0; 8199 8200 /* 8201 * For performance, a single 'if' tests for most possible edge 8202 * conditions in one shot 8203 */ 8204 if (sd_flags & (STPLEX | STRDERR | STWRERR)) { 8205 if (sd_flags & STPLEX) { 8206 *reventsp = POLLNVAL; 8207 return (EINVAL); 8208 } 8209 if (((events & (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) && 8210 (sd_flags & STRDERR)) || 8211 ((events & (POLLOUT | POLLWRNORM | POLLWRBAND)) && 8212 (sd_flags & STWRERR))) { 8213 if (!(events & POLLNOERR)) { 8214 *reventsp = POLLERR; 8215 return (0); 8216 } 8217 } 8218 } 8219 if (sd_flags & STRHUP) { 8220 retevents |= POLLHUP; 8221 } else if (events & (POLLWRNORM | POLLWRBAND)) { 8222 queue_t *tq; 8223 queue_t *qp = stp->sd_wrq; 8224 8225 claimstr(qp); 8226 /* Find next module forward that has a service procedure */ 8227 tq = qp->q_next->q_nfsrv; 8228 ASSERT(tq != NULL); 8229 8230 polllock(&stp->sd_pollist, QLOCK(tq)); 8231 if (events & POLLWRNORM) { 8232 queue_t *sqp; 8233 8234 if (tq->q_flag & QFULL) 8235 /* ensure backq svc procedure runs */ 8236 tq->q_flag |= QWANTW; 8237 else if ((sqp = stp->sd_struiowrq) != NULL) { 8238 /* Check sync stream barrier write q */ 8239 mutex_exit(QLOCK(tq)); 8240 polllock(&stp->sd_pollist, QLOCK(sqp)); 8241 if (sqp->q_flag & QFULL) 8242 /* ensure pollwakeup() is done */ 8243 sqp->q_flag |= QWANTWSYNC; 8244 else 8245 retevents |= POLLOUT; 8246 /* More write events to process ??? */ 8247 if (! 
(events & POLLWRBAND)) { 8248 mutex_exit(QLOCK(sqp)); 8249 releasestr(qp); 8250 goto chkrd; 8251 } 8252 mutex_exit(QLOCK(sqp)); 8253 polllock(&stp->sd_pollist, QLOCK(tq)); 8254 } else 8255 retevents |= POLLOUT; 8256 } 8257 if (events & POLLWRBAND) { 8258 qbp = tq->q_bandp; 8259 if (qbp) { 8260 while (qbp) { 8261 if (qbp->qb_flag & QB_FULL) 8262 qbp->qb_flag |= QB_WANTW; 8263 else 8264 retevents |= POLLWRBAND; 8265 qbp = qbp->qb_next; 8266 } 8267 } else { 8268 retevents |= POLLWRBAND; 8269 } 8270 } 8271 mutex_exit(QLOCK(tq)); 8272 releasestr(qp); 8273 } 8274 chkrd: 8275 if (sd_flags & STRPRI) { 8276 retevents |= (events & POLLPRI); 8277 } else if (events & (POLLRDNORM | POLLRDBAND | POLLIN)) { 8278 queue_t *qp = _RD(stp->sd_wrq); 8279 int normevents = (events & (POLLIN | POLLRDNORM)); 8280 8281 /* 8282 * Note: Need to do polllock() here since ps_lock may be 8283 * held. See bug 4191544. 8284 */ 8285 polllock(&stp->sd_pollist, &stp->sd_lock); 8286 headlocked = 1; 8287 mp = qp->q_first; 8288 while (mp) { 8289 /* 8290 * For POLLRDDATA we scan b_cont and b_next until we 8291 * find an M_DATA. 8292 */ 8293 if ((events & POLLRDDATA) && 8294 mp->b_datap->db_type != M_DATA) { 8295 mblk_t *nmp = mp->b_cont; 8296 8297 while (nmp != NULL && 8298 nmp->b_datap->db_type != M_DATA) 8299 nmp = nmp->b_cont; 8300 if (nmp == NULL) { 8301 mp = mp->b_next; 8302 continue; 8303 } 8304 } 8305 if (mp->b_band == 0) 8306 retevents |= normevents; 8307 else 8308 retevents |= (events & (POLLIN | POLLRDBAND)); 8309 break; 8310 } 8311 if (! (retevents & normevents) && 8312 (stp->sd_wakeq & RSLEEP)) { 8313 /* 8314 * Sync stream barrier read queue has data. 
8315 */ 8316 retevents |= normevents; 8317 } 8318 /* Treat eof as normal data */ 8319 if (sd_flags & STREOF) 8320 retevents |= normevents; 8321 } 8322 8323 *reventsp = (short)retevents; 8324 if (retevents) { 8325 if (headlocked) 8326 mutex_exit(&stp->sd_lock); 8327 return (0); 8328 } 8329 8330 /* 8331 * If poll() has not found any events yet, set up event cell 8332 * to wake up the poll if a requested event occurs on this 8333 * stream. Check for collisions with outstanding poll requests. 8334 */ 8335 if (!anyyet) { 8336 *phpp = &stp->sd_pollist; 8337 if (headlocked == 0) { 8338 polllock(&stp->sd_pollist, &stp->sd_lock); 8339 headlocked = 1; 8340 } 8341 stp->sd_rput_opt |= SR_POLLIN; 8342 } 8343 if (headlocked) 8344 mutex_exit(&stp->sd_lock); 8345 return (0); 8346 } 8347 8348 /* 8349 * The purpose of putback() is to assure sleeping polls/reads 8350 * are awakened when there are no new messages arriving at the, 8351 * stream head, and a message is placed back on the read queue. 8352 * 8353 * sd_lock must be held when messages are placed back on stream 8354 * head. (getq() holds sd_lock when it removes messages from 8355 * the queue) 8356 */ 8357 8358 static void 8359 putback(struct stdata *stp, queue_t *q, mblk_t *bp, int band) 8360 { 8361 mblk_t *qfirst; 8362 ASSERT(MUTEX_HELD(&stp->sd_lock)); 8363 8364 /* 8365 * As a result of lock-step ordering around q_lock and sd_lock, 8366 * it's possible for function calls like putnext() and 8367 * canputnext() to get an inaccurate picture of how much 8368 * data is really being processed at the stream head. 8369 * We only consolidate with existing messages on the queue 8370 * if the length of the message we want to put back is smaller 8371 * than the queue hiwater mark. 
8372 */ 8373 if ((stp->sd_rput_opt & SR_CONSOL_DATA) && 8374 (DB_TYPE(bp) == M_DATA) && ((qfirst = q->q_first) != NULL) && 8375 (DB_TYPE(qfirst) == M_DATA) && 8376 ((qfirst->b_flag & (MSGMARK|MSGDELIM)) == 0) && 8377 ((bp->b_flag & (MSGMARK|MSGDELIM|MSGMARKNEXT)) == 0) && 8378 (mp_cont_len(bp, NULL) < q->q_hiwat)) { 8379 /* 8380 * We use the same logic as defined in strrput() 8381 * but in reverse as we are putting back onto the 8382 * queue and want to retain byte ordering. 8383 * Consolidate M_DATA messages with M_DATA ONLY. 8384 * strrput() allows the consolidation of M_DATA onto 8385 * M_PROTO | M_PCPROTO but not the other way round. 8386 * 8387 * The consolidation does not take place if the message 8388 * we are returning to the queue is marked with either 8389 * of the marks or the delim flag or if q_first 8390 * is marked with MSGMARK. The MSGMARK check is needed to 8391 * handle the odd semantics of MSGMARK where essentially 8392 * the whole message is to be treated as marked. 8393 * Carry any MSGMARKNEXT and MSGNOTMARKNEXT from q_first 8394 * to the front of the b_cont chain. 8395 */ 8396 rmvq_noenab(q, qfirst); 8397 8398 /* 8399 * The first message in the b_cont list 8400 * tracks MSGMARKNEXT and MSGNOTMARKNEXT. 8401 * We need to handle the case where we 8402 * are appending: 8403 * 8404 * 1) a MSGMARKNEXT to a MSGNOTMARKNEXT. 8405 * 2) a MSGMARKNEXT to a plain message. 8406 * 3) a MSGNOTMARKNEXT to a plain message 8407 * 4) a MSGNOTMARKNEXT to a MSGNOTMARKNEXT 8408 * message. 8409 * 8410 * Thus we never append a MSGMARKNEXT or 8411 * MSGNOTMARKNEXT to a MSGMARKNEXT message. 
8412 */ 8413 if (qfirst->b_flag & MSGMARKNEXT) { 8414 bp->b_flag |= MSGMARKNEXT; 8415 bp->b_flag &= ~MSGNOTMARKNEXT; 8416 qfirst->b_flag &= ~MSGMARKNEXT; 8417 } else if (qfirst->b_flag & MSGNOTMARKNEXT) { 8418 bp->b_flag |= MSGNOTMARKNEXT; 8419 qfirst->b_flag &= ~MSGNOTMARKNEXT; 8420 } 8421 8422 linkb(bp, qfirst); 8423 } 8424 (void) putbq(q, bp); 8425 8426 /* 8427 * A message may have come in when the sd_lock was dropped in the 8428 * calling routine. If this is the case and STR*ATMARK info was 8429 * received, need to move that from the stream head to the q_last 8430 * so that SIOCATMARK can return the proper value. 8431 */ 8432 if (stp->sd_flag & (STRATMARK | STRNOTATMARK)) { 8433 unsigned short *flagp = &q->q_last->b_flag; 8434 uint_t b_flag = (uint_t)*flagp; 8435 8436 if (stp->sd_flag & STRATMARK) { 8437 b_flag &= ~MSGNOTMARKNEXT; 8438 b_flag |= MSGMARKNEXT; 8439 stp->sd_flag &= ~STRATMARK; 8440 } else { 8441 b_flag &= ~MSGMARKNEXT; 8442 b_flag |= MSGNOTMARKNEXT; 8443 stp->sd_flag &= ~STRNOTATMARK; 8444 } 8445 *flagp = (unsigned short) b_flag; 8446 } 8447 8448 #ifdef DEBUG 8449 /* 8450 * Make sure that the flags are not messed up. 
8451 */ 8452 { 8453 mblk_t *mp; 8454 mp = q->q_last; 8455 while (mp != NULL) { 8456 ASSERT((mp->b_flag & (MSGMARKNEXT|MSGNOTMARKNEXT)) != 8457 (MSGMARKNEXT|MSGNOTMARKNEXT)); 8458 mp = mp->b_cont; 8459 } 8460 } 8461 #endif 8462 if (q->q_first == bp) { 8463 short pollevents; 8464 8465 if (stp->sd_flag & RSLEEP) { 8466 stp->sd_flag &= ~RSLEEP; 8467 cv_broadcast(&q->q_wait); 8468 } 8469 if (stp->sd_flag & STRPRI) { 8470 pollevents = POLLPRI; 8471 } else { 8472 if (band == 0) { 8473 if (!(stp->sd_rput_opt & SR_POLLIN)) 8474 return; 8475 stp->sd_rput_opt &= ~SR_POLLIN; 8476 pollevents = POLLIN | POLLRDNORM; 8477 } else { 8478 pollevents = POLLIN | POLLRDBAND; 8479 } 8480 } 8481 mutex_exit(&stp->sd_lock); 8482 pollwakeup(&stp->sd_pollist, pollevents); 8483 mutex_enter(&stp->sd_lock); 8484 } 8485 } 8486 8487 /* 8488 * Return the held vnode attached to the stream head of a 8489 * given queue 8490 * It is the responsibility of the calling routine to ensure 8491 * that the queue does not go away (e.g. pop). 8492 */ 8493 vnode_t * 8494 strq2vp(queue_t *qp) 8495 { 8496 vnode_t *vp; 8497 vp = STREAM(qp)->sd_vnode; 8498 ASSERT(vp != NULL); 8499 VN_HOLD(vp); 8500 return (vp); 8501 } 8502 8503 /* 8504 * return the stream head write queue for the given vp 8505 * It is the responsibility of the calling routine to ensure 8506 * that the stream or vnode do not close. 8507 */ 8508 queue_t * 8509 strvp2wq(vnode_t *vp) 8510 { 8511 ASSERT(vp->v_stream != NULL); 8512 return (vp->v_stream->sd_wrq); 8513 } 8514 8515 /* 8516 * pollwakeup stream head 8517 * It is the responsibility of the calling routine to ensure 8518 * that the stream or vnode do not close. 8519 */ 8520 void 8521 strpollwakeup(vnode_t *vp, short event) 8522 { 8523 ASSERT(vp->v_stream); 8524 pollwakeup(&vp->v_stream->sd_pollist, event); 8525 } 8526 8527 /* 8528 * Mate the stream heads of two vnodes together. 
If the two vnodes are the 8529 * same, we just make the write-side point at the read-side -- otherwise, 8530 * we do a full mate. Only works on vnodes associated with streams that are 8531 * still being built and thus have only a stream head. 8532 */ 8533 void 8534 strmate(vnode_t *vp1, vnode_t *vp2) 8535 { 8536 queue_t *wrq1 = strvp2wq(vp1); 8537 queue_t *wrq2 = strvp2wq(vp2); 8538 8539 /* 8540 * Verify that there are no modules on the stream yet. We also 8541 * rely on the stream head always having a service procedure to 8542 * avoid tweaking q_nfsrv. 8543 */ 8544 ASSERT(wrq1->q_next == NULL && wrq2->q_next == NULL); 8545 ASSERT(wrq1->q_qinfo->qi_srvp != NULL); 8546 ASSERT(wrq2->q_qinfo->qi_srvp != NULL); 8547 8548 /* 8549 * If the queues are the same, just twist; otherwise do a full mate. 8550 */ 8551 if (wrq1 == wrq2) { 8552 wrq1->q_next = _RD(wrq1); 8553 } else { 8554 wrq1->q_next = _RD(wrq2); 8555 wrq2->q_next = _RD(wrq1); 8556 STREAM(wrq1)->sd_mate = STREAM(wrq2); 8557 STREAM(wrq1)->sd_flag |= STRMATE; 8558 STREAM(wrq2)->sd_mate = STREAM(wrq1); 8559 STREAM(wrq2)->sd_flag |= STRMATE; 8560 } 8561 } 8562 8563 /* 8564 * XXX will go away when console is correctly fixed. 8565 * Clean up the console PIDS, from previous I_SETSIG, 8566 * called only for cnopen which never calls strclean(). 8567 */ 8568 void 8569 str_cn_clean(struct vnode *vp) 8570 { 8571 strsig_t *ssp, *pssp, *tssp; 8572 struct stdata *stp; 8573 struct pid *pidp; 8574 int update = 0; 8575 8576 ASSERT(vp->v_stream); 8577 stp = vp->v_stream; 8578 pssp = NULL; 8579 mutex_enter(&stp->sd_lock); 8580 ssp = stp->sd_siglist; 8581 while (ssp) { 8582 mutex_enter(&pidlock); 8583 pidp = ssp->ss_pidp; 8584 /* 8585 * Get rid of PID if the proc is gone. 
8586 */ 8587 if (pidp->pid_prinactive) { 8588 tssp = ssp->ss_next; 8589 if (pssp) 8590 pssp->ss_next = tssp; 8591 else 8592 stp->sd_siglist = tssp; 8593 ASSERT(pidp->pid_ref <= 1); 8594 PID_RELE(ssp->ss_pidp); 8595 mutex_exit(&pidlock); 8596 kmem_free(ssp, sizeof (strsig_t)); 8597 update = 1; 8598 ssp = tssp; 8599 continue; 8600 } else 8601 mutex_exit(&pidlock); 8602 pssp = ssp; 8603 ssp = ssp->ss_next; 8604 } 8605 if (update) { 8606 stp->sd_sigflags = 0; 8607 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 8608 stp->sd_sigflags |= ssp->ss_events; 8609 } 8610 mutex_exit(&stp->sd_lock); 8611 } 8612 8613 /* 8614 * Return B_TRUE if there is data in the message, B_FALSE otherwise. 8615 */ 8616 static boolean_t 8617 msghasdata(mblk_t *bp) 8618 { 8619 for (; bp; bp = bp->b_cont) 8620 if (bp->b_datap->db_type == M_DATA) { 8621 ASSERT(bp->b_wptr >= bp->b_rptr); 8622 if (bp->b_wptr > bp->b_rptr) 8623 return (B_TRUE); 8624 } 8625 return (B_FALSE); 8626 } 8627