1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 22 /* All Rights Reserved */ 23 24 25 /* 26 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 27 * Use is subject to license terms. 
 */

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/signal.h>
#include <sys/stat.h>
#include <sys/proc.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/stropts.h>
#include <sys/tihdr.h>
#include <sys/var.h>
#include <sys/poll.h>
#include <sys/termio.h>
#include <sys/ttold.h>
#include <sys/systm.h>
#include <sys/uio.h>
#include <sys/cmn_err.h>
#include <sys/sad.h>
#include <sys/netstack.h>
#include <sys/priocntl.h>
#include <sys/jioctl.h>
#include <sys/procset.h>
#include <sys/session.h>
#include <sys/kmem.h>
#include <sys/filio.h>
#include <sys/vtrace.h>
#include <sys/debug.h>
#include <sys/strredir.h>
#include <sys/fs/fifonode.h>
#include <sys/fs/snode.h>
#include <sys/strlog.h>
#include <sys/strsun.h>
#include <sys/project.h>
#include <sys/kbio.h>
#include <sys/msio.h>
#include <sys/tty.h>
#include <sys/ptyvar.h>
#include <sys/vuid_event.h>
#include <sys/modctl.h>
#include <sys/sunddi.h>
#include <sys/sunldi_impl.h>
#include <sys/autoconf.h>
#include <sys/policy.h>
#include <sys/dld.h>
#include <sys/zone.h>

/*
 * This define helps improve the readability of streams code while
 * still maintaining a very old streams performance enhancement.  The
 * performance enhancement basically involved having all callers
 * of straccess() perform the first check that straccess() will do
 * locally before actually calling straccess().  (Thereby reducing
 * the number of unnecessary calls to straccess().)
 *
 * Note that the expansion refers to a variable named `stp' directly, so
 * one must be in scope wherever the macro is used; the arguments are only
 * forwarded to straccess().  The macro evaluates to 0 without calling
 * straccess() when stp->sd_sidp is NULL or the stream's vnode is a VFIFO.
 */
#define	i_straccess(x, y)	((stp->sd_sidp == NULL) ? 0 : \
				    (stp->sd_vnode->v_type == VFIFO) ? 0 : \
				    straccess((x), (y)))

/*
 * what is mblk_pull_len?
 *
 * If a streams message consists of many short messages,
 * a performance degradation occurs from copyout overhead.
 * To decrease the per mblk overhead, messages that are
 * likely to consist of many small mblks are pulled up into
 * one continuous chunk of memory.
 *
 * To avoid the processing overhead of examining every
 * mblk, a quick heuristic is used. If the first mblk in
 * the message is shorter than mblk_pull_len, it is likely
 * that the rest of the mblk will be short.
 *
 * This heuristic was decided upon after performance tests
 * indicated that anything more complex slowed down the main
 * code path.
 */
#define	MBLK_PULL_LEN 64
uint32_t mblk_pull_len = MBLK_PULL_LEN;

/*
 * The sgttyb_handling flag controls the handling of the old BSD
 * TIOCGETP, TIOCSETP, and TIOCSETN ioctls as follows:
 *
 * 0 - Emit no warnings at all and retain old, broken behavior.
 * 1 - Emit no warnings and silently handle new semantics.
 * 2 - Send cmn_err(CE_NOTE) when either TIOCSETP or TIOCSETN is used
 *     (once per system invocation).  Handle with new semantics.
 * 3 - Send SIGSYS when any TIOCGETP, TIOCSETP, or TIOCSETN call is
 *     made (so that offenders drop core and are easy to debug).
 *
 * The "new semantics" are that TIOCGETP returns B38400 for
 * sg_[io]speed if the corresponding value is over B38400, and that
 * TIOCSET[PN] accept B38400 in these cases to mean "retain current
 * bit rate."
 */
int sgttyb_handling = 1;
static boolean_t sgttyb_complaint;

/* don't push drcompat module by default on Style-2 streams */
static int push_drcompat = 0;

/*
 * id value used to distinguish between different ioctl messages
 */
static uint32_t ioc_id;

static void putback(struct stdata *, queue_t *, mblk_t *, int);
static void strcleanall(struct vnode *);
static int strwsrv(queue_t *);
static int strdocmd(struct stdata *, struct strcmd *, cred_t *);

/*
 * qinit and module_info structures for stream head read and write queues
 *
 * The strdata/stwdata pair is handed to setq() by stropen() for ordinary
 * streams; the fifo_strdata/fifo_stwdata pair is used instead when the
 * vnode being opened is a VFIFO.
 */
struct module_info strm_info = { 0, "strrhead", 0, INFPSZ, STRHIGH, STRLOW };
struct module_info stwm_info = { 0, "strwhead", 0, 0, 0, 0 };
struct qinit strdata = { strrput, NULL, NULL, NULL, NULL, &strm_info };
struct qinit stwdata = { NULL, strwsrv, NULL, NULL, NULL, &stwm_info };
struct module_info fiform_info = { 0, "fifostrrhead", 0, PIPE_BUF, FIFOHIWAT,
    FIFOLOWAT };
struct module_info fifowm_info = { 0, "fifostrwhead", 0, 0, 0, 0 };
struct qinit fifo_strdata = { strrput, NULL, NULL, NULL, NULL, &fiform_info };
struct qinit fifo_stwdata = { NULL, strwsrv, NULL, NULL, NULL, &fifowm_info };

extern kmutex_t	strresources;	/* protects global resources */
extern kmutex_t muxifier;	/* single-threads multiplexor creation */

static boolean_t msghasdata(mblk_t *bp);
#define	msgnodata(bp) (!msghasdata(bp))

/*
 * Stream head locking notes:
 *	There are four monitors associated with the stream head:
 *	1. v_stream monitor: in stropen() and strclose() v_lock
 *		is held while the association of vnode and stream
 *		head is established or tested for.
 *	2. open/close/push/pop monitor: sd_lock is held while each
 *		thread bids for exclusive access to this monitor
 *		for opening or closing a stream.  In addition, this
 *		monitor is entered during pushes and pops.
This 175 * guarantees that during plumbing operations there 176 * is only one thread trying to change the plumbing. 177 * Any other threads present in the stream are only 178 * using the plumbing. 179 * 3. read/write monitor: in the case of read, a thread holds 180 * sd_lock while trying to get data from the stream 181 * head queue. if there is none to fulfill a read 182 * request, it sets RSLEEP and calls cv_wait_sig() down 183 * in strwaitq() to await the arrival of new data. 184 * when new data arrives in strrput(), sd_lock is acquired 185 * before testing for RSLEEP and calling cv_broadcast(). 186 * the behavior of strwrite(), strwsrv(), and WSLEEP 187 * mirror this. 188 * 4. ioctl monitor: sd_lock is gotten to ensure that only one 189 * thread is doing an ioctl at a time. 190 */ 191 192 static int 193 push_mod(queue_t *qp, dev_t *devp, struct stdata *stp, const char *name, 194 int anchor, cred_t *crp, uint_t anchor_zoneid) 195 { 196 int error; 197 fmodsw_impl_t *fp; 198 199 if (stp->sd_flag & (STRHUP|STRDERR|STWRERR)) { 200 error = (stp->sd_flag & STRHUP) ? ENXIO : EIO; 201 return (error); 202 } 203 if (stp->sd_pushcnt >= nstrpush) { 204 return (EINVAL); 205 } 206 207 if ((fp = fmodsw_find(name, FMODSW_HOLD | FMODSW_LOAD)) == NULL) { 208 stp->sd_flag |= STREOPENFAIL; 209 return (EINVAL); 210 } 211 212 /* 213 * push new module and call its open routine via qattach 214 */ 215 if ((error = qattach(qp, devp, 0, crp, fp, B_FALSE)) != 0) 216 return (error); 217 218 /* 219 * Check to see if caller wants a STREAMS anchor 220 * put at this place in the stream, and add if so. 221 */ 222 mutex_enter(&stp->sd_lock); 223 if (anchor == stp->sd_pushcnt) { 224 stp->sd_anchor = stp->sd_pushcnt; 225 stp->sd_anchorzone = anchor_zoneid; 226 } 227 mutex_exit(&stp->sd_lock); 228 229 return (0); 230 } 231 232 /* 233 * Open a stream device. 
 *
 * If `vp' already has a stream head this is a re-open: wait on sd_monitor
 * until no open, close or plumbing operation (STWOPEN|STRCLOSE|STRPLUMB)
 * is in progress on the stream (or on its mate, for mated streams), then
 * call qreopen() on every queue below the stream head.  With FNDELAY or
 * FNONBLOCK set in `flag' EAGAIN is returned instead of waiting; EINTR is
 * returned if cv_wait_sig() reports that the wait was interrupted; EIO is
 * returned if the stream has STRDERR or STWRERR set.
 *
 * Otherwise a new queue pair and stream head are allocated and attached
 * to the vnode, the driver is opened through qattach(), and any modules
 * configured for autopush are pushed with push_mod().
 *
 * Returns 0 on success or an errno value.
 */
int
stropen(vnode_t *vp, dev_t *devp, int flag, cred_t *crp)
{
	struct stdata *stp;
	queue_t *qp;
	int s;
	dev_t dummydev, savedev;
	struct autopush *ap;
	struct dlautopush dlap;
	int error = 0;
	ssize_t	rmin, rmax;
	int cloneopen;
	queue_t *brq;
	major_t major;
	str_stack_t *ss;
	zoneid_t zoneid;
	uint_t anchor;

	if (audit_active)
		audit_stropen(vp, devp, flag, crp);

	/*
	 * If the stream already exists, wait for any open in progress
	 * to complete, then call the open function of each module and
	 * driver in the stream.  Otherwise create the stream.
	 */
	TRACE_1(TR_FAC_STREAMS_FR, TR_STROPEN, "stropen:%p", vp);
retry:
	mutex_enter(&vp->v_lock);
	if ((stp = vp->v_stream) != NULL) {

		/*
		 * Waiting for stream to be created to device
		 * due to another open.
		 */
		mutex_exit(&vp->v_lock);

		if (STRMATED(stp)) {
			struct stdata *strmatep = stp->sd_mate;

			/*
			 * Every path out of this branch that jumps to
			 * ckreturn must arrive there holding stp->sd_lock
			 * only; the mate's lock is dropped first.
			 */
			STRLOCKMATES(stp);
			if (strmatep->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
				if (flag & (FNDELAY|FNONBLOCK)) {
					error = EAGAIN;
					mutex_exit(&strmatep->sd_lock);
					goto ckreturn;
				}
				mutex_exit(&stp->sd_lock);
				if (!cv_wait_sig(&strmatep->sd_monitor,
				    &strmatep->sd_lock)) {
					error = EINTR;
					mutex_exit(&strmatep->sd_lock);
					/* ckreturn expects stp->sd_lock */
					mutex_enter(&stp->sd_lock);
					goto ckreturn;
				}
				mutex_exit(&strmatep->sd_lock);
				goto retry;
			}
			if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
				if (flag & (FNDELAY|FNONBLOCK)) {
					error = EAGAIN;
					mutex_exit(&strmatep->sd_lock);
					goto ckreturn;
				}
				mutex_exit(&strmatep->sd_lock);
				if (!cv_wait_sig(&stp->sd_monitor,
				    &stp->sd_lock)) {
					error = EINTR;
					goto ckreturn;
				}
				mutex_exit(&stp->sd_lock);
				goto retry;
			}

			if (stp->sd_flag & (STRDERR|STWRERR)) {
				error = EIO;
				mutex_exit(&strmatep->sd_lock);
				goto ckreturn;
			}

			stp->sd_flag |= STWOPEN;
			STRUNLOCKMATES(stp);
		} else {
			mutex_enter(&stp->sd_lock);
			if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
				if (flag & (FNDELAY|FNONBLOCK)) {
					error = EAGAIN;
					goto ckreturn;
				}
				if (!cv_wait_sig(&stp->sd_monitor,
				    &stp->sd_lock)) {
					error = EINTR;
					goto ckreturn;
				}
				mutex_exit(&stp->sd_lock);
				goto retry;  /* could be clone! */
			}

			if (stp->sd_flag & (STRDERR|STWRERR)) {
				error = EIO;
				goto ckreturn;
			}

			stp->sd_flag |= STWOPEN;
			mutex_exit(&stp->sd_lock);
		}

		/*
		 * Open all modules and devices down stream to notify
		 * that another user is streaming.  For modules, set the
		 * last argument to MODOPEN and do not pass any open flags.
		 * Ignore dummydev since this is not the first open.
		 */
		claimstr(stp->sd_wrq);
		qp = stp->sd_wrq;
		while (_SAMESTR(qp)) {
			qp = qp->q_next;
			if ((error = qreopen(_RD(qp), devp, flag, crp)) != 0)
				break;
		}
		releasestr(stp->sd_wrq);
		mutex_enter(&stp->sd_lock);
		stp->sd_flag &= ~(STRHUP|STWOPEN|STRDERR|STWRERR);
		stp->sd_rerror = 0;
		stp->sd_werror = 0;
ckreturn:
		/* reached with stp->sd_lock held */
		cv_broadcast(&stp->sd_monitor);
		mutex_exit(&stp->sd_lock);
		return (error);
	}

	/*
	 * This vnode isn't streaming.  SPECFS already
	 * checked for multiple vnodes pointing to the
	 * same stream, so create a stream to the driver.
	 */
	qp = allocq();
	stp = shalloc(qp);

	/*
	 * Initialize stream head.  shalloc() has given us
	 * exclusive access, and we have the vnode locked;
	 * we can do whatever we want with stp.
	 */
	stp->sd_flag = STWOPEN;
	stp->sd_siglist = NULL;
	stp->sd_pollist.ph_list = NULL;
	stp->sd_sigflags = 0;
	stp->sd_mark = NULL;
	stp->sd_closetime = STRTIMOUT;
	stp->sd_sidp = NULL;
	stp->sd_pgidp = NULL;
	stp->sd_vnode = vp;
	stp->sd_rerror = 0;
	stp->sd_werror = 0;
	stp->sd_wroff = 0;
	stp->sd_tail = 0;
	stp->sd_iocblk = NULL;
	stp->sd_cmdblk = NULL;
	stp->sd_pushcnt = 0;
	stp->sd_qn_minpsz = 0;
	stp->sd_qn_maxpsz = INFPSZ - 1;	/* used to check for initialization */
	stp->sd_maxblk = INFPSZ;
	qp->q_ptr = _WR(qp)->q_ptr = stp;
	STREAM(qp) = STREAM(_WR(qp)) = stp;
	vp->v_stream = stp;
	mutex_exit(&vp->v_lock);
	if (vp->v_type == VFIFO) {
		stp->sd_flag |= OLDNDELAY;
		/*
		 * This means, both for pipes and fifos
		 * strwrite will send SIGPIPE if the other
		 * end is closed. For putmsg it depends
		 * on whether it is a XPG4_2 application
		 * or not
		 */
		stp->sd_wput_opt = SW_SIGPIPE;

		/* setq might sleep in kmem_alloc - avoid holding locks. */
		setq(qp, &fifo_strdata, &fifo_stwdata, NULL, QMTSAFE,
		    SQ_CI|SQ_CO, B_FALSE);

		set_qend(qp);
		stp->sd_strtab = fifo_getinfo();
		_WR(qp)->q_nfsrv = _WR(qp);
		qp->q_nfsrv = qp;
		/*
		 * Wake up others that are waiting for stream to be created.
		 */
		mutex_enter(&stp->sd_lock);
		/*
		 * nothing has been pushed on the stream yet, so
		 * optimized stream head packetsizes are just that
		 * of the read queue
		 */
		stp->sd_qn_minpsz = qp->q_minpsz;
		stp->sd_qn_maxpsz = qp->q_maxpsz;
		stp->sd_flag &= ~STWOPEN;
		goto fifo_opendone;
	}
	/* setq might sleep in kmem_alloc - avoid holding locks. */
	setq(qp, &strdata, &stwdata, NULL, QMTSAFE, SQ_CI|SQ_CO, B_FALSE);

	set_qend(qp);

	/*
	 * Open driver and create stream to it (via qattach).
	 */
	savedev = *devp;
	cloneopen = (getmajor(*devp) == clone_major);
	if ((error = qattach(qp, devp, flag, crp, NULL, B_FALSE)) != 0) {
		/* driver open failed: undo the vnode association and free */
		mutex_enter(&vp->v_lock);
		vp->v_stream = NULL;
		mutex_exit(&vp->v_lock);
		mutex_enter(&stp->sd_lock);
		cv_broadcast(&stp->sd_monitor);
		mutex_exit(&stp->sd_lock);
		freeq(_RD(qp));
		shfree(stp);
		return (error);
	}
	/*
	 * Set sd_strtab after open in order to handle clonable drivers
	 */
	stp->sd_strtab = STREAMSTAB(getmajor(*devp));

	/*
	 * Historical note: dummydev used to be set prior to the initial
	 * open (via qattach above), which made the value seen
	 * inconsistent between an I_PUSH and an autopush of a module.
	 */
	dummydev = *devp;

	/*
	 * For clone open of old style (Q not associated) network driver,
	 * push DRMODNAME module to handle DL_ATTACH/DL_DETACH
	 */
	brq = _RD(_WR(qp)->q_next);
	major = getmajor(*devp);
	if (push_drcompat && cloneopen && NETWORK_DRV(major) &&
	    ((brq->q_flag & _QASSOCIATED) == 0)) {
		if (push_mod(qp, &dummydev, stp, DRMODNAME, 0, crp, 0) != 0)
			cmn_err(CE_WARN, "cannot push " DRMODNAME
			    " streams module");
	}

	if (!NETWORK_DRV(major)) {
		savedev = *devp;
	} else {
		/*
		 * For network devices, process differently based on the
		 * return value from dld_autopush():
		 *
		 *   0: the passed-in device points to a GLDv3 datalink with
		 *   per-link autopush configuration; use that configuration
		 *   and ignore any per-driver autopush configuration.
		 *
		 *   1: the passed-in device points to a physical GLDv3
		 *   datalink without per-link autopush configuration.  The
		 *   passed in device was changed to refer to the actual
		 *   physical device (if it's not already); we use that new
		 *   device to look up any per-driver autopush configuration.
		 *
		 *   -1: neither of the above cases applied; use the initial
		 *   device to look up any per-driver autopush configuration.
		 */
		switch (dld_autopush(&savedev, &dlap)) {
		case 0:
			zoneid = crgetzoneid(crp);
			for (s = 0; s < dlap.dap_npush; s++) {
				error = push_mod(qp, &dummydev, stp,
				    dlap.dap_aplist[s], dlap.dap_anchor, crp,
				    zoneid);
				if (error != 0)
					break;
			}
			goto opendone;
		case 1:
			break;
		case -1:
			savedev = *devp;
			break;
		}
	}
	/*
	 * Find the autopush configuration based on "savedev". Start with the
	 * global zone. If not found check in the local zone.
	 */
	zoneid = GLOBAL_ZONEID;
retryap:
	ss = netstack_find_by_stackid(zoneid_to_netstackid(zoneid))->
	    netstack_str;
	if ((ap = sad_ap_find_by_dev(savedev, ss)) == NULL) {
		netstack_rele(ss->ss_netstack);
		if (zoneid == GLOBAL_ZONEID) {
			/*
			 * None found. Also look in the zone's autopush table.
			 */
			zoneid = crgetzoneid(crp);
			if (zoneid != GLOBAL_ZONEID)
				goto retryap;
		}
		goto opendone;
	}
	anchor = ap->ap_anchor;
	zoneid = crgetzoneid(crp);
	for (s = 0; s < ap->ap_npush; s++) {
		error = push_mod(qp, &dummydev, stp, ap->ap_list[s],
		    anchor, crp, zoneid);
		if (error != 0)
			break;
	}
	sad_ap_rele(ap, ss);
	netstack_rele(ss->ss_netstack);

opendone:

	/*
	 * let specfs know that open failed part way through
	 */
	if (error) {
		mutex_enter(&stp->sd_lock);
		stp->sd_flag |= STREOPENFAIL;
		mutex_exit(&stp->sd_lock);
	}

	/*
	 * Wake up others that are waiting for stream to be created.
	 */
	mutex_enter(&stp->sd_lock);
	stp->sd_flag &= ~STWOPEN;

	/*
	 * As a performance concern we are caching the values of
	 * q_minpsz and q_maxpsz of the module below the stream
	 * head in the stream head.
	 */
	mutex_enter(QLOCK(stp->sd_wrq->q_next));
	rmin = stp->sd_wrq->q_next->q_minpsz;
	rmax = stp->sd_wrq->q_next->q_maxpsz;
	mutex_exit(QLOCK(stp->sd_wrq->q_next));

	/* do this processing here as a performance concern */
	if (strmsgsz != 0) {
		if (rmax == INFPSZ)
			rmax = strmsgsz;
		else
			rmax = MIN(strmsgsz, rmax);
	}

	mutex_enter(QLOCK(stp->sd_wrq));
	stp->sd_qn_minpsz = rmin;
	stp->sd_qn_maxpsz = rmax;
	mutex_exit(QLOCK(stp->sd_wrq));

fifo_opendone:
	/* reached with stp->sd_lock held */
	cv_broadcast(&stp->sd_monitor);
	mutex_exit(&stp->sd_lock);
	return (error);
}

/*
 * qinit tables installed by strclose() on the stream head queue pair of
 * a pipe end whose peer is still alive, turning it into a dead end:
 * strsink() becomes the read-side put routine.
 */
static int strsink(queue_t *, mblk_t *);
static struct qinit deadrend = {
	strsink, NULL, NULL, NULL, NULL, &strm_info, NULL
};
static struct qinit deadwend = {
	NULL, NULL, NULL, NULL, NULL, &stwm_info, NULL
};

/*
 * Close a stream.
 * This is called from closef() on the last close of an open stream.
 * Strclean() will already have removed the siglist and pollist
 * information, so all that remains is to remove all multiplexor links
 * for the stream, pop all the modules (and the driver), and free the
 * stream structure.
 *
 * Always returns 0.
 */

int
strclose(struct vnode *vp, int flag, cred_t *crp)
{
	struct stdata *stp;
	queue_t *qp;
	int rval;
	int freestp = 1;
	queue_t *rmq;

	if (audit_active)
		audit_strclose(vp, flag, crp);

	TRACE_1(TR_FAC_STREAMS_FR,
	    TR_STRCLOSE, "strclose:%p", vp);
	ASSERT(vp->v_stream);

	stp = vp->v_stream;
	ASSERT(!(stp->sd_flag & STPLEX));
	qp = stp->sd_wrq;

	/*
	 * Needed so that strpoll will return non-zero for this fd.
	 * Note that with POLLNOERR STRHUP does still cause POLLHUP.
	 */
	mutex_enter(&stp->sd_lock);
	stp->sd_flag |= STRHUP;
	mutex_exit(&stp->sd_lock);

	/*
	 * If the registered process or process group did not have an
	 * open instance of this stream then strclean would not be
	 * called. Thus at the time of closing all remaining siglist entries
	 * are removed.
	 */
	if (stp->sd_siglist != NULL)
		strcleanall(vp);

	ASSERT(stp->sd_siglist == NULL);
	ASSERT(stp->sd_sigflags == 0);

	if (STRMATED(stp)) {
		struct stdata *strmatep = stp->sd_mate;
		int waited = 1;

		/*
		 * Keep re-checking both ends until a full pass finds
		 * neither of them busy: each wait drops both locks, so
		 * the other end's state may have changed meanwhile.
		 */
		STRLOCKMATES(stp);
		while (waited) {
			waited = 0;
			while (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
				mutex_exit(&strmatep->sd_lock);
				cv_wait(&stp->sd_monitor, &stp->sd_lock);
				mutex_exit(&stp->sd_lock);
				STRLOCKMATES(stp);
				waited = 1;
			}
			while (strmatep->sd_flag &
			    (STWOPEN|STRCLOSE|STRPLUMB)) {
				mutex_exit(&stp->sd_lock);
				cv_wait(&strmatep->sd_monitor,
				    &strmatep->sd_lock);
				mutex_exit(&strmatep->sd_lock);
				STRLOCKMATES(stp);
				waited = 1;
			}
		}
		stp->sd_flag |= STRCLOSE;
		STRUNLOCKMATES(stp);
	} else {
		mutex_enter(&stp->sd_lock);
		stp->sd_flag |= STRCLOSE;
		mutex_exit(&stp->sd_lock);
	}

	ASSERT(qp->q_first == NULL);	/* No more delayed write */

	/* Check if an I_LINK was ever done on this stream */
	if (stp->sd_flag & STRHASLINKS) {
		netstack_t *ns;
		str_stack_t *ss;

		ns = netstack_find_by_cred(crp);
		ASSERT(ns != NULL);
		ss = ns->netstack_str;
		ASSERT(ss != NULL);

		(void) munlinkall(stp, LINKCLOSE|LINKNORMAL, crp, &rval, ss);
		netstack_rele(ss->ss_netstack);
	}

	/* pop (detach) everything below the stream head, one queue at a time */
	while (_SAMESTR(qp)) {
		/*
		 * Holding sd_lock prevents q_next from changing in
		 * this stream.
		 */
		mutex_enter(&stp->sd_lock);
		if (!(flag & (FNDELAY|FNONBLOCK)) && (stp->sd_closetime > 0)) {

			/*
			 * sleep until awakened by strwsrv() or timeout
			 */
			for (;;) {
				mutex_enter(QLOCK(qp->q_next));
				if (!(qp->q_next->q_mblkcnt)) {
					mutex_exit(QLOCK(qp->q_next));
					break;
				}
				stp->sd_flag |= WSLEEP;

				/* ensure strwsrv gets enabled */
				qp->q_next->q_flag |= QWANTW;
				mutex_exit(QLOCK(qp->q_next));
				/* get out if we timed out or recv'd a signal */
				if (str_cv_wait(&qp->q_wait, &stp->sd_lock,
				    stp->sd_closetime, 0) <= 0) {
					break;
				}
			}
			stp->sd_flag &= ~WSLEEP;
		}
		mutex_exit(&stp->sd_lock);

		rmq = qp->q_next;
		if (rmq->q_flag & QISDRV) {
			ASSERT(!_SAMESTR(rmq));
			wait_sq_svc(_RD(qp)->q_syncq);
		}

		qdetach(_RD(rmq), 1, flag, crp, B_FALSE);
	}

	/*
	 * Since we call pollwakeup in close() now, the poll list should
	 * be empty in most cases. The only exception is the layered devices
	 * (e.g. the console drivers with redirection modules pushed on top
	 * of it).  We have to do this after calling qdetach() because
	 * the redirection module won't have torn down the console
	 * redirection until after qdetach() has been invoked.
	 */
	if (stp->sd_pollist.ph_list != NULL) {
		pollwakeup(&stp->sd_pollist, POLLERR);
		pollhead_clean(&stp->sd_pollist);
	}
	ASSERT(stp->sd_pollist.ph_list == NULL);
	ASSERT(stp->sd_sidp == NULL);
	ASSERT(stp->sd_pgidp == NULL);

	/* Prevent qenable from re-enabling the stream head queue */
	disable_svc(_RD(qp));

	/*
	 * Wait until service procedure of each queue is
	 * run, if QINSERVICE is set.
	 */
	wait_svc(_RD(qp));

	/*
	 * Now, flush both queues.
	 */
	flushq(_RD(qp), FLUSHALL);
	flushq(qp, FLUSHALL);

	/*
	 * If the write queue of the stream head is pointing to a
	 * read queue, we have a twisted stream.  If the read queue
	 * is alive, convert the stream head queues into a dead end.
	 * If the read queue is dead, free the dead pair.
	 */
	if (qp->q_next && !_SAMESTR(qp)) {
		if (qp->q_next->q_qinfo == &deadrend) {	/* half-closed pipe */
			flushq(qp->q_next, FLUSHALL); /* ensure no message */
			shfree(qp->q_next->q_stream);
			freeq(qp->q_next);
			freeq(_RD(qp));
		} else if (qp->q_next == _RD(qp)) {	/* fifo */
			freeq(_RD(qp));
		} else {				/* pipe */
			/* the stream head stays allocated as the dead end */
			freestp = 0;
			/*
			 * The q_info pointers are never accessed when
			 * SQLOCK is held.
			 */
			ASSERT(qp->q_syncq == _RD(qp)->q_syncq);
			mutex_enter(SQLOCK(qp->q_syncq));
			qp->q_qinfo = &deadwend;
			_RD(qp)->q_qinfo = &deadrend;
			mutex_exit(SQLOCK(qp->q_syncq));
		}
	} else {
		freeq(_RD(qp)); /* free stream head queue pair */
	}

	mutex_enter(&vp->v_lock);
	if (stp->sd_iocblk) {
		/* (mblk_t *)-1 is a marker value, not a message to free */
		if (stp->sd_iocblk != (mblk_t *)-1) {
			freemsg(stp->sd_iocblk);
		}
		stp->sd_iocblk = NULL;
	}
	stp->sd_vnode = NULL;
	vp->v_stream = NULL;
	mutex_exit(&vp->v_lock);
	mutex_enter(&stp->sd_lock);
	freemsg(stp->sd_cmdblk);
	stp->sd_cmdblk = NULL;
	stp->sd_flag &= ~STRCLOSE;
	cv_broadcast(&stp->sd_monitor);
	mutex_exit(&stp->sd_lock);

	if (freestp)
		shfree(stp);
	return (0);
}

/*
 * Read-side put routine of a dead-end stream head (see deadrend).
 * Nothing can consume messages arriving here, so each one is either
 * turned into a negative reply and sent back with qreply(), or freed:
 *
 *	M_FLUSH		if FLUSHW is set and the message has not already
 *			looped (MSGNOLOOP), clear FLUSHR, mark it
 *			MSGNOLOOP and send it back; otherwise free it.
 *	M_COPYIN,
 *	M_COPYOUT	reply with an M_IOCDATA whose cp_rval is 1 (failure).
 *	M_IOCTL		reply with M_IOCNAK.
 *	anything else	freed.
 *
 * A reply is freed instead of sent when the other queue of the pair has
 * no q_next.  Always returns 0.
 */
static int
strsink(queue_t *q, mblk_t *bp)
{
	struct copyresp *resp;

	switch (bp->b_datap->db_type) {
	case M_FLUSH:
		if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) {
			*bp->b_rptr &= ~FLUSHR;
			bp->b_flag |= MSGNOLOOP;
			/*
			 * Protect against the driver passing up
			 * messages after it has done a qprocsoff.
			 */
			if (_OTHERQ(q)->q_next == NULL)
				freemsg(bp);
			else
				qreply(q, bp);
		} else {
			freemsg(bp);
		}
		break;

	case M_COPYIN:
	case M_COPYOUT:
		if (bp->b_cont) {
			freemsg(bp->b_cont);
			bp->b_cont = NULL;
		}
		bp->b_datap->db_type = M_IOCDATA;
		bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
		resp = (struct copyresp *)bp->b_rptr;
		resp->cp_rval = (caddr_t)1;	/* failure */
		/*
		 * Protect against the driver passing up
		 * messages after it has done a qprocsoff.
		 */
		if (_OTHERQ(q)->q_next == NULL)
			freemsg(bp);
		else
			qreply(q, bp);
		break;

	case M_IOCTL:
		if (bp->b_cont) {
			freemsg(bp->b_cont);
			bp->b_cont = NULL;
		}
		bp->b_datap->db_type = M_IOCNAK;
		/*
		 * Protect against the driver passing up
		 * messages after it has done a qprocsoff.
		 */
		if (_OTHERQ(q)->q_next == NULL)
			freemsg(bp);
		else
			qreply(q, bp);
		break;

	default:
		freemsg(bp);
		break;
	}

	return (0);
}

/*
 * Clean up after a process when it closes a stream.  This is called
 * from closef for all closes, whereas strclose is called only for the
 * last close on a stream.  The siglist is scanned for entries for the
 * current process, and these are removed.
897 */ 898 void 899 strclean(struct vnode *vp) 900 { 901 strsig_t *ssp, *pssp, *tssp; 902 stdata_t *stp; 903 int update = 0; 904 905 TRACE_1(TR_FAC_STREAMS_FR, 906 TR_STRCLEAN, "strclean:%p", vp); 907 stp = vp->v_stream; 908 pssp = NULL; 909 mutex_enter(&stp->sd_lock); 910 ssp = stp->sd_siglist; 911 while (ssp) { 912 if (ssp->ss_pidp == curproc->p_pidp) { 913 tssp = ssp->ss_next; 914 if (pssp) 915 pssp->ss_next = tssp; 916 else 917 stp->sd_siglist = tssp; 918 mutex_enter(&pidlock); 919 PID_RELE(ssp->ss_pidp); 920 mutex_exit(&pidlock); 921 kmem_free(ssp, sizeof (strsig_t)); 922 update = 1; 923 ssp = tssp; 924 } else { 925 pssp = ssp; 926 ssp = ssp->ss_next; 927 } 928 } 929 if (update) { 930 stp->sd_sigflags = 0; 931 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 932 stp->sd_sigflags |= ssp->ss_events; 933 } 934 mutex_exit(&stp->sd_lock); 935 } 936 937 /* 938 * Used on the last close to remove any remaining items on the siglist. 939 * These could be present on the siglist due to I_ESETSIG calls that 940 * use process groups or processed that do not have an open file descriptor 941 * for this stream (Such entries would not be removed by strclean). 942 */ 943 static void 944 strcleanall(struct vnode *vp) 945 { 946 strsig_t *ssp, *nssp; 947 stdata_t *stp; 948 949 stp = vp->v_stream; 950 mutex_enter(&stp->sd_lock); 951 ssp = stp->sd_siglist; 952 stp->sd_siglist = NULL; 953 while (ssp) { 954 nssp = ssp->ss_next; 955 mutex_enter(&pidlock); 956 PID_RELE(ssp->ss_pidp); 957 mutex_exit(&pidlock); 958 kmem_free(ssp, sizeof (strsig_t)); 959 ssp = nssp; 960 } 961 stp->sd_sigflags = 0; 962 mutex_exit(&stp->sd_lock); 963 } 964 965 /* 966 * Retrieve the next message from the logical stream head read queue 967 * using either rwnext (if sync stream) or getq_noenab. 968 * It is the callers responsibility to call qbackenable after 969 * it is finished with the message. The caller should not call 970 * qbackenable until after any putback calls to avoid spurious backenabling. 
971 */ 972 mblk_t * 973 strget(struct stdata *stp, queue_t *q, struct uio *uiop, int first, 974 int *errorp) 975 { 976 mblk_t *bp; 977 int error; 978 ssize_t rbytes = 0; 979 980 /* Holding sd_lock prevents the read queue from changing */ 981 ASSERT(MUTEX_HELD(&stp->sd_lock)); 982 983 if (uiop != NULL && stp->sd_struiordq != NULL && 984 q->q_first == NULL && 985 (!first || (stp->sd_wakeq & RSLEEP))) { 986 /* 987 * Stream supports rwnext() for the read side. 988 * If this is the first time we're called by e.g. strread 989 * only do the downcall if there is a deferred wakeup 990 * (registered in sd_wakeq). 991 */ 992 struiod_t uiod; 993 994 if (first) 995 stp->sd_wakeq &= ~RSLEEP; 996 997 (void) uiodup(uiop, &uiod.d_uio, uiod.d_iov, 998 sizeof (uiod.d_iov) / sizeof (*uiod.d_iov)); 999 uiod.d_mp = 0; 1000 /* 1001 * Mark that a thread is in rwnext on the read side 1002 * to prevent strrput from nacking ioctls immediately. 1003 * When the last concurrent rwnext returns 1004 * the ioctls are nack'ed. 1005 */ 1006 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1007 stp->sd_struiodnak++; 1008 /* 1009 * Note: rwnext will drop sd_lock. 1010 */ 1011 error = rwnext(q, &uiod); 1012 ASSERT(MUTEX_NOT_HELD(&stp->sd_lock)); 1013 mutex_enter(&stp->sd_lock); 1014 stp->sd_struiodnak--; 1015 while (stp->sd_struiodnak == 0 && 1016 ((bp = stp->sd_struionak) != NULL)) { 1017 stp->sd_struionak = bp->b_next; 1018 bp->b_next = NULL; 1019 bp->b_datap->db_type = M_IOCNAK; 1020 /* 1021 * Protect against the driver passing up 1022 * messages after it has done a qprocsoff. 
1023 */ 1024 if (_OTHERQ(q)->q_next == NULL) 1025 freemsg(bp); 1026 else { 1027 mutex_exit(&stp->sd_lock); 1028 qreply(q, bp); 1029 mutex_enter(&stp->sd_lock); 1030 } 1031 } 1032 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1033 if (error == 0 || error == EWOULDBLOCK) { 1034 if ((bp = uiod.d_mp) != NULL) { 1035 *errorp = 0; 1036 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1037 return (bp); 1038 } 1039 error = 0; 1040 } else if (error == EINVAL) { 1041 /* 1042 * The stream plumbing must have 1043 * changed while we were away, so 1044 * just turn off rwnext()s. 1045 */ 1046 error = 0; 1047 } else if (error == EBUSY) { 1048 /* 1049 * The module might have data in transit using putnext 1050 * Fall back on waiting + getq. 1051 */ 1052 error = 0; 1053 } else { 1054 *errorp = error; 1055 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1056 return (NULL); 1057 } 1058 /* 1059 * Try a getq in case a rwnext() generated mblk 1060 * has bubbled up via strrput(). 1061 */ 1062 } 1063 *errorp = 0; 1064 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1065 1066 /* 1067 * If we have a valid uio, try and use this as a guide for how 1068 * many bytes to retrieve from the queue via getq_noenab(). 1069 * Doing this can avoid unneccesary counting of overlong 1070 * messages in putback(). We currently only do this for sockets 1071 * and only if there is no sd_rputdatafunc hook. 1072 * 1073 * The sd_rputdatafunc hook transforms the entire message 1074 * before any bytes in it can be given to a client. So, rbytes 1075 * must be 0 if there is a hook. 1076 */ 1077 if ((uiop != NULL) && (stp->sd_vnode->v_type == VSOCK) && 1078 (stp->sd_rputdatafunc == NULL)) 1079 rbytes = uiop->uio_resid; 1080 1081 return (getq_noenab(q, rbytes)); 1082 } 1083 1084 /* 1085 * Copy out the message pointed to by `bp' into the uio pointed to by `uiop'. 1086 * If the message does not fit in the uio the remainder of it is returned; 1087 * otherwise NULL is returned. Any embedded zero-length mblk_t's are 1088 * consumed, even if uio_resid reaches zero. 
On error, `*errorp' is set to 1089 * the error code, the message is consumed, and NULL is returned. 1090 */ 1091 static mblk_t * 1092 struiocopyout(mblk_t *bp, struct uio *uiop, int *errorp) 1093 { 1094 int error; 1095 ptrdiff_t n; 1096 mblk_t *nbp; 1097 1098 ASSERT(bp->b_wptr >= bp->b_rptr); 1099 1100 do { 1101 if ((n = MIN(uiop->uio_resid, MBLKL(bp))) != 0) { 1102 ASSERT(n > 0); 1103 1104 error = uiomove(bp->b_rptr, n, UIO_READ, uiop); 1105 if (error != 0) { 1106 freemsg(bp); 1107 *errorp = error; 1108 return (NULL); 1109 } 1110 } 1111 1112 bp->b_rptr += n; 1113 while (bp != NULL && (bp->b_rptr >= bp->b_wptr)) { 1114 nbp = bp; 1115 bp = bp->b_cont; 1116 freeb(nbp); 1117 } 1118 } while (bp != NULL && uiop->uio_resid > 0); 1119 1120 *errorp = 0; 1121 return (bp); 1122 } 1123 1124 /* 1125 * Read a stream according to the mode flags in sd_flag: 1126 * 1127 * (default mode) - Byte stream, msg boundaries are ignored 1128 * RD_MSGDIS (msg discard) - Read on msg boundaries and throw away 1129 * any data remaining in msg 1130 * RD_MSGNODIS (msg non-discard) - Read on msg boundaries and put back 1131 * any remaining data on head of read queue 1132 * 1133 * Consume readable messages on the front of the queue until 1134 * ttolwp(curthread)->lwp_count 1135 * is satisfied, the readable messages are exhausted, or a message 1136 * boundary is reached in a message mode. If no data was read and 1137 * the stream was not opened with the NDELAY flag, block until data arrives. 1138 * Otherwise return the data read and update the count. 1139 * 1140 * In default mode a 0 length message signifies end-of-file and terminates 1141 * a read in progress. The 0 length message is removed from the queue 1142 * only if it is the only message read (no data is read). 1143 * 1144 * An attempt to read an M_PROTO or M_PCPROTO message results in an 1145 * EBADMSG error return, unless either RD_PROTDAT or RD_PROTDIS are set. 1146 * If RD_PROTDAT is set, M_PROTO and M_PCPROTO messages are read as data. 
/*
 * Read a stream according to the read mode flags in sd_read_opt:
 *
 * (default mode)		- Byte stream, msg boundaries are ignored
 * RD_MSGDIS (msg discard)	- Read on msg boundaries and throw away
 *				any data remaining in msg
 * RD_MSGNODIS (msg non-discard) - Read on msg boundaries and put back
 *				any remaining data on head of read queue
 *
 * Consume readable messages on the front of the queue until
 * uiop->uio_resid is satisfied, the readable messages are exhausted,
 * or a message boundary is reached in a message mode.  If no data was
 * read and the stream was not opened with the NDELAY flag, block until
 * data arrives.  Otherwise return the data read and update the count.
 *
 * In default mode a 0 length message signifies end-of-file and terminates
 * a read in progress.  The 0 length message is removed from the queue
 * only if it is the only message read (no data is read).
 *
 * An attempt to read an M_PROTO or M_PCPROTO message results in an
 * EBADMSG error return, unless either RD_PROTDAT or RD_PROTDIS are set.
 * If RD_PROTDAT is set, M_PROTO and M_PCPROTO messages are read as data.
 * If RD_PROTDIS is set, the M_PROTO and M_PCPROTO parts of the message
 * are unlinked from any M_DATA blocks in the message, the protos are
 * thrown away, and the data is read.
 *
 * Returns 0 or an errno value.  sd_lock is taken on entry and is never
 * held on return: the `oops' exit drops it, the `oops1' exit is used by
 * paths that have already dropped it.
 */
/* ARGSUSED */
int
strread(struct vnode *vp, struct uio *uiop, cred_t *crp)
{
	struct stdata *stp;
	mblk_t *bp, *nbp;
	queue_t *q;
	int error = 0;
	uint_t old_sd_flag;	/* sd_flag as sampled at the top of each pass */
	int first;		/* passed to strget(); cleared after a wait */
	char rflg;		/* set once any data has been copied out */
	uint_t mark;		/* Contains MSG*MARK and _LASTMARK */
#define	_LASTMARK 0x8000	/* Distinct from MSG*MARK */
	short delim;		/* message carried MSGDELIM in STRDELIM mode */
	unsigned char pri = 0;	/* band of the message being processed */
	char waitflag;
	unsigned char type;

	TRACE_1(TR_FAC_STREAMS_FR,
	    TR_STRREAD_ENTER, "strread:%p", vp);
	ASSERT(vp->v_stream);
	stp = vp->v_stream;

	mutex_enter(&stp->sd_lock);

	if ((error = i_straccess(stp, JCREAD)) != 0) {
		mutex_exit(&stp->sd_lock);
		return (error);
	}

	if (stp->sd_flag & (STRDERR|STPLEX)) {
		error = strgeterr(stp, STRDERR|STPLEX, 0);
		if (error != 0) {
			mutex_exit(&stp->sd_lock);
			return (error);
		}
	}

	/*
	 * Loop terminates when uiop->uio_resid == 0.
	 */
	rflg = 0;
	waitflag = READWAIT;
	q = _RD(stp->sd_wrq);
	for (;;) {
		ASSERT(MUTEX_HELD(&stp->sd_lock));
		old_sd_flag = stp->sd_flag;
		mark = 0;
		delim = 0;
		first = 1;
		/* Wait (if allowed) until strget() hands us a message. */
		while ((bp = strget(stp, q, uiop, first, &error)) == NULL) {
			int done = 0;

			ASSERT(MUTEX_HELD(&stp->sd_lock));

			if (error != 0)
				goto oops;

			if (stp->sd_flag & (STRHUP|STREOF)) {
				goto oops;
			}
			/*
			 * Data was already returned and we are not waiting
			 * for a delimiter: return what we have.
			 */
			if (rflg && !(stp->sd_flag & STRDELIM)) {
				goto oops;
			}
			/*
			 * If a read(fd,buf,0) has been done, there is no
			 * need to sleep. We always have zero bytes to
			 * return.
			 */
			if (uiop->uio_resid == 0) {
				goto oops;
			}

			qbackenable(q, 0);

			TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_WAIT,
			    "strread calls strwaitq:%p, %p, %p",
			    vp, uiop, crp);
			if ((error = strwaitq(stp, waitflag, uiop->uio_resid,
			    uiop->uio_fmode, -1, &done)) != 0 || done) {
				TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_DONE,
				    "strread error or done:%p, %p, %p",
				    vp, uiop, crp);
				/*
				 * With FNDELAY on an OLDNDELAY stream an
				 * EAGAIN is reported as a successful read.
				 */
				if ((uiop->uio_fmode & FNDELAY) &&
				    (stp->sd_flag & OLDNDELAY) &&
				    (error == EAGAIN))
					error = 0;
				goto oops;
			}
			TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_AWAKE,
			    "strread awakes:%p, %p, %p", vp, uiop, crp);
			/* Re-check access after having slept. */
			if ((error = i_straccess(stp, JCREAD)) != 0) {
				goto oops;
			}
			first = 0;
		}

		ASSERT(MUTEX_HELD(&stp->sd_lock));
		ASSERT(bp);
		pri = bp->b_band;
		/*
		 * Extract any mark information. If the message is not
		 * completely consumed this information will be put in the mblk
		 * that is putback.
		 * If MSGMARKNEXT is set and the message is completely consumed
		 * the STRATMARK flag will be set below. Likewise, if
		 * MSGNOTMARKNEXT is set and the message is
		 * completely consumed STRNOTATMARK will be set.
		 *
		 * For some unknown reason strread only breaks the read at the
		 * last mark.
		 */
		mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT);
		ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
		    (MSGMARKNEXT|MSGNOTMARKNEXT));
		if (mark != 0 && bp == stp->sd_mark) {
			/* Data already read: stop in front of the mark. */
			if (rflg) {
				putback(stp, q, bp, pri);
				goto oops;
			}
			mark |= _LASTMARK;
			stp->sd_mark = NULL;
		}
		if ((stp->sd_flag & STRDELIM) && (bp->b_flag & MSGDELIM))
			delim = 1;
		/* sd_lock is not held while the message is processed. */
		mutex_exit(&stp->sd_lock);

		if (STREAM_NEEDSERVICE(stp))
			stream_runservice(stp);

		type = bp->b_datap->db_type;

		switch (type) {

		case M_DATA:
ismdata:
			/* Also entered via goto from the M_PROTO cases. */
			if (msgnodata(bp)) {
				if (mark || delim) {
					freemsg(bp);
				} else if (rflg) {

					/*
					 * If already read data put zero
					 * length message back on queue else
					 * free msg and return 0.
					 */
					bp->b_band = pri;
					mutex_enter(&stp->sd_lock);
					putback(stp, q, bp, pri);
					mutex_exit(&stp->sd_lock);
				} else {
					freemsg(bp);
				}
				error =  0;
				goto oops1;
			}

			rflg = 1;
			waitflag |= NOINTR;
			/* On error struiocopyout() has consumed the message. */
			bp = struiocopyout(bp, uiop, &error);
			if (error != 0)
				goto oops1;

			mutex_enter(&stp->sd_lock);
			if (bp) {
				/*
				 * Have remaining data in message.
				 * Free msg if in discard mode.
				 */
				if (stp->sd_read_opt & RD_MSGDIS) {
					freemsg(bp);
				} else {
					/*
					 * Restore band, mark and delimiter
					 * state on the remainder before it
					 * goes back on the queue.
					 */
					bp->b_band = pri;
					if ((mark & _LASTMARK) &&
					    (stp->sd_mark == NULL))
						stp->sd_mark = bp;
					bp->b_flag |= mark & ~_LASTMARK;
					if (delim)
						bp->b_flag |= MSGDELIM;
					if (msgnodata(bp))
						freemsg(bp);
					else
						putback(stp, q, bp, pri);
				}
			} else {
				/*
				 * Consumed the complete message.
				 * Move the MSG*MARKNEXT information
				 * to the stream head just in case
				 * the read queue becomes empty.
				 *
				 * If the stream head was at the mark
				 * (STRATMARK) before we dropped sd_lock above
				 * and some data was consumed then we have
				 * moved past the mark thus STRATMARK is
				 * cleared. However, if a message arrived in
				 * strrput during the copyout above causing
				 * STRATMARK to be set we can not clear that
				 * flag.
				 */
				if (mark &
				    (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) {
					if (mark & MSGMARKNEXT) {
						stp->sd_flag &= ~STRNOTATMARK;
						stp->sd_flag |= STRATMARK;
					} else if (mark & MSGNOTMARKNEXT) {
						stp->sd_flag &= ~STRATMARK;
						stp->sd_flag |= STRNOTATMARK;
					} else {
						stp->sd_flag &=
						    ~(STRATMARK|STRNOTATMARK);
					}
				} else if (rflg && (old_sd_flag & STRATMARK)) {
					stp->sd_flag &= ~STRATMARK;
				}
			}

			/*
			 * Check for signal messages at the front of the read
			 * queue and generate the signal(s) if appropriate.
			 * The only signal that can be on queue is M_SIG at
			 * this point.
			 */
			while ((((bp = q->q_first)) != NULL) &&
			    (bp->b_datap->db_type == M_SIG)) {
				bp = getq_noenab(q, 0);
				/*
				 * sd_lock is held so the content of the
				 * read queue can not change.
				 */
				ASSERT(bp != NULL && DB_TYPE(bp) == M_SIG);
				strsignal_nolock(stp, *bp->b_rptr, bp->b_band);
				mutex_exit(&stp->sd_lock);
				freemsg(bp);
				if (STREAM_NEEDSERVICE(stp))
					stream_runservice(stp);
				mutex_enter(&stp->sd_lock);
			}

			/*
			 * Stop when the request is satisfied, the last mark
			 * or a delimiter was reached, or a message mode
			 * limits the read to a single message.
			 */
			if ((uiop->uio_resid == 0) || (mark & _LASTMARK) ||
			    delim ||
			    (stp->sd_read_opt & (RD_MSGDIS|RD_MSGNODIS))) {
				goto oops;
			}
			continue;

		case M_SIG:
			strsignal(stp, *bp->b_rptr, (int32_t)bp->b_band);
			freemsg(bp);
			/* The loop expects sd_lock to be held. */
			mutex_enter(&stp->sd_lock);
			continue;

		case M_PROTO:
		case M_PCPROTO:
			/*
			 * Only data messages are readable.
			 * Any others generate an error, unless
			 * RD_PROTDIS or RD_PROTDAT is set.
			 */
			if (stp->sd_read_opt & RD_PROTDAT) {
				/*
				 * Retype leading proto blocks as M_DATA.
				 * NOTE(review): this walks b_next rather than
				 * b_cont -- confirm that is intended.
				 */
				for (nbp = bp; nbp; nbp = nbp->b_next) {
					if ((nbp->b_datap->db_type ==
					    M_PROTO) ||
					    (nbp->b_datap->db_type ==
					    M_PCPROTO)) {
						nbp->b_datap->db_type = M_DATA;
					} else {
						break;
					}
				}
				/*
				 * clear stream head hi pri flag based on
				 * first message
				 */
				if (type == M_PCPROTO) {
					mutex_enter(&stp->sd_lock);
					stp->sd_flag &= ~STRPRI;
					mutex_exit(&stp->sd_lock);
				}
				goto ismdata;
			} else if (stp->sd_read_opt & RD_PROTDIS) {
				/*
				 * discard non-data messages
				 */
				while (bp &&
				    ((bp->b_datap->db_type == M_PROTO) ||
				    (bp->b_datap->db_type == M_PCPROTO))) {
					nbp = unlinkb(bp);
					freeb(bp);
					bp = nbp;
				}
				/*
				 * clear stream head hi pri flag based on
				 * first message
				 */
				if (type == M_PCPROTO) {
					mutex_enter(&stp->sd_lock);
					stp->sd_flag &= ~STRPRI;
					mutex_exit(&stp->sd_lock);
				}
				if (bp) {
					bp->b_band = pri;
					goto ismdata;
				} else {
					/* Nothing but proto blocks: next msg. */
					break;
				}
			}
			/* FALLTHRU */
		case M_PASSFP:
			if ((bp->b_datap->db_type == M_PASSFP) &&
			    (stp->sd_read_opt & RD_PROTDIS)) {
				freemsg(bp);
				break;
			}
			/*
			 * Not readable: leave the message on the queue and
			 * fail with EBADMSG unless data was already read.
			 */
			mutex_enter(&stp->sd_lock);
			putback(stp, q, bp, pri);
			mutex_exit(&stp->sd_lock);
			if (rflg == 0)
				error = EBADMSG;
			goto oops1;

		default:
			/*
			 * Garbage on stream head read queue.
			 */
			cmn_err(CE_WARN, "bad %x found at stream head\n",
			    bp->b_datap->db_type);
			freemsg(bp);
			goto oops1;
		}
		/* Reached via `break' above; retake sd_lock for next pass. */
		mutex_enter(&stp->sd_lock);
	}
oops:
	/* Exit taken with sd_lock held. */
	mutex_exit(&stp->sd_lock);
oops1:
	/* Exit taken with sd_lock already dropped. */
	qbackenable(q, pri);
	return (error);
#undef	_LASTMARK
}
1496 */ 1497 /* ARGSUSED */ 1498 mblk_t * 1499 strrput_proto(vnode_t *vp, mblk_t *mp, 1500 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 1501 strsigset_t *allmsgsigs, strpollset_t *pollwakeups) 1502 { 1503 *wakeups = RSLEEP; 1504 *allmsgsigs = 0; 1505 1506 switch (mp->b_datap->db_type) { 1507 case M_PROTO: 1508 if (mp->b_band == 0) { 1509 *firstmsgsigs = S_INPUT | S_RDNORM; 1510 *pollwakeups = POLLIN | POLLRDNORM; 1511 } else { 1512 *firstmsgsigs = S_INPUT | S_RDBAND; 1513 *pollwakeups = POLLIN | POLLRDBAND; 1514 } 1515 break; 1516 case M_PCPROTO: 1517 *firstmsgsigs = S_HIPRI; 1518 *pollwakeups = POLLPRI; 1519 break; 1520 } 1521 return (mp); 1522 } 1523 1524 /* 1525 * Default processing of everything but M_DATA, M_PROTO, M_PCPROTO and 1526 * M_PASSFP messages. 1527 * Determine which wakeups and signals are needed. 1528 * This can be replaced by a user-specified procedure for kernel users 1529 * of STREAMS. 1530 */ 1531 /* ARGSUSED */ 1532 mblk_t * 1533 strrput_misc(vnode_t *vp, mblk_t *mp, 1534 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 1535 strsigset_t *allmsgsigs, strpollset_t *pollwakeups) 1536 { 1537 *wakeups = 0; 1538 *firstmsgsigs = 0; 1539 *allmsgsigs = 0; 1540 *pollwakeups = 0; 1541 return (mp); 1542 } 1543 1544 /* 1545 * Stream read put procedure. Called from downstream driver/module 1546 * with messages for the stream head. Data, protocol, and in-stream 1547 * signal messages are placed on the queue, others are handled directly. 
1548 */ 1549 int 1550 strrput(queue_t *q, mblk_t *bp) 1551 { 1552 struct stdata *stp; 1553 ulong_t rput_opt; 1554 strwakeup_t wakeups; 1555 strsigset_t firstmsgsigs; /* Signals if first message on queue */ 1556 strsigset_t allmsgsigs; /* Signals for all messages */ 1557 strsigset_t signals; /* Signals events to generate */ 1558 strpollset_t pollwakeups; 1559 mblk_t *nextbp; 1560 uchar_t band = 0; 1561 int hipri_sig; 1562 1563 stp = (struct stdata *)q->q_ptr; 1564 /* 1565 * Use rput_opt for optimized access to the SR_ flags except 1566 * SR_POLLIN. That flag has to be checked under sd_lock since it 1567 * is modified by strpoll(). 1568 */ 1569 rput_opt = stp->sd_rput_opt; 1570 1571 ASSERT(qclaimed(q)); 1572 TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_ENTER, 1573 "strrput called with message type:q %p bp %p", q, bp); 1574 1575 /* 1576 * Perform initial processing and pass to the parameterized functions. 1577 */ 1578 ASSERT(bp->b_next == NULL); 1579 1580 switch (bp->b_datap->db_type) { 1581 case M_DATA: 1582 /* 1583 * sockfs is the only consumer of STREOF and when it is set, 1584 * it implies that the receiver is not interested in receiving 1585 * any more data, hence the mblk is freed to prevent unnecessary 1586 * message queueing at the stream head. 1587 */ 1588 if (stp->sd_flag == STREOF) { 1589 freemsg(bp); 1590 return (0); 1591 } 1592 if ((rput_opt & SR_IGN_ZEROLEN) && 1593 bp->b_rptr == bp->b_wptr && msgnodata(bp)) { 1594 /* 1595 * Ignore zero-length M_DATA messages. These might be 1596 * generated by some transports. 1597 * The zero-length M_DATA messages, even if they 1598 * are ignored, should effect the atmark tracking and 1599 * should wake up a thread sleeping in strwaitmark. 1600 */ 1601 mutex_enter(&stp->sd_lock); 1602 if (bp->b_flag & MSGMARKNEXT) { 1603 /* 1604 * Record the position of the mark either 1605 * in q_last or in STRATMARK. 
1606 */ 1607 if (q->q_last != NULL) { 1608 q->q_last->b_flag &= ~MSGNOTMARKNEXT; 1609 q->q_last->b_flag |= MSGMARKNEXT; 1610 } else { 1611 stp->sd_flag &= ~STRNOTATMARK; 1612 stp->sd_flag |= STRATMARK; 1613 } 1614 } else if (bp->b_flag & MSGNOTMARKNEXT) { 1615 /* 1616 * Record that this is not the position of 1617 * the mark either in q_last or in 1618 * STRNOTATMARK. 1619 */ 1620 if (q->q_last != NULL) { 1621 q->q_last->b_flag &= ~MSGMARKNEXT; 1622 q->q_last->b_flag |= MSGNOTMARKNEXT; 1623 } else { 1624 stp->sd_flag &= ~STRATMARK; 1625 stp->sd_flag |= STRNOTATMARK; 1626 } 1627 } 1628 if (stp->sd_flag & RSLEEP) { 1629 stp->sd_flag &= ~RSLEEP; 1630 cv_broadcast(&q->q_wait); 1631 } 1632 mutex_exit(&stp->sd_lock); 1633 freemsg(bp); 1634 return (0); 1635 } 1636 wakeups = RSLEEP; 1637 if (bp->b_band == 0) { 1638 firstmsgsigs = S_INPUT | S_RDNORM; 1639 pollwakeups = POLLIN | POLLRDNORM; 1640 } else { 1641 firstmsgsigs = S_INPUT | S_RDBAND; 1642 pollwakeups = POLLIN | POLLRDBAND; 1643 } 1644 if (rput_opt & SR_SIGALLDATA) 1645 allmsgsigs = firstmsgsigs; 1646 else 1647 allmsgsigs = 0; 1648 1649 mutex_enter(&stp->sd_lock); 1650 if ((rput_opt & SR_CONSOL_DATA) && 1651 (q->q_last != NULL) && 1652 (bp->b_flag & (MSGMARK|MSGDELIM)) == 0) { 1653 /* 1654 * Consolidate an M_DATA message onto an M_DATA, 1655 * M_PROTO, or M_PCPROTO by merging it with q_last. 1656 * The consolidation does not take place if 1657 * the old message is marked with either of the 1658 * marks or the delim flag or if the new 1659 * message is marked with MSGMARK. The MSGMARK 1660 * check is needed to handle the odd semantics of 1661 * MSGMARK where essentially the whole message 1662 * is to be treated as marked. 1663 * Carry any MSGMARKNEXT and MSGNOTMARKNEXT from the 1664 * new message to the front of the b_cont chain. 
1665 */ 1666 mblk_t *lbp = q->q_last; 1667 unsigned char db_type = lbp->b_datap->db_type; 1668 1669 if ((db_type == M_DATA || db_type == M_PROTO || 1670 db_type == M_PCPROTO) && 1671 !(lbp->b_flag & (MSGDELIM|MSGMARK|MSGMARKNEXT))) { 1672 rmvq_noenab(q, lbp); 1673 /* 1674 * The first message in the b_cont list 1675 * tracks MSGMARKNEXT and MSGNOTMARKNEXT. 1676 * We need to handle the case where we 1677 * are appending: 1678 * 1679 * 1) a MSGMARKNEXT to a MSGNOTMARKNEXT. 1680 * 2) a MSGMARKNEXT to a plain message. 1681 * 3) a MSGNOTMARKNEXT to a plain message 1682 * 4) a MSGNOTMARKNEXT to a MSGNOTMARKNEXT 1683 * message. 1684 * 1685 * Thus we never append a MSGMARKNEXT or 1686 * MSGNOTMARKNEXT to a MSGMARKNEXT message. 1687 */ 1688 if (bp->b_flag & MSGMARKNEXT) { 1689 lbp->b_flag |= MSGMARKNEXT; 1690 lbp->b_flag &= ~MSGNOTMARKNEXT; 1691 bp->b_flag &= ~MSGMARKNEXT; 1692 } else if (bp->b_flag & MSGNOTMARKNEXT) { 1693 lbp->b_flag |= MSGNOTMARKNEXT; 1694 bp->b_flag &= ~MSGNOTMARKNEXT; 1695 } 1696 1697 linkb(lbp, bp); 1698 bp = lbp; 1699 /* 1700 * The new message logically isn't the first 1701 * even though the q_first check below thinks 1702 * it is. Clear the firstmsgsigs to make it 1703 * not appear to be first. 
1704 */ 1705 firstmsgsigs = 0; 1706 } 1707 } 1708 break; 1709 1710 case M_PASSFP: 1711 wakeups = RSLEEP; 1712 allmsgsigs = 0; 1713 if (bp->b_band == 0) { 1714 firstmsgsigs = S_INPUT | S_RDNORM; 1715 pollwakeups = POLLIN | POLLRDNORM; 1716 } else { 1717 firstmsgsigs = S_INPUT | S_RDBAND; 1718 pollwakeups = POLLIN | POLLRDBAND; 1719 } 1720 mutex_enter(&stp->sd_lock); 1721 break; 1722 1723 case M_PROTO: 1724 case M_PCPROTO: 1725 ASSERT(stp->sd_rprotofunc != NULL); 1726 bp = (stp->sd_rprotofunc)(stp->sd_vnode, bp, 1727 &wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups); 1728 #define ALLSIG (S_INPUT|S_HIPRI|S_OUTPUT|S_MSG|S_ERROR|S_HANGUP|S_RDNORM|\ 1729 S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG) 1730 #define ALLPOLL (POLLIN|POLLPRI|POLLOUT|POLLRDNORM|POLLWRNORM|POLLRDBAND|\ 1731 POLLWRBAND) 1732 1733 ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0); 1734 ASSERT((firstmsgsigs & ~ALLSIG) == 0); 1735 ASSERT((allmsgsigs & ~ALLSIG) == 0); 1736 ASSERT((pollwakeups & ~ALLPOLL) == 0); 1737 1738 mutex_enter(&stp->sd_lock); 1739 break; 1740 1741 default: 1742 ASSERT(stp->sd_rmiscfunc != NULL); 1743 bp = (stp->sd_rmiscfunc)(stp->sd_vnode, bp, 1744 &wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups); 1745 ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0); 1746 ASSERT((firstmsgsigs & ~ALLSIG) == 0); 1747 ASSERT((allmsgsigs & ~ALLSIG) == 0); 1748 ASSERT((pollwakeups & ~ALLPOLL) == 0); 1749 #undef ALLSIG 1750 #undef ALLPOLL 1751 mutex_enter(&stp->sd_lock); 1752 break; 1753 } 1754 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1755 1756 /* By default generate superset of signals */ 1757 signals = (firstmsgsigs | allmsgsigs); 1758 1759 /* 1760 * The proto and misc functions can return multiple messages 1761 * as a b_next chain. Such messages are processed separately. 
1762 */ 1763 one_more: 1764 hipri_sig = 0; 1765 if (bp == NULL) { 1766 nextbp = NULL; 1767 } else { 1768 nextbp = bp->b_next; 1769 bp->b_next = NULL; 1770 1771 switch (bp->b_datap->db_type) { 1772 case M_PCPROTO: 1773 /* 1774 * Only one priority protocol message is allowed at the 1775 * stream head at a time. 1776 */ 1777 if (stp->sd_flag & STRPRI) { 1778 TRACE_0(TR_FAC_STREAMS_FR, TR_STRRPUT_PROTERR, 1779 "M_PCPROTO already at head"); 1780 freemsg(bp); 1781 mutex_exit(&stp->sd_lock); 1782 goto done; 1783 } 1784 stp->sd_flag |= STRPRI; 1785 hipri_sig = 1; 1786 /* FALLTHRU */ 1787 case M_DATA: 1788 case M_PROTO: 1789 case M_PASSFP: 1790 band = bp->b_band; 1791 /* 1792 * Marking doesn't work well when messages 1793 * are marked in more than one band. We only 1794 * remember the last message received, even if 1795 * it is placed on the queue ahead of other 1796 * marked messages. 1797 */ 1798 if (bp->b_flag & MSGMARK) 1799 stp->sd_mark = bp; 1800 (void) putq(q, bp); 1801 1802 /* 1803 * If message is a PCPROTO message, always use 1804 * firstmsgsigs to determine if a signal should be 1805 * sent as strrput is the only place to send 1806 * signals for PCPROTO. Other messages are based on 1807 * the STRGETINPROG flag. The flag determines if 1808 * strrput or (k)strgetmsg will be responsible for 1809 * sending the signals, in the firstmsgsigs case. 
1810 */ 1811 if ((hipri_sig == 1) || 1812 (((stp->sd_flag & STRGETINPROG) == 0) && 1813 (q->q_first == bp))) 1814 signals = (firstmsgsigs | allmsgsigs); 1815 else 1816 signals = allmsgsigs; 1817 break; 1818 1819 default: 1820 mutex_exit(&stp->sd_lock); 1821 (void) strrput_nondata(q, bp); 1822 mutex_enter(&stp->sd_lock); 1823 break; 1824 } 1825 } 1826 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1827 /* 1828 * Wake sleeping read/getmsg and cancel deferred wakeup 1829 */ 1830 if (wakeups & RSLEEP) 1831 stp->sd_wakeq &= ~RSLEEP; 1832 1833 wakeups &= stp->sd_flag; 1834 if (wakeups & RSLEEP) { 1835 stp->sd_flag &= ~RSLEEP; 1836 cv_broadcast(&q->q_wait); 1837 } 1838 if (wakeups & WSLEEP) { 1839 stp->sd_flag &= ~WSLEEP; 1840 cv_broadcast(&_WR(q)->q_wait); 1841 } 1842 1843 if (pollwakeups != 0) { 1844 if (pollwakeups == (POLLIN | POLLRDNORM)) { 1845 /* 1846 * Can't use rput_opt since it was not 1847 * read when sd_lock was held and SR_POLLIN is changed 1848 * by strpoll() under sd_lock. 1849 */ 1850 if (!(stp->sd_rput_opt & SR_POLLIN)) 1851 goto no_pollwake; 1852 stp->sd_rput_opt &= ~SR_POLLIN; 1853 } 1854 mutex_exit(&stp->sd_lock); 1855 pollwakeup(&stp->sd_pollist, pollwakeups); 1856 mutex_enter(&stp->sd_lock); 1857 } 1858 no_pollwake: 1859 1860 /* 1861 * strsendsig can handle multiple signals with a 1862 * single call. 1863 */ 1864 if (stp->sd_sigflags & signals) 1865 strsendsig(stp->sd_siglist, signals, band, 0); 1866 mutex_exit(&stp->sd_lock); 1867 1868 1869 done: 1870 if (nextbp == NULL) 1871 return (0); 1872 1873 /* 1874 * Any signals were handled the first time. 1875 * Wakeups and pollwakeups are redone to avoid any race 1876 * conditions - all the messages are not queued until the 1877 * last message has been processed by strrput. 
1878 */ 1879 bp = nextbp; 1880 signals = firstmsgsigs = allmsgsigs = 0; 1881 mutex_enter(&stp->sd_lock); 1882 goto one_more; 1883 } 1884 1885 static void 1886 log_dupioc(queue_t *rq, mblk_t *bp) 1887 { 1888 queue_t *wq, *qp; 1889 char *modnames, *mnp, *dname; 1890 size_t maxmodstr; 1891 boolean_t islast; 1892 1893 /* 1894 * Allocate a buffer large enough to hold the names of nstrpush modules 1895 * and one driver, with spaces between and NUL terminator. If we can't 1896 * get memory, then we'll just log the driver name. 1897 */ 1898 maxmodstr = nstrpush * (FMNAMESZ + 1); 1899 mnp = modnames = kmem_alloc(maxmodstr, KM_NOSLEEP); 1900 1901 /* march down write side to print log message down to the driver */ 1902 wq = WR(rq); 1903 1904 /* make sure q_next doesn't shift around while we're grabbing data */ 1905 claimstr(wq); 1906 qp = wq->q_next; 1907 do { 1908 dname = Q2NAME(qp); 1909 islast = !SAMESTR(qp) || qp->q_next == NULL; 1910 if (modnames == NULL) { 1911 /* 1912 * If we don't have memory, then get the driver name in 1913 * the log where we can see it. Note that memory 1914 * pressure is a possible cause of these sorts of bugs. 1915 */ 1916 if (islast) { 1917 modnames = dname; 1918 maxmodstr = 0; 1919 } 1920 } else { 1921 mnp += snprintf(mnp, FMNAMESZ + 1, "%s", dname); 1922 if (!islast) 1923 *mnp++ = ' '; 1924 } 1925 qp = qp->q_next; 1926 } while (!islast); 1927 releasestr(wq); 1928 /* Cannot happen unless stream head is corrupt. */ 1929 ASSERT(modnames != NULL); 1930 (void) strlog(rq->q_qinfo->qi_minfo->mi_idnum, 0, 1, 1931 SL_CONSOLE|SL_TRACE|SL_ERROR, 1932 "Warning: stream %p received duplicate %X M_IOC%s; module list: %s", 1933 rq->q_ptr, ((struct iocblk *)bp->b_rptr)->ioc_cmd, 1934 (DB_TYPE(bp) == M_IOCACK ? 
"ACK" : "NAK"), modnames); 1935 if (maxmodstr != 0) 1936 kmem_free(modnames, maxmodstr); 1937 } 1938 1939 int 1940 strrput_nondata(queue_t *q, mblk_t *bp) 1941 { 1942 struct stdata *stp; 1943 struct iocblk *iocbp; 1944 struct stroptions *sop; 1945 struct copyreq *reqp; 1946 struct copyresp *resp; 1947 unsigned char bpri; 1948 unsigned char flushed_already = 0; 1949 1950 stp = (struct stdata *)q->q_ptr; 1951 1952 ASSERT(!(stp->sd_flag & STPLEX)); 1953 ASSERT(qclaimed(q)); 1954 1955 switch (bp->b_datap->db_type) { 1956 case M_ERROR: 1957 /* 1958 * An error has occurred downstream, the errno is in the first 1959 * bytes of the message. 1960 */ 1961 if ((bp->b_wptr - bp->b_rptr) == 2) { /* New flavor */ 1962 unsigned char rw = 0; 1963 1964 mutex_enter(&stp->sd_lock); 1965 if (*bp->b_rptr != NOERROR) { /* read error */ 1966 if (*bp->b_rptr != 0) { 1967 if (stp->sd_flag & STRDERR) 1968 flushed_already |= FLUSHR; 1969 stp->sd_flag |= STRDERR; 1970 rw |= FLUSHR; 1971 } else { 1972 stp->sd_flag &= ~STRDERR; 1973 } 1974 stp->sd_rerror = *bp->b_rptr; 1975 } 1976 bp->b_rptr++; 1977 if (*bp->b_rptr != NOERROR) { /* write error */ 1978 if (*bp->b_rptr != 0) { 1979 if (stp->sd_flag & STWRERR) 1980 flushed_already |= FLUSHW; 1981 stp->sd_flag |= STWRERR; 1982 rw |= FLUSHW; 1983 } else { 1984 stp->sd_flag &= ~STWRERR; 1985 } 1986 stp->sd_werror = *bp->b_rptr; 1987 } 1988 if (rw) { 1989 TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_WAKE, 1990 "strrput cv_broadcast:q %p, bp %p", 1991 q, bp); 1992 cv_broadcast(&q->q_wait); /* readers */ 1993 cv_broadcast(&_WR(q)->q_wait); /* writers */ 1994 cv_broadcast(&stp->sd_monitor); /* ioctllers */ 1995 1996 mutex_exit(&stp->sd_lock); 1997 pollwakeup(&stp->sd_pollist, POLLERR); 1998 mutex_enter(&stp->sd_lock); 1999 2000 if (stp->sd_sigflags & S_ERROR) 2001 strsendsig(stp->sd_siglist, S_ERROR, 0, 2002 ((rw & FLUSHR) ? 
stp->sd_rerror : 2003 stp->sd_werror)); 2004 mutex_exit(&stp->sd_lock); 2005 /* 2006 * Send the M_FLUSH only 2007 * for the first M_ERROR 2008 * message on the stream 2009 */ 2010 if (flushed_already == rw) { 2011 freemsg(bp); 2012 return (0); 2013 } 2014 2015 bp->b_datap->db_type = M_FLUSH; 2016 *bp->b_rptr = rw; 2017 bp->b_wptr = bp->b_rptr + 1; 2018 /* 2019 * Protect against the driver 2020 * passing up messages after 2021 * it has done a qprocsoff 2022 */ 2023 if (_OTHERQ(q)->q_next == NULL) 2024 freemsg(bp); 2025 else 2026 qreply(q, bp); 2027 return (0); 2028 } else 2029 mutex_exit(&stp->sd_lock); 2030 } else if (*bp->b_rptr != 0) { /* Old flavor */ 2031 if (stp->sd_flag & (STRDERR|STWRERR)) 2032 flushed_already = FLUSHRW; 2033 mutex_enter(&stp->sd_lock); 2034 stp->sd_flag |= (STRDERR|STWRERR); 2035 stp->sd_rerror = *bp->b_rptr; 2036 stp->sd_werror = *bp->b_rptr; 2037 TRACE_2(TR_FAC_STREAMS_FR, 2038 TR_STRRPUT_WAKE2, 2039 "strrput wakeup #2:q %p, bp %p", q, bp); 2040 cv_broadcast(&q->q_wait); /* the readers */ 2041 cv_broadcast(&_WR(q)->q_wait); /* the writers */ 2042 cv_broadcast(&stp->sd_monitor); /* ioctllers */ 2043 2044 mutex_exit(&stp->sd_lock); 2045 pollwakeup(&stp->sd_pollist, POLLERR); 2046 mutex_enter(&stp->sd_lock); 2047 2048 if (stp->sd_sigflags & S_ERROR) 2049 strsendsig(stp->sd_siglist, S_ERROR, 0, 2050 (stp->sd_werror ? stp->sd_werror : 2051 stp->sd_rerror)); 2052 mutex_exit(&stp->sd_lock); 2053 2054 /* 2055 * Send the M_FLUSH only 2056 * for the first M_ERROR 2057 * message on the stream 2058 */ 2059 if (flushed_already != FLUSHRW) { 2060 bp->b_datap->db_type = M_FLUSH; 2061 *bp->b_rptr = FLUSHRW; 2062 /* 2063 * Protect against the driver passing up 2064 * messages after it has done a 2065 * qprocsoff. 
2066 */ 2067 if (_OTHERQ(q)->q_next == NULL) 2068 freemsg(bp); 2069 else 2070 qreply(q, bp); 2071 return (0); 2072 } 2073 } 2074 freemsg(bp); 2075 return (0); 2076 2077 case M_HANGUP: 2078 2079 freemsg(bp); 2080 mutex_enter(&stp->sd_lock); 2081 stp->sd_werror = ENXIO; 2082 stp->sd_flag |= STRHUP; 2083 stp->sd_flag &= ~(WSLEEP|RSLEEP); 2084 2085 /* 2086 * send signal if controlling tty 2087 */ 2088 2089 if (stp->sd_sidp) { 2090 prsignal(stp->sd_sidp, SIGHUP); 2091 if (stp->sd_sidp != stp->sd_pgidp) 2092 pgsignal(stp->sd_pgidp, SIGTSTP); 2093 } 2094 2095 /* 2096 * wake up read, write, and exception pollers and 2097 * reset wakeup mechanism. 2098 */ 2099 cv_broadcast(&q->q_wait); /* the readers */ 2100 cv_broadcast(&_WR(q)->q_wait); /* the writers */ 2101 cv_broadcast(&stp->sd_monitor); /* the ioctllers */ 2102 strhup(stp); 2103 mutex_exit(&stp->sd_lock); 2104 return (0); 2105 2106 case M_UNHANGUP: 2107 freemsg(bp); 2108 mutex_enter(&stp->sd_lock); 2109 stp->sd_werror = 0; 2110 stp->sd_flag &= ~STRHUP; 2111 mutex_exit(&stp->sd_lock); 2112 return (0); 2113 2114 case M_SIG: 2115 /* 2116 * Someone downstream wants to post a signal. The 2117 * signal to post is contained in the first byte of the 2118 * message. If the message would go on the front of 2119 * the queue, send a signal to the process group 2120 * (if not SIGPOLL) or to the siglist processes 2121 * (SIGPOLL). If something is already on the queue, 2122 * OR if we are delivering a delayed suspend (*sigh* 2123 * another "tty" hack) and there's no one sleeping already, 2124 * just enqueue the message. 2125 */ 2126 mutex_enter(&stp->sd_lock); 2127 if (q->q_first || (*bp->b_rptr == SIGTSTP && 2128 !(stp->sd_flag & RSLEEP))) { 2129 (void) putq(q, bp); 2130 mutex_exit(&stp->sd_lock); 2131 return (0); 2132 } 2133 mutex_exit(&stp->sd_lock); 2134 /* FALLTHRU */ 2135 2136 case M_PCSIG: 2137 /* 2138 * Don't enqueue, just post the signal. 
2139 */ 2140 strsignal(stp, *bp->b_rptr, 0L); 2141 freemsg(bp); 2142 return (0); 2143 2144 case M_CMD: 2145 if (MBLKL(bp) != sizeof (cmdblk_t)) { 2146 freemsg(bp); 2147 return (0); 2148 } 2149 2150 mutex_enter(&stp->sd_lock); 2151 if (stp->sd_flag & STRCMDWAIT) { 2152 ASSERT(stp->sd_cmdblk == NULL); 2153 stp->sd_cmdblk = bp; 2154 cv_broadcast(&stp->sd_monitor); 2155 mutex_exit(&stp->sd_lock); 2156 } else { 2157 mutex_exit(&stp->sd_lock); 2158 freemsg(bp); 2159 } 2160 return (0); 2161 2162 case M_FLUSH: 2163 /* 2164 * Flush queues. The indication of which queues to flush 2165 * is in the first byte of the message. If the read queue 2166 * is specified, then flush it. If FLUSHBAND is set, just 2167 * flush the band specified by the second byte of the message. 2168 * 2169 * If a module has issued a M_SETOPT to not flush hi 2170 * priority messages off of the stream head, then pass this 2171 * flag into the flushq code to preserve such messages. 2172 */ 2173 2174 if (*bp->b_rptr & FLUSHR) { 2175 mutex_enter(&stp->sd_lock); 2176 if (*bp->b_rptr & FLUSHBAND) { 2177 ASSERT((bp->b_wptr - bp->b_rptr) >= 2); 2178 flushband(q, *(bp->b_rptr + 1), FLUSHALL); 2179 } else 2180 flushq_common(q, FLUSHALL, 2181 stp->sd_read_opt & RFLUSHPCPROT); 2182 if ((q->q_first == NULL) || 2183 (q->q_first->b_datap->db_type < QPCTL)) 2184 stp->sd_flag &= ~STRPRI; 2185 else { 2186 ASSERT(stp->sd_flag & STRPRI); 2187 } 2188 mutex_exit(&stp->sd_lock); 2189 } 2190 if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) { 2191 *bp->b_rptr &= ~FLUSHR; 2192 bp->b_flag |= MSGNOLOOP; 2193 /* 2194 * Protect against the driver passing up 2195 * messages after it has done a qprocsoff. 2196 */ 2197 if (_OTHERQ(q)->q_next == NULL) 2198 freemsg(bp); 2199 else 2200 qreply(q, bp); 2201 return (0); 2202 } 2203 freemsg(bp); 2204 return (0); 2205 2206 case M_IOCACK: 2207 case M_IOCNAK: 2208 iocbp = (struct iocblk *)bp->b_rptr; 2209 /* 2210 * If not waiting for ACK or NAK then just free msg. 
2211 * If incorrect id sequence number then just free msg. 2212 * If already have ACK or NAK for user then this is a 2213 * duplicate, display a warning and free the msg. 2214 */ 2215 mutex_enter(&stp->sd_lock); 2216 if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk || 2217 (stp->sd_iocid != iocbp->ioc_id)) { 2218 /* 2219 * If the ACK/NAK is a dup, display a message 2220 * Dup is when sd_iocid == ioc_id, and 2221 * sd_iocblk == <valid ptr> or -1 (the former 2222 * is when an ioctl has been put on the stream 2223 * head, but has not yet been consumed, the 2224 * later is when it has been consumed). 2225 */ 2226 if ((stp->sd_iocid == iocbp->ioc_id) && 2227 (stp->sd_iocblk != NULL)) { 2228 log_dupioc(q, bp); 2229 } 2230 freemsg(bp); 2231 mutex_exit(&stp->sd_lock); 2232 return (0); 2233 } 2234 2235 /* 2236 * Assign ACK or NAK to user and wake up. 2237 */ 2238 stp->sd_iocblk = bp; 2239 cv_broadcast(&stp->sd_monitor); 2240 mutex_exit(&stp->sd_lock); 2241 return (0); 2242 2243 case M_COPYIN: 2244 case M_COPYOUT: 2245 reqp = (struct copyreq *)bp->b_rptr; 2246 2247 /* 2248 * If not waiting for ACK or NAK then just fail request. 2249 * If already have ACK, NAK, or copy request, then just 2250 * fail request. 2251 * If incorrect id sequence number then just fail request. 2252 */ 2253 mutex_enter(&stp->sd_lock); 2254 if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk || 2255 (stp->sd_iocid != reqp->cq_id)) { 2256 if (bp->b_cont) { 2257 freemsg(bp->b_cont); 2258 bp->b_cont = NULL; 2259 } 2260 bp->b_datap->db_type = M_IOCDATA; 2261 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp); 2262 resp = (struct copyresp *)bp->b_rptr; 2263 resp->cp_rval = (caddr_t)1; /* failure */ 2264 mutex_exit(&stp->sd_lock); 2265 putnext(stp->sd_wrq, bp); 2266 return (0); 2267 } 2268 2269 /* 2270 * Assign copy request to user and wake up. 
2271 */ 2272 stp->sd_iocblk = bp; 2273 cv_broadcast(&stp->sd_monitor); 2274 mutex_exit(&stp->sd_lock); 2275 return (0); 2276 2277 case M_SETOPTS: 2278 /* 2279 * Set stream head options (read option, write offset, 2280 * min/max packet size, and/or high/low water marks for 2281 * the read side only). 2282 */ 2283 2284 bpri = 0; 2285 sop = (struct stroptions *)bp->b_rptr; 2286 mutex_enter(&stp->sd_lock); 2287 if (sop->so_flags & SO_READOPT) { 2288 switch (sop->so_readopt & RMODEMASK) { 2289 case RNORM: 2290 stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS); 2291 break; 2292 2293 case RMSGD: 2294 stp->sd_read_opt = 2295 ((stp->sd_read_opt & ~RD_MSGNODIS) | 2296 RD_MSGDIS); 2297 break; 2298 2299 case RMSGN: 2300 stp->sd_read_opt = 2301 ((stp->sd_read_opt & ~RD_MSGDIS) | 2302 RD_MSGNODIS); 2303 break; 2304 } 2305 switch (sop->so_readopt & RPROTMASK) { 2306 case RPROTNORM: 2307 stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS); 2308 break; 2309 2310 case RPROTDAT: 2311 stp->sd_read_opt = 2312 ((stp->sd_read_opt & ~RD_PROTDIS) | 2313 RD_PROTDAT); 2314 break; 2315 2316 case RPROTDIS: 2317 stp->sd_read_opt = 2318 ((stp->sd_read_opt & ~RD_PROTDAT) | 2319 RD_PROTDIS); 2320 break; 2321 } 2322 switch (sop->so_readopt & RFLUSHMASK) { 2323 case RFLUSHPCPROT: 2324 /* 2325 * This sets the stream head to NOT flush 2326 * M_PCPROTO messages. 
2327 */ 2328 stp->sd_read_opt |= RFLUSHPCPROT; 2329 break; 2330 } 2331 } 2332 if (sop->so_flags & SO_ERROPT) { 2333 switch (sop->so_erropt & RERRMASK) { 2334 case RERRNORM: 2335 stp->sd_flag &= ~STRDERRNONPERSIST; 2336 break; 2337 case RERRNONPERSIST: 2338 stp->sd_flag |= STRDERRNONPERSIST; 2339 break; 2340 } 2341 switch (sop->so_erropt & WERRMASK) { 2342 case WERRNORM: 2343 stp->sd_flag &= ~STWRERRNONPERSIST; 2344 break; 2345 case WERRNONPERSIST: 2346 stp->sd_flag |= STWRERRNONPERSIST; 2347 break; 2348 } 2349 } 2350 if (sop->so_flags & SO_COPYOPT) { 2351 if (sop->so_copyopt & ZCVMSAFE) { 2352 stp->sd_copyflag |= STZCVMSAFE; 2353 stp->sd_copyflag &= ~STZCVMUNSAFE; 2354 } else if (sop->so_copyopt & ZCVMUNSAFE) { 2355 stp->sd_copyflag |= STZCVMUNSAFE; 2356 stp->sd_copyflag &= ~STZCVMSAFE; 2357 } 2358 2359 if (sop->so_copyopt & COPYCACHED) { 2360 stp->sd_copyflag |= STRCOPYCACHED; 2361 } 2362 } 2363 if (sop->so_flags & SO_WROFF) 2364 stp->sd_wroff = sop->so_wroff; 2365 if (sop->so_flags & SO_TAIL) 2366 stp->sd_tail = sop->so_tail; 2367 if (sop->so_flags & SO_MINPSZ) 2368 q->q_minpsz = sop->so_minpsz; 2369 if (sop->so_flags & SO_MAXPSZ) 2370 q->q_maxpsz = sop->so_maxpsz; 2371 if (sop->so_flags & SO_MAXBLK) 2372 stp->sd_maxblk = sop->so_maxblk; 2373 if (sop->so_flags & SO_HIWAT) { 2374 if (sop->so_flags & SO_BAND) { 2375 if (strqset(q, QHIWAT, 2376 sop->so_band, sop->so_hiwat)) { 2377 cmn_err(CE_WARN, "strrput: could not " 2378 "allocate qband\n"); 2379 } else { 2380 bpri = sop->so_band; 2381 } 2382 } else { 2383 q->q_hiwat = sop->so_hiwat; 2384 } 2385 } 2386 if (sop->so_flags & SO_LOWAT) { 2387 if (sop->so_flags & SO_BAND) { 2388 if (strqset(q, QLOWAT, 2389 sop->so_band, sop->so_lowat)) { 2390 cmn_err(CE_WARN, "strrput: could not " 2391 "allocate qband\n"); 2392 } else { 2393 bpri = sop->so_band; 2394 } 2395 } else { 2396 q->q_lowat = sop->so_lowat; 2397 } 2398 } 2399 if (sop->so_flags & SO_MREADON) 2400 stp->sd_flag |= SNDMREAD; 2401 if (sop->so_flags & SO_MREADOFF) 
2402 stp->sd_flag &= ~SNDMREAD; 2403 if (sop->so_flags & SO_NDELON) 2404 stp->sd_flag |= OLDNDELAY; 2405 if (sop->so_flags & SO_NDELOFF) 2406 stp->sd_flag &= ~OLDNDELAY; 2407 if (sop->so_flags & SO_ISTTY) 2408 stp->sd_flag |= STRISTTY; 2409 if (sop->so_flags & SO_ISNTTY) 2410 stp->sd_flag &= ~STRISTTY; 2411 if (sop->so_flags & SO_TOSTOP) 2412 stp->sd_flag |= STRTOSTOP; 2413 if (sop->so_flags & SO_TONSTOP) 2414 stp->sd_flag &= ~STRTOSTOP; 2415 if (sop->so_flags & SO_DELIM) 2416 stp->sd_flag |= STRDELIM; 2417 if (sop->so_flags & SO_NODELIM) 2418 stp->sd_flag &= ~STRDELIM; 2419 2420 mutex_exit(&stp->sd_lock); 2421 freemsg(bp); 2422 2423 /* Check backenable in case the water marks changed */ 2424 qbackenable(q, bpri); 2425 return (0); 2426 2427 /* 2428 * The following set of cases deal with situations where two stream 2429 * heads are connected to each other (twisted streams). These messages 2430 * have no meaning at the stream head. 2431 */ 2432 case M_BREAK: 2433 case M_CTL: 2434 case M_DELAY: 2435 case M_START: 2436 case M_STOP: 2437 case M_IOCDATA: 2438 case M_STARTI: 2439 case M_STOPI: 2440 freemsg(bp); 2441 return (0); 2442 2443 case M_IOCTL: 2444 /* 2445 * Always NAK this condition 2446 * (makes no sense) 2447 * If there is one or more threads in the read side 2448 * rwnext we have to defer the nacking until that thread 2449 * returns (in strget). 2450 */ 2451 mutex_enter(&stp->sd_lock); 2452 if (stp->sd_struiodnak != 0) { 2453 /* 2454 * Defer NAK to the streamhead. Queue at the end 2455 * the list. 2456 */ 2457 mblk_t *mp = stp->sd_struionak; 2458 2459 while (mp && mp->b_next) 2460 mp = mp->b_next; 2461 if (mp) 2462 mp->b_next = bp; 2463 else 2464 stp->sd_struionak = bp; 2465 bp->b_next = NULL; 2466 mutex_exit(&stp->sd_lock); 2467 return (0); 2468 } 2469 mutex_exit(&stp->sd_lock); 2470 2471 bp->b_datap->db_type = M_IOCNAK; 2472 /* 2473 * Protect against the driver passing up 2474 * messages after it has done a qprocsoff. 
2475 */ 2476 if (_OTHERQ(q)->q_next == NULL) 2477 freemsg(bp); 2478 else 2479 qreply(q, bp); 2480 return (0); 2481 2482 default: 2483 #ifdef DEBUG 2484 cmn_err(CE_WARN, 2485 "bad message type %x received at stream head\n", 2486 bp->b_datap->db_type); 2487 #endif 2488 freemsg(bp); 2489 return (0); 2490 } 2491 2492 /* NOTREACHED */ 2493 } 2494 2495 /* 2496 * Check if the stream pointed to by `stp' can be written to, and return an 2497 * error code if not. If `eiohup' is set, then return EIO if STRHUP is set. 2498 * If `sigpipeok' is set and the SW_SIGPIPE option is enabled on the stream, 2499 * then always return EPIPE and send a SIGPIPE to the invoking thread. 2500 */ 2501 static int 2502 strwriteable(struct stdata *stp, boolean_t eiohup, boolean_t sigpipeok) 2503 { 2504 int error; 2505 2506 ASSERT(MUTEX_HELD(&stp->sd_lock)); 2507 2508 /* 2509 * For modem support, POSIX states that on writes, EIO should 2510 * be returned if the stream has been hung up. 2511 */ 2512 if (eiohup && (stp->sd_flag & (STPLEX|STRHUP)) == STRHUP) 2513 error = EIO; 2514 else 2515 error = strgeterr(stp, STRHUP|STPLEX|STWRERR, 0); 2516 2517 if (error != 0) { 2518 if (!(stp->sd_flag & STPLEX) && 2519 (stp->sd_wput_opt & SW_SIGPIPE) && sigpipeok) { 2520 tsignal(curthread, SIGPIPE); 2521 error = EPIPE; 2522 } 2523 } 2524 2525 return (error); 2526 } 2527 2528 /* 2529 * Copyin and send data down a stream. 2530 * The caller will allocate and copyin any control part that precedes the 2531 * message and pass that in as mctl. 2532 * 2533 * Caller should *not* hold sd_lock. 2534 * When EWOULDBLOCK is returned the caller has to redo the canputnext 2535 * under sd_lock in order to avoid missing a backenabling wakeup. 2536 * 2537 * Use iosize = -1 to not send any M_DATA. iosize = 0 sends zero-length M_DATA. 2538 * 2539 * Set MSG_IGNFLOW in flags to ignore flow control for hipri messages. 2540 * For sync streams we can only ignore flow control by reverting to using 2541 * putnext. 
2542 * 2543 * If sd_maxblk is less than *iosize this routine might return without 2544 * transferring all of *iosize. In all cases, on return *iosize will contain 2545 * the amount of data that was transferred. 2546 */ 2547 static int 2548 strput(struct stdata *stp, mblk_t *mctl, struct uio *uiop, ssize_t *iosize, 2549 int b_flag, int pri, int flags) 2550 { 2551 struiod_t uiod; 2552 mblk_t *mp; 2553 queue_t *wqp = stp->sd_wrq; 2554 int error = 0; 2555 ssize_t count = *iosize; 2556 2557 ASSERT(MUTEX_NOT_HELD(&stp->sd_lock)); 2558 2559 if (uiop != NULL && count >= 0) 2560 flags |= stp->sd_struiowrq ? STRUIO_POSTPONE : 0; 2561 2562 if (!(flags & STRUIO_POSTPONE)) { 2563 /* 2564 * Use regular canputnext, strmakedata, putnext sequence. 2565 */ 2566 if (pri == 0) { 2567 if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) { 2568 freemsg(mctl); 2569 return (EWOULDBLOCK); 2570 } 2571 } else { 2572 if (!(flags & MSG_IGNFLOW) && !bcanputnext(wqp, pri)) { 2573 freemsg(mctl); 2574 return (EWOULDBLOCK); 2575 } 2576 } 2577 2578 if ((error = strmakedata(iosize, uiop, stp, flags, 2579 &mp)) != 0) { 2580 freemsg(mctl); 2581 /* 2582 * need to change return code to ENOMEM 2583 * so that this is not confused with 2584 * flow control, EAGAIN. 2585 */ 2586 2587 if (error == EAGAIN) 2588 return (ENOMEM); 2589 else 2590 return (error); 2591 } 2592 if (mctl != NULL) { 2593 if (mctl->b_cont == NULL) 2594 mctl->b_cont = mp; 2595 else if (mp != NULL) 2596 linkb(mctl, mp); 2597 mp = mctl; 2598 } else if (mp == NULL) 2599 return (0); 2600 2601 mp->b_flag |= b_flag; 2602 mp->b_band = (uchar_t)pri; 2603 2604 if (flags & MSG_IGNFLOW) { 2605 /* 2606 * XXX Hack: Don't get stuck running service 2607 * procedures. This is needed for sockfs when 2608 * sending the unbind message out of the rput 2609 * procedure - we don't want a put procedure 2610 * to run service procedures. 
2611 */ 2612 putnext(wqp, mp); 2613 } else { 2614 stream_willservice(stp); 2615 putnext(wqp, mp); 2616 stream_runservice(stp); 2617 } 2618 return (0); 2619 } 2620 /* 2621 * Stream supports rwnext() for the write side. 2622 */ 2623 if ((error = strmakedata(iosize, uiop, stp, flags, &mp)) != 0) { 2624 freemsg(mctl); 2625 /* 2626 * map EAGAIN to ENOMEM since EAGAIN means "flow controlled". 2627 */ 2628 return (error == EAGAIN ? ENOMEM : error); 2629 } 2630 if (mctl != NULL) { 2631 if (mctl->b_cont == NULL) 2632 mctl->b_cont = mp; 2633 else if (mp != NULL) 2634 linkb(mctl, mp); 2635 mp = mctl; 2636 } else if (mp == NULL) { 2637 return (0); 2638 } 2639 2640 mp->b_flag |= b_flag; 2641 mp->b_band = (uchar_t)pri; 2642 2643 (void) uiodup(uiop, &uiod.d_uio, uiod.d_iov, 2644 sizeof (uiod.d_iov) / sizeof (*uiod.d_iov)); 2645 uiod.d_uio.uio_offset = 0; 2646 uiod.d_mp = mp; 2647 error = rwnext(wqp, &uiod); 2648 if (! uiod.d_mp) { 2649 uioskip(uiop, *iosize); 2650 return (error); 2651 } 2652 ASSERT(mp == uiod.d_mp); 2653 if (error == EINVAL) { 2654 /* 2655 * The stream plumbing must have changed while 2656 * we were away, so just turn off rwnext()s. 2657 */ 2658 error = 0; 2659 } else if (error == EBUSY || error == EWOULDBLOCK) { 2660 /* 2661 * Couldn't enter a perimeter or took a page fault, 2662 * so fall-back to putnext(). 
2663 */ 2664 error = 0; 2665 } else { 2666 freemsg(mp); 2667 return (error); 2668 } 2669 /* Have to check canput before consuming data from the uio */ 2670 if (pri == 0) { 2671 if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) { 2672 freemsg(mp); 2673 return (EWOULDBLOCK); 2674 } 2675 } else { 2676 if (!bcanputnext(wqp, pri) && !(flags & MSG_IGNFLOW)) { 2677 freemsg(mp); 2678 return (EWOULDBLOCK); 2679 } 2680 } 2681 ASSERT(mp == uiod.d_mp); 2682 /* Copyin data from the uio */ 2683 if ((error = struioget(wqp, mp, &uiod, 0)) != 0) { 2684 freemsg(mp); 2685 return (error); 2686 } 2687 uioskip(uiop, *iosize); 2688 if (flags & MSG_IGNFLOW) { 2689 /* 2690 * XXX Hack: Don't get stuck running service procedures. 2691 * This is needed for sockfs when sending the unbind message 2692 * out of the rput procedure - we don't want a put procedure 2693 * to run service procedures. 2694 */ 2695 putnext(wqp, mp); 2696 } else { 2697 stream_willservice(stp); 2698 putnext(wqp, mp); 2699 stream_runservice(stp); 2700 } 2701 return (0); 2702 } 2703 2704 /* 2705 * Write attempts to break the write request into messages conforming 2706 * with the minimum and maximum packet sizes set downstream. 2707 * 2708 * Write will not block if downstream queue is full and 2709 * O_NDELAY is set, otherwise it will block waiting for the queue to get room. 2710 * 2711 * A write of zero bytes gets packaged into a zero length message and sent 2712 * downstream like any other message. 2713 * 2714 * If buffers of the requested sizes are not available, the write will 2715 * sleep until the buffers become available. 2716 * 2717 * Write (if specified) will supply a write offset in a message if it 2718 * makes sense. This can be specified by downstream modules as part of 2719 * a M_SETOPTS message. Write will not supply the write offset if it 2720 * cannot supply any data in a buffer. In other words, write will never 2721 * send down an empty packet due to a write offset. 
2722 */ 2723 /* ARGSUSED2 */ 2724 int 2725 strwrite(struct vnode *vp, struct uio *uiop, cred_t *crp) 2726 { 2727 return (strwrite_common(vp, uiop, crp, 0)); 2728 } 2729 2730 /* ARGSUSED2 */ 2731 int 2732 strwrite_common(struct vnode *vp, struct uio *uiop, cred_t *crp, int wflag) 2733 { 2734 struct stdata *stp; 2735 struct queue *wqp; 2736 ssize_t rmin, rmax; 2737 ssize_t iosize; 2738 int waitflag; 2739 int tempmode; 2740 int error = 0; 2741 int b_flag; 2742 2743 ASSERT(vp->v_stream); 2744 stp = vp->v_stream; 2745 2746 mutex_enter(&stp->sd_lock); 2747 2748 if ((error = i_straccess(stp, JCWRITE)) != 0) { 2749 mutex_exit(&stp->sd_lock); 2750 return (error); 2751 } 2752 2753 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { 2754 error = strwriteable(stp, B_TRUE, B_TRUE); 2755 if (error != 0) { 2756 mutex_exit(&stp->sd_lock); 2757 return (error); 2758 } 2759 } 2760 2761 mutex_exit(&stp->sd_lock); 2762 2763 wqp = stp->sd_wrq; 2764 2765 /* get these values from them cached in the stream head */ 2766 rmin = stp->sd_qn_minpsz; 2767 rmax = stp->sd_qn_maxpsz; 2768 2769 /* 2770 * Check the min/max packet size constraints. If min packet size 2771 * is non-zero, the write cannot be split into multiple messages 2772 * and still guarantee the size constraints. 2773 */ 2774 TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_IN, "strwrite in:q %p", wqp); 2775 2776 ASSERT((rmax >= 0) || (rmax == INFPSZ)); 2777 if (rmax == 0) { 2778 return (0); 2779 } 2780 if (rmin > 0) { 2781 if (uiop->uio_resid < rmin) { 2782 TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT, 2783 "strwrite out:q %p out %d error %d", 2784 wqp, 0, ERANGE); 2785 return (ERANGE); 2786 } 2787 if ((rmax != INFPSZ) && (uiop->uio_resid > rmax)) { 2788 TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT, 2789 "strwrite out:q %p out %d error %d", 2790 wqp, 1, ERANGE); 2791 return (ERANGE); 2792 } 2793 } 2794 2795 /* 2796 * Do until count satisfied or error. 
2797 */ 2798 waitflag = WRITEWAIT | wflag; 2799 if (stp->sd_flag & OLDNDELAY) 2800 tempmode = uiop->uio_fmode & ~FNDELAY; 2801 else 2802 tempmode = uiop->uio_fmode; 2803 2804 if (rmax == INFPSZ) 2805 rmax = uiop->uio_resid; 2806 2807 /* 2808 * Note that tempmode does not get used in strput/strmakedata 2809 * but only in strwaitq. The other routines use uio_fmode 2810 * unmodified. 2811 */ 2812 2813 /* LINTED: constant in conditional context */ 2814 while (1) { /* breaks when uio_resid reaches zero */ 2815 /* 2816 * Determine the size of the next message to be 2817 * packaged. May have to break write into several 2818 * messages based on max packet size. 2819 */ 2820 iosize = MIN(uiop->uio_resid, rmax); 2821 2822 /* 2823 * Put block downstream when flow control allows it. 2824 */ 2825 if ((stp->sd_flag & STRDELIM) && (uiop->uio_resid == iosize)) 2826 b_flag = MSGDELIM; 2827 else 2828 b_flag = 0; 2829 2830 for (;;) { 2831 int done = 0; 2832 2833 error = strput(stp, NULL, uiop, &iosize, b_flag, 0, 0); 2834 if (error == 0) 2835 break; 2836 if (error != EWOULDBLOCK) 2837 goto out; 2838 2839 mutex_enter(&stp->sd_lock); 2840 /* 2841 * Check for a missed wakeup. 2842 * Needed since strput did not hold sd_lock across 2843 * the canputnext. 
2844 */ 2845 if (canputnext(wqp)) { 2846 /* Try again */ 2847 mutex_exit(&stp->sd_lock); 2848 continue; 2849 } 2850 TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAIT, 2851 "strwrite wait:q %p wait", wqp); 2852 if ((error = strwaitq(stp, waitflag, (ssize_t)0, 2853 tempmode, -1, &done)) != 0 || done) { 2854 mutex_exit(&stp->sd_lock); 2855 if ((vp->v_type == VFIFO) && 2856 (uiop->uio_fmode & FNDELAY) && 2857 (error == EAGAIN)) 2858 error = 0; 2859 goto out; 2860 } 2861 TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAKE, 2862 "strwrite wake:q %p awakes", wqp); 2863 if ((error = i_straccess(stp, JCWRITE)) != 0) { 2864 mutex_exit(&stp->sd_lock); 2865 goto out; 2866 } 2867 mutex_exit(&stp->sd_lock); 2868 } 2869 waitflag |= NOINTR; 2870 TRACE_2(TR_FAC_STREAMS_FR, TR_STRWRITE_RESID, 2871 "strwrite resid:q %p uiop %p", wqp, uiop); 2872 if (uiop->uio_resid) { 2873 /* Recheck for errors - needed for sockets */ 2874 if ((stp->sd_wput_opt & SW_RECHECK_ERR) && 2875 (stp->sd_flag & (STWRERR|STRHUP|STPLEX))) { 2876 mutex_enter(&stp->sd_lock); 2877 error = strwriteable(stp, B_FALSE, B_TRUE); 2878 mutex_exit(&stp->sd_lock); 2879 if (error != 0) 2880 return (error); 2881 } 2882 continue; 2883 } 2884 break; 2885 } 2886 out: 2887 /* 2888 * For historical reasons, applications expect EAGAIN when a data 2889 * mblk_t cannot be allocated, so change ENOMEM back to EAGAIN. 2890 */ 2891 if (error == ENOMEM) 2892 error = EAGAIN; 2893 TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT, 2894 "strwrite out:q %p out %d error %d", wqp, 2, error); 2895 return (error); 2896 } 2897 2898 /* 2899 * Stream head write service routine. 2900 * Its job is to wake up any sleeping writers when a queue 2901 * downstream needs data (part of the flow control in putq and getq). 2902 * It also must wake anyone sleeping on a poll(). 2903 * For stream head right below mux module, it must also invoke put procedure 2904 * of next downstream module. 
2905 */ 2906 int 2907 strwsrv(queue_t *q) 2908 { 2909 struct stdata *stp; 2910 queue_t *tq; 2911 qband_t *qbp; 2912 int i; 2913 qband_t *myqbp; 2914 int isevent; 2915 unsigned char qbf[NBAND]; /* band flushing backenable flags */ 2916 2917 TRACE_1(TR_FAC_STREAMS_FR, 2918 TR_STRWSRV, "strwsrv:q %p", q); 2919 stp = (struct stdata *)q->q_ptr; 2920 ASSERT(qclaimed(q)); 2921 mutex_enter(&stp->sd_lock); 2922 ASSERT(!(stp->sd_flag & STPLEX)); 2923 2924 if (stp->sd_flag & WSLEEP) { 2925 stp->sd_flag &= ~WSLEEP; 2926 cv_broadcast(&q->q_wait); 2927 } 2928 mutex_exit(&stp->sd_lock); 2929 2930 /* The other end of a stream pipe went away. */ 2931 if ((tq = q->q_next) == NULL) { 2932 return (0); 2933 } 2934 2935 /* Find the next module forward that has a service procedure */ 2936 claimstr(q); 2937 tq = q->q_nfsrv; 2938 ASSERT(tq != NULL); 2939 2940 if ((q->q_flag & QBACK)) { 2941 if ((tq->q_flag & QFULL)) { 2942 mutex_enter(QLOCK(tq)); 2943 if (!(tq->q_flag & QFULL)) { 2944 mutex_exit(QLOCK(tq)); 2945 goto wakeup; 2946 } 2947 /* 2948 * The queue must have become full again. Set QWANTW 2949 * again so strwsrv will be back enabled when 2950 * the queue becomes non-full next time. 2951 */ 2952 tq->q_flag |= QWANTW; 2953 mutex_exit(QLOCK(tq)); 2954 } else { 2955 wakeup: 2956 pollwakeup(&stp->sd_pollist, POLLWRNORM); 2957 mutex_enter(&stp->sd_lock); 2958 if (stp->sd_sigflags & S_WRNORM) 2959 strsendsig(stp->sd_siglist, S_WRNORM, 0, 0); 2960 mutex_exit(&stp->sd_lock); 2961 } 2962 } 2963 2964 isevent = 0; 2965 i = 1; 2966 bzero((caddr_t)qbf, NBAND); 2967 mutex_enter(QLOCK(tq)); 2968 if ((myqbp = q->q_bandp) != NULL) 2969 for (qbp = tq->q_bandp; qbp && myqbp; qbp = qbp->qb_next) { 2970 ASSERT(myqbp); 2971 if ((myqbp->qb_flag & QB_BACK)) { 2972 if (qbp->qb_flag & QB_FULL) { 2973 /* 2974 * The band must have become full again. 2975 * Set QB_WANTW again so strwsrv will 2976 * be back enabled when the band becomes 2977 * non-full next time. 
2978 */ 2979 qbp->qb_flag |= QB_WANTW; 2980 } else { 2981 isevent = 1; 2982 qbf[i] = 1; 2983 } 2984 } 2985 myqbp = myqbp->qb_next; 2986 i++; 2987 } 2988 mutex_exit(QLOCK(tq)); 2989 2990 if (isevent) { 2991 for (i = tq->q_nband; i; i--) { 2992 if (qbf[i]) { 2993 pollwakeup(&stp->sd_pollist, POLLWRBAND); 2994 mutex_enter(&stp->sd_lock); 2995 if (stp->sd_sigflags & S_WRBAND) 2996 strsendsig(stp->sd_siglist, S_WRBAND, 2997 (uchar_t)i, 0); 2998 mutex_exit(&stp->sd_lock); 2999 } 3000 } 3001 } 3002 3003 releasestr(q); 3004 return (0); 3005 } 3006 3007 /* 3008 * Special case of strcopyin/strcopyout for copying 3009 * struct strioctl that can deal with both data 3010 * models. 3011 */ 3012 3013 #ifdef _LP64 3014 3015 static int 3016 strcopyin_strioctl(void *from, void *to, int flag, int copyflag) 3017 { 3018 struct strioctl32 strioc32; 3019 struct strioctl *striocp; 3020 3021 if (copyflag & U_TO_K) { 3022 ASSERT((copyflag & K_TO_K) == 0); 3023 3024 if ((flag & FMODELS) == DATAMODEL_ILP32) { 3025 if (copyin(from, &strioc32, sizeof (strioc32))) 3026 return (EFAULT); 3027 3028 striocp = (struct strioctl *)to; 3029 striocp->ic_cmd = strioc32.ic_cmd; 3030 striocp->ic_timout = strioc32.ic_timout; 3031 striocp->ic_len = strioc32.ic_len; 3032 striocp->ic_dp = (char *)(uintptr_t)strioc32.ic_dp; 3033 3034 } else { /* NATIVE data model */ 3035 if (copyin(from, to, sizeof (struct strioctl))) { 3036 return (EFAULT); 3037 } else { 3038 return (0); 3039 } 3040 } 3041 } else { 3042 ASSERT(copyflag & K_TO_K); 3043 bcopy(from, to, sizeof (struct strioctl)); 3044 } 3045 return (0); 3046 } 3047 3048 static int 3049 strcopyout_strioctl(void *from, void *to, int flag, int copyflag) 3050 { 3051 struct strioctl32 strioc32; 3052 struct strioctl *striocp; 3053 3054 if (copyflag & U_TO_K) { 3055 ASSERT((copyflag & K_TO_K) == 0); 3056 3057 if ((flag & FMODELS) == DATAMODEL_ILP32) { 3058 striocp = (struct strioctl *)from; 3059 strioc32.ic_cmd = striocp->ic_cmd; 3060 strioc32.ic_timout = 
striocp->ic_timout; 3061 strioc32.ic_len = striocp->ic_len; 3062 strioc32.ic_dp = (caddr32_t)(uintptr_t)striocp->ic_dp; 3063 ASSERT((char *)(uintptr_t)strioc32.ic_dp == 3064 striocp->ic_dp); 3065 3066 if (copyout(&strioc32, to, sizeof (strioc32))) 3067 return (EFAULT); 3068 3069 } else { /* NATIVE data model */ 3070 if (copyout(from, to, sizeof (struct strioctl))) { 3071 return (EFAULT); 3072 } else { 3073 return (0); 3074 } 3075 } 3076 } else { 3077 ASSERT(copyflag & K_TO_K); 3078 bcopy(from, to, sizeof (struct strioctl)); 3079 } 3080 return (0); 3081 } 3082 3083 #else /* ! _LP64 */ 3084 3085 /* ARGSUSED2 */ 3086 static int 3087 strcopyin_strioctl(void *from, void *to, int flag, int copyflag) 3088 { 3089 return (strcopyin(from, to, sizeof (struct strioctl), copyflag)); 3090 } 3091 3092 /* ARGSUSED2 */ 3093 static int 3094 strcopyout_strioctl(void *from, void *to, int flag, int copyflag) 3095 { 3096 return (strcopyout(from, to, sizeof (struct strioctl), copyflag)); 3097 } 3098 3099 #endif /* _LP64 */ 3100 3101 /* 3102 * Determine type of job control semantics expected by user. The 3103 * possibilities are: 3104 * JCREAD - Behaves like read() on fd; send SIGTTIN 3105 * JCWRITE - Behaves like write() on fd; send SIGTTOU if TOSTOP set 3106 * JCSETP - Sets a value in the stream; send SIGTTOU, ignore TOSTOP 3107 * JCGETP - Gets a value in the stream; no signals. 3108 * See straccess in strsubr.c for usage of these values. 3109 * 3110 * This routine also returns -1 for I_STR as a special case; the 3111 * caller must call again with the real ioctl number for 3112 * classification. 
3113 */ 3114 static int 3115 job_control_type(int cmd) 3116 { 3117 switch (cmd) { 3118 case I_STR: 3119 return (-1); 3120 3121 case I_RECVFD: 3122 case I_E_RECVFD: 3123 return (JCREAD); 3124 3125 case I_FDINSERT: 3126 case I_SENDFD: 3127 return (JCWRITE); 3128 3129 case TCSETA: 3130 case TCSETAW: 3131 case TCSETAF: 3132 case TCSBRK: 3133 case TCXONC: 3134 case TCFLSH: 3135 case TCDSET: /* Obsolete */ 3136 case TIOCSWINSZ: 3137 case TCSETS: 3138 case TCSETSW: 3139 case TCSETSF: 3140 case TIOCSETD: 3141 case TIOCHPCL: 3142 case TIOCSETP: 3143 case TIOCSETN: 3144 case TIOCEXCL: 3145 case TIOCNXCL: 3146 case TIOCFLUSH: 3147 case TIOCSETC: 3148 case TIOCLBIS: 3149 case TIOCLBIC: 3150 case TIOCLSET: 3151 case TIOCSBRK: 3152 case TIOCCBRK: 3153 case TIOCSDTR: 3154 case TIOCCDTR: 3155 case TIOCSLTC: 3156 case TIOCSTOP: 3157 case TIOCSTART: 3158 case TIOCSTI: 3159 case TIOCSPGRP: 3160 case TIOCMSET: 3161 case TIOCMBIS: 3162 case TIOCMBIC: 3163 case TIOCREMOTE: 3164 case TIOCSIGNAL: 3165 case LDSETT: 3166 case LDSMAP: /* Obsolete */ 3167 case DIOCSETP: 3168 case I_FLUSH: 3169 case I_SRDOPT: 3170 case I_SETSIG: 3171 case I_SWROPT: 3172 case I_FLUSHBAND: 3173 case I_SETCLTIME: 3174 case I_SERROPT: 3175 case I_ESETSIG: 3176 case FIONBIO: 3177 case FIOASYNC: 3178 case FIOSETOWN: 3179 case JBOOT: /* Obsolete */ 3180 case JTERM: /* Obsolete */ 3181 case JTIMOM: /* Obsolete */ 3182 case JZOMBOOT: /* Obsolete */ 3183 case JAGENT: /* Obsolete */ 3184 case JTRUN: /* Obsolete */ 3185 case JXTPROTO: /* Obsolete */ 3186 case TIOCSETLD: 3187 return (JCSETP); 3188 } 3189 3190 return (JCGETP); 3191 } 3192 3193 /* 3194 * ioctl for streams 3195 */ 3196 int 3197 strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, 3198 cred_t *crp, int *rvalp) 3199 { 3200 struct stdata *stp; 3201 struct strcmd *scp; 3202 struct strioctl strioc; 3203 struct uio uio; 3204 struct iovec iov; 3205 int access; 3206 mblk_t *mp; 3207 int error = 0; 3208 int done = 0; 3209 ssize_t rmin, rmax; 3210 
queue_t *wrq; 3211 queue_t *rdq; 3212 boolean_t kioctl = B_FALSE; 3213 3214 if (flag & FKIOCTL) { 3215 copyflag = K_TO_K; 3216 kioctl = B_TRUE; 3217 } 3218 ASSERT(vp->v_stream); 3219 ASSERT(copyflag == U_TO_K || copyflag == K_TO_K); 3220 stp = vp->v_stream; 3221 3222 TRACE_3(TR_FAC_STREAMS_FR, TR_IOCTL_ENTER, 3223 "strioctl:stp %p cmd %X arg %lX", stp, cmd, arg); 3224 3225 if (audit_active) 3226 audit_strioctl(vp, cmd, arg, flag, copyflag, crp, rvalp); 3227 3228 /* 3229 * If the copy is kernel to kernel, make sure that the FNATIVE 3230 * flag is set. After this it would be a serious error to have 3231 * no model flag. 3232 */ 3233 if (copyflag == K_TO_K) 3234 flag = (flag & ~FMODELS) | FNATIVE; 3235 3236 ASSERT((flag & FMODELS) != 0); 3237 3238 wrq = stp->sd_wrq; 3239 rdq = _RD(wrq); 3240 3241 access = job_control_type(cmd); 3242 3243 /* We should never see these here, should be handled by iwscn */ 3244 if (cmd == SRIOCSREDIR || cmd == SRIOCISREDIR) 3245 return (EINVAL); 3246 3247 mutex_enter(&stp->sd_lock); 3248 if ((access != -1) && ((error = i_straccess(stp, access)) != 0)) { 3249 mutex_exit(&stp->sd_lock); 3250 return (error); 3251 } 3252 mutex_exit(&stp->sd_lock); 3253 3254 /* 3255 * Check for sgttyb-related ioctls first, and complain as 3256 * necessary. 
3257 */ 3258 switch (cmd) { 3259 case TIOCGETP: 3260 case TIOCSETP: 3261 case TIOCSETN: 3262 if (sgttyb_handling >= 2 && !sgttyb_complaint) { 3263 sgttyb_complaint = B_TRUE; 3264 cmn_err(CE_NOTE, 3265 "application used obsolete TIOC[GS]ET"); 3266 } 3267 if (sgttyb_handling >= 3) { 3268 tsignal(curthread, SIGSYS); 3269 return (EIO); 3270 } 3271 break; 3272 } 3273 3274 mutex_enter(&stp->sd_lock); 3275 3276 switch (cmd) { 3277 case I_RECVFD: 3278 case I_E_RECVFD: 3279 case I_PEEK: 3280 case I_NREAD: 3281 case FIONREAD: 3282 case FIORDCHK: 3283 case I_ATMARK: 3284 case FIONBIO: 3285 case FIOASYNC: 3286 if (stp->sd_flag & (STRDERR|STPLEX)) { 3287 error = strgeterr(stp, STRDERR|STPLEX, 0); 3288 if (error != 0) { 3289 mutex_exit(&stp->sd_lock); 3290 return (error); 3291 } 3292 } 3293 break; 3294 3295 default: 3296 if (stp->sd_flag & (STRDERR|STWRERR|STPLEX)) { 3297 error = strgeterr(stp, STRDERR|STWRERR|STPLEX, 0); 3298 if (error != 0) { 3299 mutex_exit(&stp->sd_lock); 3300 return (error); 3301 } 3302 } 3303 } 3304 3305 mutex_exit(&stp->sd_lock); 3306 3307 switch (cmd) { 3308 default: 3309 /* 3310 * The stream head has hardcoded knowledge of a 3311 * miscellaneous collection of terminal-, keyboard- and 3312 * mouse-related ioctls, enumerated below. This hardcoded 3313 * knowledge allows the stream head to automatically 3314 * convert transparent ioctl requests made by userland 3315 * programs into I_STR ioctls which many old STREAMS 3316 * modules and drivers require. 3317 * 3318 * No new ioctls should ever be added to this list. 3319 * Instead, the STREAMS module or driver should be written 3320 * to either handle transparent ioctls or require any 3321 * userland programs to use I_STR ioctls (by returning 3322 * EINVAL to any transparent ioctl requests). 
3323 * 3324 * More importantly, removing ioctls from this list should 3325 * be done with the utmost care, since our STREAMS modules 3326 * and drivers *count* on the stream head performing this 3327 * conversion, and thus may panic while processing 3328 * transparent ioctl request for one of these ioctls (keep 3329 * in mind that third party modules and drivers may have 3330 * similar problems). 3331 */ 3332 if (((cmd & IOCTYPE) == LDIOC) || 3333 ((cmd & IOCTYPE) == tIOC) || 3334 ((cmd & IOCTYPE) == TIOC) || 3335 ((cmd & IOCTYPE) == KIOC) || 3336 ((cmd & IOCTYPE) == MSIOC) || 3337 ((cmd & IOCTYPE) == VUIOC)) { 3338 /* 3339 * The ioctl is a tty ioctl - set up strioc buffer 3340 * and call strdoioctl() to do the work. 3341 */ 3342 if (stp->sd_flag & STRHUP) 3343 return (ENXIO); 3344 strioc.ic_cmd = cmd; 3345 strioc.ic_timout = INFTIM; 3346 3347 switch (cmd) { 3348 3349 case TCXONC: 3350 case TCSBRK: 3351 case TCFLSH: 3352 case TCDSET: 3353 { 3354 int native_arg = (int)arg; 3355 strioc.ic_len = sizeof (int); 3356 strioc.ic_dp = (char *)&native_arg; 3357 return (strdoioctl(stp, &strioc, flag, 3358 K_TO_K, crp, rvalp)); 3359 } 3360 3361 case TCSETA: 3362 case TCSETAW: 3363 case TCSETAF: 3364 strioc.ic_len = sizeof (struct termio); 3365 strioc.ic_dp = (char *)arg; 3366 return (strdoioctl(stp, &strioc, flag, 3367 copyflag, crp, rvalp)); 3368 3369 case TCSETS: 3370 case TCSETSW: 3371 case TCSETSF: 3372 strioc.ic_len = sizeof (struct termios); 3373 strioc.ic_dp = (char *)arg; 3374 return (strdoioctl(stp, &strioc, flag, 3375 copyflag, crp, rvalp)); 3376 3377 case LDSETT: 3378 strioc.ic_len = sizeof (struct termcb); 3379 strioc.ic_dp = (char *)arg; 3380 return (strdoioctl(stp, &strioc, flag, 3381 copyflag, crp, rvalp)); 3382 3383 case TIOCSETP: 3384 strioc.ic_len = sizeof (struct sgttyb); 3385 strioc.ic_dp = (char *)arg; 3386 return (strdoioctl(stp, &strioc, flag, 3387 copyflag, crp, rvalp)); 3388 3389 case TIOCSTI: 3390 if ((flag & FREAD) == 0 && 3391 secpolicy_sti(crp) != 
0) { 3392 return (EPERM); 3393 } 3394 mutex_enter(&stp->sd_lock); 3395 mutex_enter(&curproc->p_splock); 3396 if (stp->sd_sidp != curproc->p_sessp->s_sidp && 3397 secpolicy_sti(crp) != 0) { 3398 mutex_exit(&curproc->p_splock); 3399 mutex_exit(&stp->sd_lock); 3400 return (EACCES); 3401 } 3402 mutex_exit(&curproc->p_splock); 3403 mutex_exit(&stp->sd_lock); 3404 3405 strioc.ic_len = sizeof (char); 3406 strioc.ic_dp = (char *)arg; 3407 return (strdoioctl(stp, &strioc, flag, 3408 copyflag, crp, rvalp)); 3409 3410 case TIOCSWINSZ: 3411 strioc.ic_len = sizeof (struct winsize); 3412 strioc.ic_dp = (char *)arg; 3413 return (strdoioctl(stp, &strioc, flag, 3414 copyflag, crp, rvalp)); 3415 3416 case TIOCSSIZE: 3417 strioc.ic_len = sizeof (struct ttysize); 3418 strioc.ic_dp = (char *)arg; 3419 return (strdoioctl(stp, &strioc, flag, 3420 copyflag, crp, rvalp)); 3421 3422 case TIOCSSOFTCAR: 3423 case KIOCTRANS: 3424 case KIOCTRANSABLE: 3425 case KIOCCMD: 3426 case KIOCSDIRECT: 3427 case KIOCSCOMPAT: 3428 case KIOCSKABORTEN: 3429 case KIOCSRPTDELAY: 3430 case KIOCSRPTRATE: 3431 case VUIDSFORMAT: 3432 case TIOCSPPS: 3433 strioc.ic_len = sizeof (int); 3434 strioc.ic_dp = (char *)arg; 3435 return (strdoioctl(stp, &strioc, flag, 3436 copyflag, crp, rvalp)); 3437 3438 case KIOCSETKEY: 3439 case KIOCGETKEY: 3440 strioc.ic_len = sizeof (struct kiockey); 3441 strioc.ic_dp = (char *)arg; 3442 return (strdoioctl(stp, &strioc, flag, 3443 copyflag, crp, rvalp)); 3444 3445 case KIOCSKEY: 3446 case KIOCGKEY: 3447 strioc.ic_len = sizeof (struct kiockeymap); 3448 strioc.ic_dp = (char *)arg; 3449 return (strdoioctl(stp, &strioc, flag, 3450 copyflag, crp, rvalp)); 3451 3452 case KIOCSLED: 3453 /* arg is a pointer to char */ 3454 strioc.ic_len = sizeof (char); 3455 strioc.ic_dp = (char *)arg; 3456 return (strdoioctl(stp, &strioc, flag, 3457 copyflag, crp, rvalp)); 3458 3459 case MSIOSETPARMS: 3460 strioc.ic_len = sizeof (Ms_parms); 3461 strioc.ic_dp = (char *)arg; 3462 return (strdoioctl(stp, 
&strioc, flag, 3463 copyflag, crp, rvalp)); 3464 3465 case VUIDSADDR: 3466 case VUIDGADDR: 3467 strioc.ic_len = sizeof (struct vuid_addr_probe); 3468 strioc.ic_dp = (char *)arg; 3469 return (strdoioctl(stp, &strioc, flag, 3470 copyflag, crp, rvalp)); 3471 3472 /* 3473 * These M_IOCTL's don't require any data to be sent 3474 * downstream, and the driver will allocate and link 3475 * on its own mblk_t upon M_IOCACK -- thus we set 3476 * ic_len to zero and set ic_dp to arg so we know 3477 * where to copyout to later. 3478 */ 3479 case TIOCGSOFTCAR: 3480 case TIOCGWINSZ: 3481 case TIOCGSIZE: 3482 case KIOCGTRANS: 3483 case KIOCGTRANSABLE: 3484 case KIOCTYPE: 3485 case KIOCGDIRECT: 3486 case KIOCGCOMPAT: 3487 case KIOCLAYOUT: 3488 case KIOCGLED: 3489 case MSIOGETPARMS: 3490 case MSIOBUTTONS: 3491 case VUIDGFORMAT: 3492 case TIOCGPPS: 3493 case TIOCGPPSEV: 3494 case TCGETA: 3495 case TCGETS: 3496 case LDGETT: 3497 case TIOCGETP: 3498 case KIOCGRPTDELAY: 3499 case KIOCGRPTRATE: 3500 strioc.ic_len = 0; 3501 strioc.ic_dp = (char *)arg; 3502 return (strdoioctl(stp, &strioc, flag, 3503 copyflag, crp, rvalp)); 3504 } 3505 } 3506 3507 /* 3508 * Unknown cmd - send it down as a transparent ioctl. 3509 */ 3510 strioc.ic_cmd = cmd; 3511 strioc.ic_timout = INFTIM; 3512 strioc.ic_len = TRANSPARENT; 3513 strioc.ic_dp = (char *)&arg; 3514 3515 return (strdoioctl(stp, &strioc, flag, copyflag, crp, rvalp)); 3516 3517 case I_STR: 3518 /* 3519 * Stream ioctl. Read in an strioctl buffer from the user 3520 * along with any data specified and send it downstream. 3521 * Strdoioctl will wait allow only one ioctl message at 3522 * a time, and waits for the acknowledgement. 
3523 */ 3524 3525 if (stp->sd_flag & STRHUP) 3526 return (ENXIO); 3527 3528 error = strcopyin_strioctl((void *)arg, &strioc, flag, 3529 copyflag); 3530 if (error != 0) 3531 return (error); 3532 3533 if ((strioc.ic_len < 0) || (strioc.ic_timout < -1)) 3534 return (EINVAL); 3535 3536 access = job_control_type(strioc.ic_cmd); 3537 mutex_enter(&stp->sd_lock); 3538 if ((access != -1) && 3539 ((error = i_straccess(stp, access)) != 0)) { 3540 mutex_exit(&stp->sd_lock); 3541 return (error); 3542 } 3543 mutex_exit(&stp->sd_lock); 3544 3545 /* 3546 * The I_STR facility provides a trap door for malicious 3547 * code to send down bogus streamio(7I) ioctl commands to 3548 * unsuspecting STREAMS modules and drivers which expect to 3549 * only get these messages from the stream head. 3550 * Explicitly prohibit any streamio ioctls which can be 3551 * passed downstream by the stream head. Note that we do 3552 * not block all streamio ioctls because the ioctl 3553 * numberspace is not well managed and thus it's possible 3554 * that a module or driver's ioctl numbers may accidentally 3555 * collide with them. 3556 */ 3557 switch (strioc.ic_cmd) { 3558 case I_LINK: 3559 case I_PLINK: 3560 case I_UNLINK: 3561 case I_PUNLINK: 3562 case _I_GETPEERCRED: 3563 case _I_PLINK_LH: 3564 return (EINVAL); 3565 } 3566 3567 error = strdoioctl(stp, &strioc, flag, copyflag, crp, rvalp); 3568 if (error == 0) { 3569 error = strcopyout_strioctl(&strioc, (void *)arg, 3570 flag, copyflag); 3571 } 3572 return (error); 3573 3574 case _I_CMD: 3575 /* 3576 * Like I_STR, but without using M_IOC* messages and without 3577 * copyins/copyouts beyond the passed-in argument. 
3578 */ 3579 if (stp->sd_flag & STRHUP) 3580 return (ENXIO); 3581 3582 if ((scp = kmem_alloc(sizeof (strcmd_t), KM_NOSLEEP)) == NULL) 3583 return (ENOMEM); 3584 3585 if (copyin((void *)arg, scp, sizeof (strcmd_t))) { 3586 kmem_free(scp, sizeof (strcmd_t)); 3587 return (EFAULT); 3588 } 3589 3590 access = job_control_type(scp->sc_cmd); 3591 mutex_enter(&stp->sd_lock); 3592 if (access != -1 && (error = i_straccess(stp, access)) != 0) { 3593 mutex_exit(&stp->sd_lock); 3594 kmem_free(scp, sizeof (strcmd_t)); 3595 return (error); 3596 } 3597 mutex_exit(&stp->sd_lock); 3598 3599 *rvalp = 0; 3600 if ((error = strdocmd(stp, scp, crp)) == 0) { 3601 if (copyout(scp, (void *)arg, sizeof (strcmd_t))) 3602 error = EFAULT; 3603 } 3604 kmem_free(scp, sizeof (strcmd_t)); 3605 return (error); 3606 3607 case I_NREAD: 3608 /* 3609 * Return number of bytes of data in first message 3610 * in queue in "arg" and return the number of messages 3611 * in queue in return value. 3612 */ 3613 { 3614 size_t size; 3615 int retval; 3616 int count = 0; 3617 3618 mutex_enter(QLOCK(rdq)); 3619 3620 size = msgdsize(rdq->q_first); 3621 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next) 3622 count++; 3623 3624 mutex_exit(QLOCK(rdq)); 3625 if (stp->sd_struiordq) { 3626 infod_t infod; 3627 3628 infod.d_cmd = INFOD_COUNT; 3629 infod.d_count = 0; 3630 if (count == 0) { 3631 infod.d_cmd |= INFOD_FIRSTBYTES; 3632 infod.d_bytes = 0; 3633 } 3634 infod.d_res = 0; 3635 (void) infonext(rdq, &infod); 3636 count += infod.d_count; 3637 if (infod.d_res & INFOD_FIRSTBYTES) 3638 size = infod.d_bytes; 3639 } 3640 3641 /* 3642 * Drop down from size_t to the "int" required by the 3643 * interface. Cap at INT_MAX. 3644 */ 3645 retval = MIN(size, INT_MAX); 3646 error = strcopyout(&retval, (void *)arg, sizeof (retval), 3647 copyflag); 3648 if (!error) 3649 *rvalp = count; 3650 return (error); 3651 } 3652 3653 case FIONREAD: 3654 /* 3655 * Return number of bytes of data in all data messages 3656 * in queue in "arg". 
3657 */ 3658 { 3659 size_t size = 0; 3660 int retval; 3661 3662 mutex_enter(QLOCK(rdq)); 3663 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next) 3664 size += msgdsize(mp); 3665 mutex_exit(QLOCK(rdq)); 3666 3667 if (stp->sd_struiordq) { 3668 infod_t infod; 3669 3670 infod.d_cmd = INFOD_BYTES; 3671 infod.d_res = 0; 3672 infod.d_bytes = 0; 3673 (void) infonext(rdq, &infod); 3674 size += infod.d_bytes; 3675 } 3676 3677 /* 3678 * Drop down from size_t to the "int" required by the 3679 * interface. Cap at INT_MAX. 3680 */ 3681 retval = MIN(size, INT_MAX); 3682 error = strcopyout(&retval, (void *)arg, sizeof (retval), 3683 copyflag); 3684 3685 *rvalp = 0; 3686 return (error); 3687 } 3688 case FIORDCHK: 3689 /* 3690 * FIORDCHK does not use arg value (like FIONREAD), 3691 * instead a count is returned. I_NREAD value may 3692 * not be accurate but safe. The real thing to do is 3693 * to add the msgdsizes of all data messages until 3694 * a non-data message. 3695 */ 3696 { 3697 size_t size = 0; 3698 3699 mutex_enter(QLOCK(rdq)); 3700 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next) 3701 size += msgdsize(mp); 3702 mutex_exit(QLOCK(rdq)); 3703 3704 if (stp->sd_struiordq) { 3705 infod_t infod; 3706 3707 infod.d_cmd = INFOD_BYTES; 3708 infod.d_res = 0; 3709 infod.d_bytes = 0; 3710 (void) infonext(rdq, &infod); 3711 size += infod.d_bytes; 3712 } 3713 3714 /* 3715 * Since ioctl returns an int, and memory sizes under 3716 * LP64 may not fit, we return INT_MAX if the count was 3717 * actually greater. 3718 */ 3719 *rvalp = MIN(size, INT_MAX); 3720 return (0); 3721 } 3722 3723 case I_FIND: 3724 /* 3725 * Get module name. 3726 */ 3727 { 3728 char mname[FMNAMESZ + 1]; 3729 queue_t *q; 3730 3731 error = (copyflag & U_TO_K ? copyinstr : copystr)((void *)arg, 3732 mname, FMNAMESZ + 1, NULL); 3733 if (error) 3734 return ((error == ENAMETOOLONG) ? EINVAL : EFAULT); 3735 3736 /* 3737 * Return EINVAL if we're handed a bogus module name. 
3738 */ 3739 if (fmodsw_find(mname, FMODSW_LOAD) == NULL) { 3740 TRACE_0(TR_FAC_STREAMS_FR, 3741 TR_I_CANT_FIND, "couldn't I_FIND"); 3742 return (EINVAL); 3743 } 3744 3745 *rvalp = 0; 3746 3747 /* Look downstream to see if module is there. */ 3748 claimstr(stp->sd_wrq); 3749 for (q = stp->sd_wrq->q_next; q; q = q->q_next) { 3750 if (q->q_flag & QREADR) { 3751 q = NULL; 3752 break; 3753 } 3754 if (strcmp(mname, Q2NAME(q)) == 0) 3755 break; 3756 } 3757 releasestr(stp->sd_wrq); 3758 3759 *rvalp = (q ? 1 : 0); 3760 return (error); 3761 } 3762 3763 case I_PUSH: 3764 case __I_PUSH_NOCTTY: 3765 /* 3766 * Push a module. 3767 * For the case __I_PUSH_NOCTTY push a module but 3768 * do not allocate controlling tty. See bugid 4025044 3769 */ 3770 3771 { 3772 char mname[FMNAMESZ + 1]; 3773 fmodsw_impl_t *fp; 3774 dev_t dummydev; 3775 3776 if (stp->sd_flag & STRHUP) 3777 return (ENXIO); 3778 3779 /* 3780 * Get module name and look up in fmodsw. 3781 */ 3782 error = (copyflag & U_TO_K ? copyinstr : copystr)((void *)arg, 3783 mname, FMNAMESZ + 1, NULL); 3784 if (error) 3785 return ((error == ENAMETOOLONG) ? EINVAL : EFAULT); 3786 3787 if ((fp = fmodsw_find(mname, FMODSW_HOLD | FMODSW_LOAD)) == 3788 NULL) 3789 return (EINVAL); 3790 3791 TRACE_2(TR_FAC_STREAMS_FR, TR_I_PUSH, 3792 "I_PUSH:fp %p stp %p", fp, stp); 3793 3794 if (error = strstartplumb(stp, flag, cmd)) { 3795 fmodsw_rele(fp); 3796 return (error); 3797 } 3798 3799 /* 3800 * See if any more modules can be pushed on this stream. 3801 * Note that this check must be done after strstartplumb() 3802 * since otherwise multiple threads issuing I_PUSHes on 3803 * the same stream will be able to exceed nstrpush. 3804 */ 3805 mutex_enter(&stp->sd_lock); 3806 if (stp->sd_pushcnt >= nstrpush) { 3807 fmodsw_rele(fp); 3808 strendplumb(stp); 3809 mutex_exit(&stp->sd_lock); 3810 return (EINVAL); 3811 } 3812 mutex_exit(&stp->sd_lock); 3813 3814 /* 3815 * Push new module and call its open routine 3816 * via qattach(). 
Modules don't change device 3817 * numbers, so just ignore dummydev here. 3818 */ 3819 dummydev = vp->v_rdev; 3820 if ((error = qattach(rdq, &dummydev, 0, crp, fp, 3821 B_FALSE)) == 0) { 3822 if (vp->v_type == VCHR && /* sorry, no pipes allowed */ 3823 (cmd == I_PUSH) && (stp->sd_flag & STRISTTY)) { 3824 /* 3825 * try to allocate it as a controlling terminal 3826 */ 3827 (void) strctty(stp); 3828 } 3829 } 3830 3831 mutex_enter(&stp->sd_lock); 3832 3833 /* 3834 * As a performance concern we are caching the values of 3835 * q_minpsz and q_maxpsz of the module below the stream 3836 * head in the stream head. 3837 */ 3838 mutex_enter(QLOCK(stp->sd_wrq->q_next)); 3839 rmin = stp->sd_wrq->q_next->q_minpsz; 3840 rmax = stp->sd_wrq->q_next->q_maxpsz; 3841 mutex_exit(QLOCK(stp->sd_wrq->q_next)); 3842 3843 /* Do this processing here as a performance concern */ 3844 if (strmsgsz != 0) { 3845 if (rmax == INFPSZ) 3846 rmax = strmsgsz; 3847 else { 3848 if (vp->v_type == VFIFO) 3849 rmax = MIN(PIPE_BUF, rmax); 3850 else rmax = MIN(strmsgsz, rmax); 3851 } 3852 } 3853 3854 mutex_enter(QLOCK(wrq)); 3855 stp->sd_qn_minpsz = rmin; 3856 stp->sd_qn_maxpsz = rmax; 3857 mutex_exit(QLOCK(wrq)); 3858 3859 strendplumb(stp); 3860 mutex_exit(&stp->sd_lock); 3861 return (error); 3862 } 3863 3864 case I_POP: 3865 { 3866 queue_t *q; 3867 3868 if (stp->sd_flag & STRHUP) 3869 return (ENXIO); 3870 if (!wrq->q_next) /* for broken pipes */ 3871 return (EINVAL); 3872 3873 if (error = strstartplumb(stp, flag, cmd)) 3874 return (error); 3875 3876 /* 3877 * If there is an anchor on this stream and popping 3878 * the current module would attempt to pop through the 3879 * anchor, then disallow the pop unless we have sufficient 3880 * privileges; take the cheapest (non-locking) check 3881 * first. 
3882 */ 3883 if (secpolicy_ip_config(crp, B_TRUE) != 0 || 3884 (stp->sd_anchorzone != crgetzoneid(crp))) { 3885 mutex_enter(&stp->sd_lock); 3886 /* 3887 * Anchors only apply if there's at least one 3888 * module on the stream (sd_pushcnt > 0). 3889 */ 3890 if (stp->sd_pushcnt > 0 && 3891 stp->sd_pushcnt == stp->sd_anchor && 3892 stp->sd_vnode->v_type != VFIFO) { 3893 strendplumb(stp); 3894 mutex_exit(&stp->sd_lock); 3895 if (stp->sd_anchorzone != crgetzoneid(crp)) 3896 return (EINVAL); 3897 /* Audit and report error */ 3898 return (secpolicy_ip_config(crp, B_FALSE)); 3899 } 3900 mutex_exit(&stp->sd_lock); 3901 } 3902 3903 q = wrq->q_next; 3904 TRACE_2(TR_FAC_STREAMS_FR, TR_I_POP, 3905 "I_POP:%p from %p", q, stp); 3906 if (q->q_next == NULL || (q->q_flag & (QREADR|QISDRV))) { 3907 error = EINVAL; 3908 } else { 3909 qdetach(_RD(q), 1, flag, crp, B_FALSE); 3910 error = 0; 3911 } 3912 mutex_enter(&stp->sd_lock); 3913 3914 /* 3915 * As a performance concern we are caching the values of 3916 * q_minpsz and q_maxpsz of the module below the stream 3917 * head in the stream head. 3918 */ 3919 mutex_enter(QLOCK(wrq->q_next)); 3920 rmin = wrq->q_next->q_minpsz; 3921 rmax = wrq->q_next->q_maxpsz; 3922 mutex_exit(QLOCK(wrq->q_next)); 3923 3924 /* Do this processing here as a performance concern */ 3925 if (strmsgsz != 0) { 3926 if (rmax == INFPSZ) 3927 rmax = strmsgsz; 3928 else { 3929 if (vp->v_type == VFIFO) 3930 rmax = MIN(PIPE_BUF, rmax); 3931 else rmax = MIN(strmsgsz, rmax); 3932 } 3933 } 3934 3935 mutex_enter(QLOCK(wrq)); 3936 stp->sd_qn_minpsz = rmin; 3937 stp->sd_qn_maxpsz = rmax; 3938 mutex_exit(QLOCK(wrq)); 3939 3940 /* If we popped through the anchor, then reset the anchor. 
*/ 3941 if (stp->sd_pushcnt < stp->sd_anchor) { 3942 stp->sd_anchor = 0; 3943 stp->sd_anchorzone = 0; 3944 } 3945 strendplumb(stp); 3946 mutex_exit(&stp->sd_lock); 3947 return (error); 3948 } 3949 3950 case _I_MUXID2FD: 3951 { 3952 /* 3953 * Create a fd for a I_PLINK'ed lower stream with a given 3954 * muxid. With the fd, application can send down ioctls, 3955 * like I_LIST, to the previously I_PLINK'ed stream. Note 3956 * that after getting the fd, the application has to do an 3957 * I_PUNLINK on the muxid before it can do any operation 3958 * on the lower stream. This is required by spec1170. 3959 * 3960 * The fd used to do this ioctl should point to the same 3961 * controlling device used to do the I_PLINK. If it uses 3962 * a different stream or an invalid muxid, I_MUXID2FD will 3963 * fail. The error code is set to EINVAL. 3964 * 3965 * The intended use of this interface is the following. 3966 * An application I_PLINK'ed a stream and exits. The fd 3967 * to the lower stream is gone. Another application 3968 * wants to get a fd to the lower stream, it uses I_MUXID2FD. 3969 */ 3970 int muxid = (int)arg; 3971 int fd; 3972 linkinfo_t *linkp; 3973 struct file *fp; 3974 netstack_t *ns; 3975 str_stack_t *ss; 3976 3977 /* 3978 * Do not allow the wildcard muxid. This ioctl is not 3979 * intended to find arbitrary link. 
3980 */ 3981 if (muxid == 0) { 3982 return (EINVAL); 3983 } 3984 3985 ns = netstack_find_by_cred(crp); 3986 ASSERT(ns != NULL); 3987 ss = ns->netstack_str; 3988 ASSERT(ss != NULL); 3989 3990 mutex_enter(&muxifier); 3991 linkp = findlinks(vp->v_stream, muxid, LINKPERSIST, ss); 3992 if (linkp == NULL) { 3993 mutex_exit(&muxifier); 3994 netstack_rele(ss->ss_netstack); 3995 return (EINVAL); 3996 } 3997 3998 if ((fd = ufalloc(0)) == -1) { 3999 mutex_exit(&muxifier); 4000 netstack_rele(ss->ss_netstack); 4001 return (EMFILE); 4002 } 4003 fp = linkp->li_fpdown; 4004 mutex_enter(&fp->f_tlock); 4005 fp->f_count++; 4006 mutex_exit(&fp->f_tlock); 4007 mutex_exit(&muxifier); 4008 setf(fd, fp); 4009 *rvalp = fd; 4010 netstack_rele(ss->ss_netstack); 4011 return (0); 4012 } 4013 4014 case _I_INSERT: 4015 { 4016 /* 4017 * To insert a module to a given position in a stream. 4018 * In the first release, only allow privileged user 4019 * to use this ioctl. Furthermore, the insert is only allowed 4020 * below an anchor if the zoneid is the same as the zoneid 4021 * which created the anchor. 4022 * 4023 * Note that we do not plan to support this ioctl 4024 * on pipes in the first release. We want to learn more 4025 * about the implications of these ioctls before extending 4026 * their support. And we do not think these features are 4027 * valuable for pipes. 
4028 */ 4029 STRUCT_DECL(strmodconf, strmodinsert); 4030 char mod_name[FMNAMESZ + 1]; 4031 fmodsw_impl_t *fp; 4032 dev_t dummydev; 4033 queue_t *tmp_wrq; 4034 int pos; 4035 boolean_t is_insert; 4036 4037 STRUCT_INIT(strmodinsert, flag); 4038 if (stp->sd_flag & STRHUP) 4039 return (ENXIO); 4040 if (STRMATED(stp)) 4041 return (EINVAL); 4042 if ((error = secpolicy_net_config(crp, B_FALSE)) != 0) 4043 return (error); 4044 if (stp->sd_anchor != 0 && 4045 stp->sd_anchorzone != crgetzoneid(crp)) 4046 return (EINVAL); 4047 4048 error = strcopyin((void *)arg, STRUCT_BUF(strmodinsert), 4049 STRUCT_SIZE(strmodinsert), copyflag); 4050 if (error) 4051 return (error); 4052 4053 /* 4054 * Get module name and look up in fmodsw. 4055 */ 4056 error = (copyflag & U_TO_K ? copyinstr : 4057 copystr)(STRUCT_FGETP(strmodinsert, mod_name), 4058 mod_name, FMNAMESZ + 1, NULL); 4059 if (error) 4060 return ((error == ENAMETOOLONG) ? EINVAL : EFAULT); 4061 4062 if ((fp = fmodsw_find(mod_name, FMODSW_HOLD | FMODSW_LOAD)) == 4063 NULL) 4064 return (EINVAL); 4065 4066 if (error = strstartplumb(stp, flag, cmd)) { 4067 fmodsw_rele(fp); 4068 return (error); 4069 } 4070 4071 /* 4072 * Is this _I_INSERT just like an I_PUSH? We need to know 4073 * this because we do some optimizations if this is a 4074 * module being pushed. 4075 */ 4076 pos = STRUCT_FGET(strmodinsert, pos); 4077 is_insert = (pos != 0); 4078 4079 /* 4080 * Make sure pos is valid. Even though it is not an I_PUSH, 4081 * we impose the same limit on the number of modules in a 4082 * stream. 4083 */ 4084 mutex_enter(&stp->sd_lock); 4085 if (stp->sd_pushcnt >= nstrpush || pos < 0 || 4086 pos > stp->sd_pushcnt) { 4087 fmodsw_rele(fp); 4088 strendplumb(stp); 4089 mutex_exit(&stp->sd_lock); 4090 return (EINVAL); 4091 } 4092 if (stp->sd_anchor != 0) { 4093 /* 4094 * Is this insert below the anchor? 4095 * Pushcnt hasn't been increased yet hence 4096 * we test for greater than here, and greater or 4097 * equal after qattach. 
4098 */ 4099 if (pos > (stp->sd_pushcnt - stp->sd_anchor) && 4100 stp->sd_anchorzone != crgetzoneid(crp)) { 4101 fmodsw_rele(fp); 4102 strendplumb(stp); 4103 mutex_exit(&stp->sd_lock); 4104 return (EPERM); 4105 } 4106 } 4107 4108 mutex_exit(&stp->sd_lock); 4109 4110 /* 4111 * First find the correct position this module to 4112 * be inserted. We don't need to call claimstr() 4113 * as the stream should not be changing at this point. 4114 * 4115 * Insert new module and call its open routine 4116 * via qattach(). Modules don't change device 4117 * numbers, so just ignore dummydev here. 4118 */ 4119 for (tmp_wrq = stp->sd_wrq; pos > 0; 4120 tmp_wrq = tmp_wrq->q_next, pos--) { 4121 ASSERT(SAMESTR(tmp_wrq)); 4122 } 4123 dummydev = vp->v_rdev; 4124 if ((error = qattach(_RD(tmp_wrq), &dummydev, 0, crp, 4125 fp, is_insert)) != 0) { 4126 mutex_enter(&stp->sd_lock); 4127 strendplumb(stp); 4128 mutex_exit(&stp->sd_lock); 4129 return (error); 4130 } 4131 4132 mutex_enter(&stp->sd_lock); 4133 4134 /* 4135 * As a performance concern we are caching the values of 4136 * q_minpsz and q_maxpsz of the module below the stream 4137 * head in the stream head. 4138 */ 4139 if (!is_insert) { 4140 mutex_enter(QLOCK(stp->sd_wrq->q_next)); 4141 rmin = stp->sd_wrq->q_next->q_minpsz; 4142 rmax = stp->sd_wrq->q_next->q_maxpsz; 4143 mutex_exit(QLOCK(stp->sd_wrq->q_next)); 4144 4145 /* Do this processing here as a performance concern */ 4146 if (strmsgsz != 0) { 4147 if (rmax == INFPSZ) { 4148 rmax = strmsgsz; 4149 } else { 4150 rmax = MIN(strmsgsz, rmax); 4151 } 4152 } 4153 4154 mutex_enter(QLOCK(wrq)); 4155 stp->sd_qn_minpsz = rmin; 4156 stp->sd_qn_maxpsz = rmax; 4157 mutex_exit(QLOCK(wrq)); 4158 } 4159 4160 /* 4161 * Need to update the anchor value if this module is 4162 * inserted below the anchor point. 
4163 */ 4164 if (stp->sd_anchor != 0) { 4165 pos = STRUCT_FGET(strmodinsert, pos); 4166 if (pos >= (stp->sd_pushcnt - stp->sd_anchor)) 4167 stp->sd_anchor++; 4168 } 4169 4170 strendplumb(stp); 4171 mutex_exit(&stp->sd_lock); 4172 return (0); 4173 } 4174 4175 case _I_REMOVE: 4176 { 4177 /* 4178 * To remove a module with a given name in a stream. The 4179 * caller of this ioctl needs to provide both the name and 4180 * the position of the module to be removed. This eliminates 4181 * the ambiguity of removal if a module is inserted/pushed 4182 * multiple times in a stream. In the first release, only 4183 * allow privileged user to use this ioctl. 4184 * Furthermore, the remove is only allowed 4185 * below an anchor if the zoneid is the same as the zoneid 4186 * which created the anchor. 4187 * 4188 * Note that we do not plan to support this ioctl 4189 * on pipes in the first release. We want to learn more 4190 * about the implications of these ioctls before extending 4191 * their support. And we do not think these features are 4192 * valuable for pipes. 4193 * 4194 * Also note that _I_REMOVE cannot be used to remove a 4195 * driver or the stream head. 4196 */ 4197 STRUCT_DECL(strmodconf, strmodremove); 4198 queue_t *q; 4199 int pos; 4200 char mod_name[FMNAMESZ + 1]; 4201 boolean_t is_remove; 4202 4203 STRUCT_INIT(strmodremove, flag); 4204 if (stp->sd_flag & STRHUP) 4205 return (ENXIO); 4206 if (STRMATED(stp)) 4207 return (EINVAL); 4208 if ((error = secpolicy_net_config(crp, B_FALSE)) != 0) 4209 return (error); 4210 if (stp->sd_anchor != 0 && 4211 stp->sd_anchorzone != crgetzoneid(crp)) 4212 return (EINVAL); 4213 4214 error = strcopyin((void *)arg, STRUCT_BUF(strmodremove), 4215 STRUCT_SIZE(strmodremove), copyflag); 4216 if (error) 4217 return (error); 4218 4219 error = (copyflag & U_TO_K ? copyinstr : 4220 copystr)(STRUCT_FGETP(strmodremove, mod_name), 4221 mod_name, FMNAMESZ + 1, NULL); 4222 if (error) 4223 return ((error == ENAMETOOLONG) ? 
EINVAL : EFAULT); 4224 4225 if ((error = strstartplumb(stp, flag, cmd)) != 0) 4226 return (error); 4227 4228 /* 4229 * Match the name of given module to the name of module at 4230 * the given position. 4231 */ 4232 pos = STRUCT_FGET(strmodremove, pos); 4233 4234 is_remove = (pos != 0); 4235 for (q = stp->sd_wrq->q_next; SAMESTR(q) && pos > 0; 4236 q = q->q_next, pos--) 4237 ; 4238 if (pos > 0 || !SAMESTR(q) || 4239 strcmp(Q2NAME(q), mod_name) != 0) { 4240 mutex_enter(&stp->sd_lock); 4241 strendplumb(stp); 4242 mutex_exit(&stp->sd_lock); 4243 return (EINVAL); 4244 } 4245 4246 /* 4247 * If the position is at or below an anchor, then the zoneid 4248 * must match the zoneid that created the anchor. 4249 */ 4250 if (stp->sd_anchor != 0) { 4251 pos = STRUCT_FGET(strmodremove, pos); 4252 if (pos >= (stp->sd_pushcnt - stp->sd_anchor) && 4253 stp->sd_anchorzone != crgetzoneid(crp)) { 4254 mutex_enter(&stp->sd_lock); 4255 strendplumb(stp); 4256 mutex_exit(&stp->sd_lock); 4257 return (EPERM); 4258 } 4259 } 4260 4261 4262 ASSERT(!(q->q_flag & QREADR)); 4263 qdetach(_RD(q), 1, flag, crp, is_remove); 4264 4265 mutex_enter(&stp->sd_lock); 4266 4267 /* 4268 * As a performance concern we are caching the values of 4269 * q_minpsz and q_maxpsz of the module below the stream 4270 * head in the stream head. 4271 */ 4272 if (!is_remove) { 4273 mutex_enter(QLOCK(wrq->q_next)); 4274 rmin = wrq->q_next->q_minpsz; 4275 rmax = wrq->q_next->q_maxpsz; 4276 mutex_exit(QLOCK(wrq->q_next)); 4277 4278 /* Do this processing here as a performance concern */ 4279 if (strmsgsz != 0) { 4280 if (rmax == INFPSZ) 4281 rmax = strmsgsz; 4282 else { 4283 if (vp->v_type == VFIFO) 4284 rmax = MIN(PIPE_BUF, rmax); 4285 else rmax = MIN(strmsgsz, rmax); 4286 } 4287 } 4288 4289 mutex_enter(QLOCK(wrq)); 4290 stp->sd_qn_minpsz = rmin; 4291 stp->sd_qn_maxpsz = rmax; 4292 mutex_exit(QLOCK(wrq)); 4293 } 4294 4295 /* 4296 * Need to update the anchor value if this module is removed 4297 * at or below the anchor point. 
If the removed module is at 4298 * the anchor point, remove the anchor for this stream if 4299 * there is no module above the anchor point. Otherwise, if 4300 * the removed module is below the anchor point, decrement the 4301 * anchor point by 1. 4302 */ 4303 if (stp->sd_anchor != 0) { 4304 pos = STRUCT_FGET(strmodremove, pos); 4305 if (pos == stp->sd_pushcnt - stp->sd_anchor + 1) 4306 stp->sd_anchor = 0; 4307 else if (pos > (stp->sd_pushcnt - stp->sd_anchor + 1)) 4308 stp->sd_anchor--; 4309 } 4310 4311 strendplumb(stp); 4312 mutex_exit(&stp->sd_lock); 4313 return (0); 4314 } 4315 4316 case I_ANCHOR: 4317 /* 4318 * Set the anchor position on the stream to reside at 4319 * the top module (in other words, the top module 4320 * cannot be popped). Anchors with a FIFO make no 4321 * obvious sense, so they're not allowed. 4322 */ 4323 mutex_enter(&stp->sd_lock); 4324 4325 if (stp->sd_vnode->v_type == VFIFO) { 4326 mutex_exit(&stp->sd_lock); 4327 return (EINVAL); 4328 } 4329 /* Only allow the same zoneid to update the anchor */ 4330 if (stp->sd_anchor != 0 && 4331 stp->sd_anchorzone != crgetzoneid(crp)) { 4332 mutex_exit(&stp->sd_lock); 4333 return (EINVAL); 4334 } 4335 stp->sd_anchor = stp->sd_pushcnt; 4336 stp->sd_anchorzone = crgetzoneid(crp); 4337 mutex_exit(&stp->sd_lock); 4338 return (0); 4339 4340 case I_LOOK: 4341 /* 4342 * Get name of first module downstream. 4343 * If no module, return an error. 4344 */ 4345 claimstr(wrq); 4346 if (_SAMESTR(wrq) && wrq->q_next->q_next != NULL) { 4347 char *name = Q2NAME(wrq->q_next); 4348 4349 error = strcopyout(name, (void *)arg, strlen(name) + 1, 4350 copyflag); 4351 releasestr(wrq); 4352 return (error); 4353 } 4354 releasestr(wrq); 4355 return (EINVAL); 4356 4357 case I_LINK: 4358 case I_PLINK: 4359 /* 4360 * Link a multiplexor. 4361 */ 4362 return (mlink(vp, cmd, (int)arg, crp, rvalp, 0)); 4363 4364 case _I_PLINK_LH: 4365 /* 4366 * Link a multiplexor: Call must originate from kernel. 
4367 */ 4368 if (kioctl) 4369 return (ldi_mlink_lh(vp, cmd, arg, crp, rvalp)); 4370 4371 return (EINVAL); 4372 case I_UNLINK: 4373 case I_PUNLINK: 4374 /* 4375 * Unlink a multiplexor. 4376 * If arg is -1, unlink all links for which this is the 4377 * controlling stream. Otherwise, arg is an index number 4378 * for a link to be removed. 4379 */ 4380 { 4381 struct linkinfo *linkp; 4382 int native_arg = (int)arg; 4383 int type; 4384 netstack_t *ns; 4385 str_stack_t *ss; 4386 4387 TRACE_1(TR_FAC_STREAMS_FR, 4388 TR_I_UNLINK, "I_UNLINK/I_PUNLINK:%p", stp); 4389 if (vp->v_type == VFIFO) { 4390 return (EINVAL); 4391 } 4392 if (cmd == I_UNLINK) 4393 type = LINKNORMAL; 4394 else /* I_PUNLINK */ 4395 type = LINKPERSIST; 4396 if (native_arg == 0) { 4397 return (EINVAL); 4398 } 4399 ns = netstack_find_by_cred(crp); 4400 ASSERT(ns != NULL); 4401 ss = ns->netstack_str; 4402 ASSERT(ss != NULL); 4403 4404 if (native_arg == MUXID_ALL) 4405 error = munlinkall(stp, type, crp, rvalp, ss); 4406 else { 4407 mutex_enter(&muxifier); 4408 if (!(linkp = findlinks(stp, (int)arg, type, ss))) { 4409 /* invalid user supplied index number */ 4410 mutex_exit(&muxifier); 4411 netstack_rele(ss->ss_netstack); 4412 return (EINVAL); 4413 } 4414 /* munlink drops the muxifier lock */ 4415 error = munlink(stp, linkp, type, crp, rvalp, ss); 4416 } 4417 netstack_rele(ss->ss_netstack); 4418 return (error); 4419 } 4420 4421 case I_FLUSH: 4422 /* 4423 * send a flush message downstream 4424 * flush message can indicate 4425 * FLUSHR - flush read queue 4426 * FLUSHW - flush write queue 4427 * FLUSHRW - flush read/write queue 4428 */ 4429 if (stp->sd_flag & STRHUP) 4430 return (ENXIO); 4431 if (arg & ~FLUSHRW) 4432 return (EINVAL); 4433 4434 for (;;) { 4435 if (putnextctl1(stp->sd_wrq, M_FLUSH, (int)arg)) { 4436 break; 4437 } 4438 if (error = strwaitbuf(1, BPRI_HI)) { 4439 return (error); 4440 } 4441 } 4442 4443 /* 4444 * Send down an unsupported ioctl and wait for the nack 4445 * in order to allow the M_FLUSH 
to propagate back 4446 * up to the stream head. 4447 * Replaces if (qready()) runqueues(); 4448 */ 4449 strioc.ic_cmd = -1; /* The unsupported ioctl */ 4450 strioc.ic_timout = 0; 4451 strioc.ic_len = 0; 4452 strioc.ic_dp = NULL; 4453 (void) strdoioctl(stp, &strioc, flag, K_TO_K, crp, rvalp); 4454 *rvalp = 0; 4455 return (0); 4456 4457 case I_FLUSHBAND: 4458 { 4459 struct bandinfo binfo; 4460 4461 error = strcopyin((void *)arg, &binfo, sizeof (binfo), 4462 copyflag); 4463 if (error) 4464 return (error); 4465 if (stp->sd_flag & STRHUP) 4466 return (ENXIO); 4467 if (binfo.bi_flag & ~FLUSHRW) 4468 return (EINVAL); 4469 while (!(mp = allocb(2, BPRI_HI))) { 4470 if (error = strwaitbuf(2, BPRI_HI)) 4471 return (error); 4472 } 4473 mp->b_datap->db_type = M_FLUSH; 4474 *mp->b_wptr++ = binfo.bi_flag | FLUSHBAND; 4475 *mp->b_wptr++ = binfo.bi_pri; 4476 putnext(stp->sd_wrq, mp); 4477 /* 4478 * Send down an unsupported ioctl and wait for the nack 4479 * in order to allow the M_FLUSH to propagate back 4480 * up to the stream head. 
4481 * Replaces if (qready()) runqueues(); 4482 */ 4483 strioc.ic_cmd = -1; /* The unsupported ioctl */ 4484 strioc.ic_timout = 0; 4485 strioc.ic_len = 0; 4486 strioc.ic_dp = NULL; 4487 (void) strdoioctl(stp, &strioc, flag, K_TO_K, crp, rvalp); 4488 *rvalp = 0; 4489 return (0); 4490 } 4491 4492 case I_SRDOPT: 4493 /* 4494 * Set read options 4495 * 4496 * RNORM - default stream mode 4497 * RMSGN - message no discard 4498 * RMSGD - message discard 4499 * RPROTNORM - fail read with EBADMSG for M_[PC]PROTOs 4500 * RPROTDAT - convert M_[PC]PROTOs to M_DATAs 4501 * RPROTDIS - discard M_[PC]PROTOs and retain M_DATAs 4502 */ 4503 if (arg & ~(RMODEMASK | RPROTMASK)) 4504 return (EINVAL); 4505 4506 if ((arg & (RMSGD|RMSGN)) == (RMSGD|RMSGN)) 4507 return (EINVAL); 4508 4509 mutex_enter(&stp->sd_lock); 4510 switch (arg & RMODEMASK) { 4511 case RNORM: 4512 stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS); 4513 break; 4514 case RMSGD: 4515 stp->sd_read_opt = (stp->sd_read_opt & ~RD_MSGNODIS) | 4516 RD_MSGDIS; 4517 break; 4518 case RMSGN: 4519 stp->sd_read_opt = (stp->sd_read_opt & ~RD_MSGDIS) | 4520 RD_MSGNODIS; 4521 break; 4522 } 4523 4524 switch (arg & RPROTMASK) { 4525 case RPROTNORM: 4526 stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS); 4527 break; 4528 4529 case RPROTDAT: 4530 stp->sd_read_opt = ((stp->sd_read_opt & ~RD_PROTDIS) | 4531 RD_PROTDAT); 4532 break; 4533 4534 case RPROTDIS: 4535 stp->sd_read_opt = ((stp->sd_read_opt & ~RD_PROTDAT) | 4536 RD_PROTDIS); 4537 break; 4538 } 4539 mutex_exit(&stp->sd_lock); 4540 return (0); 4541 4542 case I_GRDOPT: 4543 /* 4544 * Get read option and return the value 4545 * to spot pointed to by arg 4546 */ 4547 { 4548 int rdopt; 4549 4550 rdopt = ((stp->sd_read_opt & RD_MSGDIS) ? RMSGD : 4551 ((stp->sd_read_opt & RD_MSGNODIS) ? RMSGN : RNORM)); 4552 rdopt |= ((stp->sd_read_opt & RD_PROTDAT) ? RPROTDAT : 4553 ((stp->sd_read_opt & RD_PROTDIS) ? 
RPROTDIS : RPROTNORM)); 4554 4555 return (strcopyout(&rdopt, (void *)arg, sizeof (int), 4556 copyflag)); 4557 } 4558 4559 case I_SERROPT: 4560 /* 4561 * Set error options 4562 * 4563 * RERRNORM - persistent read errors 4564 * RERRNONPERSIST - non-persistent read errors 4565 * WERRNORM - persistent write errors 4566 * WERRNONPERSIST - non-persistent write errors 4567 */ 4568 if (arg & ~(RERRMASK | WERRMASK)) 4569 return (EINVAL); 4570 4571 mutex_enter(&stp->sd_lock); 4572 switch (arg & RERRMASK) { 4573 case RERRNORM: 4574 stp->sd_flag &= ~STRDERRNONPERSIST; 4575 break; 4576 case RERRNONPERSIST: 4577 stp->sd_flag |= STRDERRNONPERSIST; 4578 break; 4579 } 4580 switch (arg & WERRMASK) { 4581 case WERRNORM: 4582 stp->sd_flag &= ~STWRERRNONPERSIST; 4583 break; 4584 case WERRNONPERSIST: 4585 stp->sd_flag |= STWRERRNONPERSIST; 4586 break; 4587 } 4588 mutex_exit(&stp->sd_lock); 4589 return (0); 4590 4591 case I_GERROPT: 4592 /* 4593 * Get error option and return the value 4594 * to spot pointed to by arg 4595 */ 4596 { 4597 int erropt = 0; 4598 4599 erropt |= (stp->sd_flag & STRDERRNONPERSIST) ? RERRNONPERSIST : 4600 RERRNORM; 4601 erropt |= (stp->sd_flag & STWRERRNONPERSIST) ? WERRNONPERSIST : 4602 WERRNORM; 4603 return (strcopyout(&erropt, (void *)arg, sizeof (int), 4604 copyflag)); 4605 } 4606 4607 case I_SETSIG: 4608 /* 4609 * Register the calling proc to receive the SIGPOLL 4610 * signal based on the events given in arg. If 4611 * arg is zero, remove the proc from register list. 4612 */ 4613 { 4614 strsig_t *ssp, *pssp; 4615 struct pid *pidp; 4616 4617 pssp = NULL; 4618 pidp = curproc->p_pidp; 4619 /* 4620 * Hold sd_lock to prevent traversal of sd_siglist while 4621 * it is modified. 
4622 */ 4623 mutex_enter(&stp->sd_lock); 4624 for (ssp = stp->sd_siglist; ssp && (ssp->ss_pidp != pidp); 4625 pssp = ssp, ssp = ssp->ss_next) 4626 ; 4627 4628 if (arg) { 4629 if (arg & ~(S_INPUT|S_HIPRI|S_MSG|S_HANGUP|S_ERROR| 4630 S_RDNORM|S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)) { 4631 mutex_exit(&stp->sd_lock); 4632 return (EINVAL); 4633 } 4634 if ((arg & S_BANDURG) && !(arg & S_RDBAND)) { 4635 mutex_exit(&stp->sd_lock); 4636 return (EINVAL); 4637 } 4638 4639 /* 4640 * If proc not already registered, add it 4641 * to list. 4642 */ 4643 if (!ssp) { 4644 ssp = kmem_alloc(sizeof (strsig_t), KM_SLEEP); 4645 ssp->ss_pidp = pidp; 4646 ssp->ss_pid = pidp->pid_id; 4647 ssp->ss_next = NULL; 4648 if (pssp) 4649 pssp->ss_next = ssp; 4650 else 4651 stp->sd_siglist = ssp; 4652 mutex_enter(&pidlock); 4653 PID_HOLD(pidp); 4654 mutex_exit(&pidlock); 4655 } 4656 4657 /* 4658 * Set events. 4659 */ 4660 ssp->ss_events = (int)arg; 4661 } else { 4662 /* 4663 * Remove proc from register list. 4664 */ 4665 if (ssp) { 4666 mutex_enter(&pidlock); 4667 PID_RELE(pidp); 4668 mutex_exit(&pidlock); 4669 if (pssp) 4670 pssp->ss_next = ssp->ss_next; 4671 else 4672 stp->sd_siglist = ssp->ss_next; 4673 kmem_free(ssp, sizeof (strsig_t)); 4674 } else { 4675 mutex_exit(&stp->sd_lock); 4676 return (EINVAL); 4677 } 4678 } 4679 4680 /* 4681 * Recalculate OR of sig events. 4682 */ 4683 stp->sd_sigflags = 0; 4684 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 4685 stp->sd_sigflags |= ssp->ss_events; 4686 mutex_exit(&stp->sd_lock); 4687 return (0); 4688 } 4689 4690 case I_GETSIG: 4691 /* 4692 * Return (in arg) the current registration of events 4693 * for which the calling proc is to be signaled. 
4694 */ 4695 { 4696 struct strsig *ssp; 4697 struct pid *pidp; 4698 4699 pidp = curproc->p_pidp; 4700 mutex_enter(&stp->sd_lock); 4701 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 4702 if (ssp->ss_pidp == pidp) { 4703 error = strcopyout(&ssp->ss_events, (void *)arg, 4704 sizeof (int), copyflag); 4705 mutex_exit(&stp->sd_lock); 4706 return (error); 4707 } 4708 mutex_exit(&stp->sd_lock); 4709 return (EINVAL); 4710 } 4711 4712 case I_ESETSIG: 4713 /* 4714 * Register the ss_pid to receive the SIGPOLL 4715 * signal based on the events is ss_events arg. If 4716 * ss_events is zero, remove the proc from register list. 4717 */ 4718 { 4719 struct strsig *ssp, *pssp; 4720 struct proc *proc; 4721 struct pid *pidp; 4722 pid_t pid; 4723 struct strsigset ss; 4724 4725 error = strcopyin((void *)arg, &ss, sizeof (ss), copyflag); 4726 if (error) 4727 return (error); 4728 4729 pid = ss.ss_pid; 4730 4731 if (ss.ss_events != 0) { 4732 /* 4733 * Permissions check by sending signal 0. 4734 * Note that when kill fails it does a set_errno 4735 * causing the system call to fail. 4736 */ 4737 error = kill(pid, 0); 4738 if (error) { 4739 return (error); 4740 } 4741 } 4742 mutex_enter(&pidlock); 4743 if (pid == 0) 4744 proc = curproc; 4745 else if (pid < 0) 4746 proc = pgfind(-pid); 4747 else 4748 proc = prfind(pid); 4749 if (proc == NULL) { 4750 mutex_exit(&pidlock); 4751 return (ESRCH); 4752 } 4753 if (pid < 0) 4754 pidp = proc->p_pgidp; 4755 else 4756 pidp = proc->p_pidp; 4757 ASSERT(pidp); 4758 /* 4759 * Get a hold on the pid structure while referencing it. 4760 * There is a separate PID_HOLD should it be inserted 4761 * in the list below. 4762 */ 4763 PID_HOLD(pidp); 4764 mutex_exit(&pidlock); 4765 4766 pssp = NULL; 4767 /* 4768 * Hold sd_lock to prevent traversal of sd_siglist while 4769 * it is modified. 
4770 */ 4771 mutex_enter(&stp->sd_lock); 4772 for (ssp = stp->sd_siglist; ssp && (ssp->ss_pid != pid); 4773 pssp = ssp, ssp = ssp->ss_next) 4774 ; 4775 4776 if (ss.ss_events) { 4777 if (ss.ss_events & 4778 ~(S_INPUT|S_HIPRI|S_MSG|S_HANGUP|S_ERROR| 4779 S_RDNORM|S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)) { 4780 mutex_exit(&stp->sd_lock); 4781 mutex_enter(&pidlock); 4782 PID_RELE(pidp); 4783 mutex_exit(&pidlock); 4784 return (EINVAL); 4785 } 4786 if ((ss.ss_events & S_BANDURG) && 4787 !(ss.ss_events & S_RDBAND)) { 4788 mutex_exit(&stp->sd_lock); 4789 mutex_enter(&pidlock); 4790 PID_RELE(pidp); 4791 mutex_exit(&pidlock); 4792 return (EINVAL); 4793 } 4794 4795 /* 4796 * If proc not already registered, add it 4797 * to list. 4798 */ 4799 if (!ssp) { 4800 ssp = kmem_alloc(sizeof (strsig_t), KM_SLEEP); 4801 ssp->ss_pidp = pidp; 4802 ssp->ss_pid = pid; 4803 ssp->ss_next = NULL; 4804 if (pssp) 4805 pssp->ss_next = ssp; 4806 else 4807 stp->sd_siglist = ssp; 4808 mutex_enter(&pidlock); 4809 PID_HOLD(pidp); 4810 mutex_exit(&pidlock); 4811 } 4812 4813 /* 4814 * Set events. 4815 */ 4816 ssp->ss_events = ss.ss_events; 4817 } else { 4818 /* 4819 * Remove proc from register list. 4820 */ 4821 if (ssp) { 4822 mutex_enter(&pidlock); 4823 PID_RELE(pidp); 4824 mutex_exit(&pidlock); 4825 if (pssp) 4826 pssp->ss_next = ssp->ss_next; 4827 else 4828 stp->sd_siglist = ssp->ss_next; 4829 kmem_free(ssp, sizeof (strsig_t)); 4830 } else { 4831 mutex_exit(&stp->sd_lock); 4832 mutex_enter(&pidlock); 4833 PID_RELE(pidp); 4834 mutex_exit(&pidlock); 4835 return (EINVAL); 4836 } 4837 } 4838 4839 /* 4840 * Recalculate OR of sig events. 
4841 */ 4842 stp->sd_sigflags = 0; 4843 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 4844 stp->sd_sigflags |= ssp->ss_events; 4845 mutex_exit(&stp->sd_lock); 4846 mutex_enter(&pidlock); 4847 PID_RELE(pidp); 4848 mutex_exit(&pidlock); 4849 return (0); 4850 } 4851 4852 case I_EGETSIG: 4853 /* 4854 * Return (in arg) the current registration of events 4855 * for which the calling proc is to be signaled. 4856 */ 4857 { 4858 struct strsig *ssp; 4859 struct proc *proc; 4860 pid_t pid; 4861 struct pid *pidp; 4862 struct strsigset ss; 4863 4864 error = strcopyin((void *)arg, &ss, sizeof (ss), copyflag); 4865 if (error) 4866 return (error); 4867 4868 pid = ss.ss_pid; 4869 mutex_enter(&pidlock); 4870 if (pid == 0) 4871 proc = curproc; 4872 else if (pid < 0) 4873 proc = pgfind(-pid); 4874 else 4875 proc = prfind(pid); 4876 if (proc == NULL) { 4877 mutex_exit(&pidlock); 4878 return (ESRCH); 4879 } 4880 if (pid < 0) 4881 pidp = proc->p_pgidp; 4882 else 4883 pidp = proc->p_pidp; 4884 4885 /* Prevent the pidp from being reassigned */ 4886 PID_HOLD(pidp); 4887 mutex_exit(&pidlock); 4888 4889 mutex_enter(&stp->sd_lock); 4890 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 4891 if (ssp->ss_pid == pid) { 4892 ss.ss_pid = ssp->ss_pid; 4893 ss.ss_events = ssp->ss_events; 4894 error = strcopyout(&ss, (void *)arg, 4895 sizeof (struct strsigset), copyflag); 4896 mutex_exit(&stp->sd_lock); 4897 mutex_enter(&pidlock); 4898 PID_RELE(pidp); 4899 mutex_exit(&pidlock); 4900 return (error); 4901 } 4902 mutex_exit(&stp->sd_lock); 4903 mutex_enter(&pidlock); 4904 PID_RELE(pidp); 4905 mutex_exit(&pidlock); 4906 return (EINVAL); 4907 } 4908 4909 case I_PEEK: 4910 { 4911 STRUCT_DECL(strpeek, strpeek); 4912 size_t n; 4913 mblk_t *fmp, *tmp_mp = NULL; 4914 4915 STRUCT_INIT(strpeek, flag); 4916 4917 error = strcopyin((void *)arg, STRUCT_BUF(strpeek), 4918 STRUCT_SIZE(strpeek), copyflag); 4919 if (error) 4920 return (error); 4921 4922 mutex_enter(QLOCK(rdq)); 4923 /* 4924 * Skip the invalid 
messages 4925 */ 4926 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next) 4927 if (mp->b_datap->db_type != M_SIG) 4928 break; 4929 4930 /* 4931 * If user has requested to peek at a high priority message 4932 * and first message is not, return 0 4933 */ 4934 if (mp != NULL) { 4935 if ((STRUCT_FGET(strpeek, flags) & RS_HIPRI) && 4936 queclass(mp) == QNORM) { 4937 *rvalp = 0; 4938 mutex_exit(QLOCK(rdq)); 4939 return (0); 4940 } 4941 } else if (stp->sd_struiordq == NULL || 4942 (STRUCT_FGET(strpeek, flags) & RS_HIPRI)) { 4943 /* 4944 * No mblks to look at at the streamhead and 4945 * 1). This isn't a synch stream or 4946 * 2). This is a synch stream but caller wants high 4947 * priority messages which is not supported by 4948 * the synch stream. (it only supports QNORM) 4949 */ 4950 *rvalp = 0; 4951 mutex_exit(QLOCK(rdq)); 4952 return (0); 4953 } 4954 4955 fmp = mp; 4956 4957 if (mp && mp->b_datap->db_type == M_PASSFP) { 4958 mutex_exit(QLOCK(rdq)); 4959 return (EBADMSG); 4960 } 4961 4962 ASSERT(mp == NULL || mp->b_datap->db_type == M_PCPROTO || 4963 mp->b_datap->db_type == M_PROTO || 4964 mp->b_datap->db_type == M_DATA); 4965 4966 if (mp && mp->b_datap->db_type == M_PCPROTO) { 4967 STRUCT_FSET(strpeek, flags, RS_HIPRI); 4968 } else { 4969 STRUCT_FSET(strpeek, flags, 0); 4970 } 4971 4972 4973 if (mp && ((tmp_mp = dupmsg(mp)) == NULL)) { 4974 mutex_exit(QLOCK(rdq)); 4975 return (ENOSR); 4976 } 4977 mutex_exit(QLOCK(rdq)); 4978 4979 /* 4980 * set mp = tmp_mp, so that I_PEEK processing can continue. 4981 * tmp_mp is used to free the dup'd message. 4982 */ 4983 mp = tmp_mp; 4984 4985 uio.uio_fmode = 0; 4986 uio.uio_extflg = UIO_COPY_CACHED; 4987 uio.uio_segflg = (copyflag == U_TO_K) ? UIO_USERSPACE : 4988 UIO_SYSSPACE; 4989 uio.uio_limit = 0; 4990 /* 4991 * First process PROTO blocks, if any. 4992 * If user doesn't want to get ctl info by setting maxlen <= 0, 4993 * then set len to -1/0 and skip control blocks part. 
4994 */ 4995 if (STRUCT_FGET(strpeek, ctlbuf.maxlen) < 0) 4996 STRUCT_FSET(strpeek, ctlbuf.len, -1); 4997 else if (STRUCT_FGET(strpeek, ctlbuf.maxlen) == 0) 4998 STRUCT_FSET(strpeek, ctlbuf.len, 0); 4999 else { 5000 int ctl_part = 0; 5001 5002 iov.iov_base = STRUCT_FGETP(strpeek, ctlbuf.buf); 5003 iov.iov_len = STRUCT_FGET(strpeek, ctlbuf.maxlen); 5004 uio.uio_iov = &iov; 5005 uio.uio_resid = iov.iov_len; 5006 uio.uio_loffset = 0; 5007 uio.uio_iovcnt = 1; 5008 while (mp && mp->b_datap->db_type != M_DATA && 5009 uio.uio_resid >= 0) { 5010 ASSERT(STRUCT_FGET(strpeek, flags) == 0 ? 5011 mp->b_datap->db_type == M_PROTO : 5012 mp->b_datap->db_type == M_PCPROTO); 5013 5014 if ((n = MIN(uio.uio_resid, 5015 mp->b_wptr - mp->b_rptr)) != 0 && 5016 (error = uiomove((char *)mp->b_rptr, n, 5017 UIO_READ, &uio)) != 0) { 5018 freemsg(tmp_mp); 5019 return (error); 5020 } 5021 ctl_part = 1; 5022 mp = mp->b_cont; 5023 } 5024 /* No ctl message */ 5025 if (ctl_part == 0) 5026 STRUCT_FSET(strpeek, ctlbuf.len, -1); 5027 else 5028 STRUCT_FSET(strpeek, ctlbuf.len, 5029 STRUCT_FGET(strpeek, ctlbuf.maxlen) - 5030 uio.uio_resid); 5031 } 5032 5033 /* 5034 * Now process DATA blocks, if any. 5035 * If user doesn't want to get data info by setting maxlen <= 0, 5036 * then set len to -1/0 and skip data blocks part. 
5037 */ 5038 if (STRUCT_FGET(strpeek, databuf.maxlen) < 0) 5039 STRUCT_FSET(strpeek, databuf.len, -1); 5040 else if (STRUCT_FGET(strpeek, databuf.maxlen) == 0) 5041 STRUCT_FSET(strpeek, databuf.len, 0); 5042 else { 5043 int data_part = 0; 5044 5045 iov.iov_base = STRUCT_FGETP(strpeek, databuf.buf); 5046 iov.iov_len = STRUCT_FGET(strpeek, databuf.maxlen); 5047 uio.uio_iov = &iov; 5048 uio.uio_resid = iov.iov_len; 5049 uio.uio_loffset = 0; 5050 uio.uio_iovcnt = 1; 5051 while (mp && uio.uio_resid) { 5052 if (mp->b_datap->db_type == M_DATA) { 5053 if ((n = MIN(uio.uio_resid, 5054 mp->b_wptr - mp->b_rptr)) != 0 && 5055 (error = uiomove((char *)mp->b_rptr, 5056 n, UIO_READ, &uio)) != 0) { 5057 freemsg(tmp_mp); 5058 return (error); 5059 } 5060 data_part = 1; 5061 } 5062 ASSERT(data_part == 0 || 5063 mp->b_datap->db_type == M_DATA); 5064 mp = mp->b_cont; 5065 } 5066 /* No data message */ 5067 if (data_part == 0) 5068 STRUCT_FSET(strpeek, databuf.len, -1); 5069 else 5070 STRUCT_FSET(strpeek, databuf.len, 5071 STRUCT_FGET(strpeek, databuf.maxlen) - 5072 uio.uio_resid); 5073 } 5074 freemsg(tmp_mp); 5075 5076 /* 5077 * It is a synch stream and user wants to get 5078 * data (maxlen > 0). 5079 * uio setup is done by the codes that process DATA 5080 * blocks above. 5081 */ 5082 if ((fmp == NULL) && STRUCT_FGET(strpeek, databuf.maxlen) > 0) { 5083 infod_t infod; 5084 5085 infod.d_cmd = INFOD_COPYOUT; 5086 infod.d_res = 0; 5087 infod.d_uiop = &uio; 5088 error = infonext(rdq, &infod); 5089 if (error == EINVAL || error == EBUSY) 5090 error = 0; 5091 if (error) 5092 return (error); 5093 STRUCT_FSET(strpeek, databuf.len, STRUCT_FGET(strpeek, 5094 databuf.maxlen) - uio.uio_resid); 5095 if (STRUCT_FGET(strpeek, databuf.len) == 0) { 5096 /* 5097 * No data found by the infonext(). 
5098 */ 5099 STRUCT_FSET(strpeek, databuf.len, -1); 5100 } 5101 } 5102 error = strcopyout(STRUCT_BUF(strpeek), (void *)arg, 5103 STRUCT_SIZE(strpeek), copyflag); 5104 if (error) { 5105 return (error); 5106 } 5107 /* 5108 * If there is no message retrieved, set return code to 0 5109 * otherwise, set it to 1. 5110 */ 5111 if (STRUCT_FGET(strpeek, ctlbuf.len) == -1 && 5112 STRUCT_FGET(strpeek, databuf.len) == -1) 5113 *rvalp = 0; 5114 else 5115 *rvalp = 1; 5116 return (0); 5117 } 5118 5119 case I_FDINSERT: 5120 { 5121 STRUCT_DECL(strfdinsert, strfdinsert); 5122 struct file *resftp; 5123 struct stdata *resstp; 5124 t_uscalar_t ival; 5125 ssize_t msgsize; 5126 struct strbuf mctl; 5127 5128 STRUCT_INIT(strfdinsert, flag); 5129 if (stp->sd_flag & STRHUP) 5130 return (ENXIO); 5131 /* 5132 * STRDERR, STWRERR and STPLEX tested above. 5133 */ 5134 error = strcopyin((void *)arg, STRUCT_BUF(strfdinsert), 5135 STRUCT_SIZE(strfdinsert), copyflag); 5136 if (error) 5137 return (error); 5138 5139 if (STRUCT_FGET(strfdinsert, offset) < 0 || 5140 (STRUCT_FGET(strfdinsert, offset) % 5141 sizeof (t_uscalar_t)) != 0) 5142 return (EINVAL); 5143 if ((resftp = getf(STRUCT_FGET(strfdinsert, fildes))) != NULL) { 5144 if ((resstp = resftp->f_vnode->v_stream) == NULL) { 5145 releasef(STRUCT_FGET(strfdinsert, fildes)); 5146 return (EINVAL); 5147 } 5148 } else 5149 return (EINVAL); 5150 5151 mutex_enter(&resstp->sd_lock); 5152 if (resstp->sd_flag & (STRDERR|STWRERR|STRHUP|STPLEX)) { 5153 error = strgeterr(resstp, 5154 STRDERR|STWRERR|STRHUP|STPLEX, 0); 5155 if (error != 0) { 5156 mutex_exit(&resstp->sd_lock); 5157 releasef(STRUCT_FGET(strfdinsert, fildes)); 5158 return (error); 5159 } 5160 } 5161 mutex_exit(&resstp->sd_lock); 5162 5163 #ifdef _ILP32 5164 { 5165 queue_t *q; 5166 queue_t *mate = NULL; 5167 5168 /* get read queue of stream terminus */ 5169 claimstr(resstp->sd_wrq); 5170 for (q = resstp->sd_wrq->q_next; q->q_next != NULL; 5171 q = q->q_next) 5172 if (!STRMATED(resstp) && STREAM(q) != 
resstp && 5173 mate == NULL) { 5174 ASSERT(q->q_qinfo->qi_srvp); 5175 ASSERT(_OTHERQ(q)->q_qinfo->qi_srvp); 5176 claimstr(q); 5177 mate = q; 5178 } 5179 q = _RD(q); 5180 if (mate) 5181 releasestr(mate); 5182 releasestr(resstp->sd_wrq); 5183 ival = (t_uscalar_t)q; 5184 } 5185 #else 5186 ival = (t_uscalar_t)getminor(resftp->f_vnode->v_rdev); 5187 #endif /* _ILP32 */ 5188 5189 if (STRUCT_FGET(strfdinsert, ctlbuf.len) < 5190 STRUCT_FGET(strfdinsert, offset) + sizeof (t_uscalar_t)) { 5191 releasef(STRUCT_FGET(strfdinsert, fildes)); 5192 return (EINVAL); 5193 } 5194 5195 /* 5196 * Check for legal flag value. 5197 */ 5198 if (STRUCT_FGET(strfdinsert, flags) & ~RS_HIPRI) { 5199 releasef(STRUCT_FGET(strfdinsert, fildes)); 5200 return (EINVAL); 5201 } 5202 5203 /* get these values from those cached in the stream head */ 5204 mutex_enter(QLOCK(stp->sd_wrq)); 5205 rmin = stp->sd_qn_minpsz; 5206 rmax = stp->sd_qn_maxpsz; 5207 mutex_exit(QLOCK(stp->sd_wrq)); 5208 5209 /* 5210 * Make sure ctl and data sizes together fall within 5211 * the limits of the max and min receive packet sizes 5212 * and do not exceed system limit. A negative data 5213 * length means that no data part is to be sent. 
5214 */ 5215 ASSERT((rmax >= 0) || (rmax == INFPSZ)); 5216 if (rmax == 0) { 5217 releasef(STRUCT_FGET(strfdinsert, fildes)); 5218 return (ERANGE); 5219 } 5220 if ((msgsize = STRUCT_FGET(strfdinsert, databuf.len)) < 0) 5221 msgsize = 0; 5222 if ((msgsize < rmin) || 5223 ((msgsize > rmax) && (rmax != INFPSZ)) || 5224 (STRUCT_FGET(strfdinsert, ctlbuf.len) > strctlsz)) { 5225 releasef(STRUCT_FGET(strfdinsert, fildes)); 5226 return (ERANGE); 5227 } 5228 5229 mutex_enter(&stp->sd_lock); 5230 while (!(STRUCT_FGET(strfdinsert, flags) & RS_HIPRI) && 5231 !canputnext(stp->sd_wrq)) { 5232 if ((error = strwaitq(stp, WRITEWAIT, (ssize_t)0, 5233 flag, -1, &done)) != 0 || done) { 5234 mutex_exit(&stp->sd_lock); 5235 releasef(STRUCT_FGET(strfdinsert, fildes)); 5236 return (error); 5237 } 5238 if ((error = i_straccess(stp, access)) != 0) { 5239 mutex_exit(&stp->sd_lock); 5240 releasef( 5241 STRUCT_FGET(strfdinsert, fildes)); 5242 return (error); 5243 } 5244 } 5245 mutex_exit(&stp->sd_lock); 5246 5247 /* 5248 * Copy strfdinsert.ctlbuf into native form of 5249 * ctlbuf to pass down into strmakemsg(). 5250 */ 5251 mctl.maxlen = STRUCT_FGET(strfdinsert, ctlbuf.maxlen); 5252 mctl.len = STRUCT_FGET(strfdinsert, ctlbuf.len); 5253 mctl.buf = STRUCT_FGETP(strfdinsert, ctlbuf.buf); 5254 5255 iov.iov_base = STRUCT_FGETP(strfdinsert, databuf.buf); 5256 iov.iov_len = STRUCT_FGET(strfdinsert, databuf.len); 5257 uio.uio_iov = &iov; 5258 uio.uio_iovcnt = 1; 5259 uio.uio_loffset = 0; 5260 uio.uio_segflg = (copyflag == U_TO_K) ? 
UIO_USERSPACE : 5261 UIO_SYSSPACE; 5262 uio.uio_fmode = 0; 5263 uio.uio_extflg = UIO_COPY_CACHED; 5264 uio.uio_resid = iov.iov_len; 5265 if ((error = strmakemsg(&mctl, 5266 &msgsize, &uio, stp, 5267 STRUCT_FGET(strfdinsert, flags), &mp)) != 0 || !mp) { 5268 STRUCT_FSET(strfdinsert, databuf.len, msgsize); 5269 releasef(STRUCT_FGET(strfdinsert, fildes)); 5270 return (error); 5271 } 5272 5273 STRUCT_FSET(strfdinsert, databuf.len, msgsize); 5274 5275 /* 5276 * Place the possibly reencoded queue pointer 'offset' bytes 5277 * from the start of the control portion of the message. 5278 */ 5279 *((t_uscalar_t *)(mp->b_rptr + 5280 STRUCT_FGET(strfdinsert, offset))) = ival; 5281 5282 /* 5283 * Put message downstream. 5284 */ 5285 stream_willservice(stp); 5286 putnext(stp->sd_wrq, mp); 5287 stream_runservice(stp); 5288 releasef(STRUCT_FGET(strfdinsert, fildes)); 5289 return (error); 5290 } 5291 5292 case I_SENDFD: 5293 { 5294 struct file *fp; 5295 5296 if ((fp = getf((int)arg)) == NULL) 5297 return (EBADF); 5298 error = do_sendfp(stp, fp, crp); 5299 if (audit_active) { 5300 audit_fdsend((int)arg, fp, error); 5301 } 5302 releasef((int)arg); 5303 return (error); 5304 } 5305 5306 case I_RECVFD: 5307 case I_E_RECVFD: 5308 { 5309 struct k_strrecvfd *srf; 5310 int i, fd; 5311 5312 mutex_enter(&stp->sd_lock); 5313 while (!(mp = getq(rdq))) { 5314 if (stp->sd_flag & (STRHUP|STREOF)) { 5315 mutex_exit(&stp->sd_lock); 5316 return (ENXIO); 5317 } 5318 if ((error = strwaitq(stp, GETWAIT, (ssize_t)0, 5319 flag, -1, &done)) != 0 || done) { 5320 mutex_exit(&stp->sd_lock); 5321 return (error); 5322 } 5323 if ((error = i_straccess(stp, access)) != 0) { 5324 mutex_exit(&stp->sd_lock); 5325 return (error); 5326 } 5327 } 5328 if (mp->b_datap->db_type != M_PASSFP) { 5329 putback(stp, rdq, mp, mp->b_band); 5330 mutex_exit(&stp->sd_lock); 5331 return (EBADMSG); 5332 } 5333 mutex_exit(&stp->sd_lock); 5334 5335 srf = (struct k_strrecvfd *)mp->b_rptr; 5336 if ((fd = ufalloc(0)) == -1) { 5337 
mutex_enter(&stp->sd_lock); 5338 putback(stp, rdq, mp, mp->b_band); 5339 mutex_exit(&stp->sd_lock); 5340 return (EMFILE); 5341 } 5342 if (cmd == I_RECVFD) { 5343 struct o_strrecvfd ostrfd; 5344 5345 /* check to see if uid/gid values are too large. */ 5346 5347 if (srf->uid > (o_uid_t)USHRT_MAX || 5348 srf->gid > (o_gid_t)USHRT_MAX) { 5349 mutex_enter(&stp->sd_lock); 5350 putback(stp, rdq, mp, mp->b_band); 5351 mutex_exit(&stp->sd_lock); 5352 setf(fd, NULL); /* release fd entry */ 5353 return (EOVERFLOW); 5354 } 5355 5356 ostrfd.fd = fd; 5357 ostrfd.uid = (o_uid_t)srf->uid; 5358 ostrfd.gid = (o_gid_t)srf->gid; 5359 5360 /* Null the filler bits */ 5361 for (i = 0; i < 8; i++) 5362 ostrfd.fill[i] = 0; 5363 5364 error = strcopyout(&ostrfd, (void *)arg, 5365 sizeof (struct o_strrecvfd), copyflag); 5366 } else { /* I_E_RECVFD */ 5367 struct strrecvfd strfd; 5368 5369 strfd.fd = fd; 5370 strfd.uid = srf->uid; 5371 strfd.gid = srf->gid; 5372 5373 /* null the filler bits */ 5374 for (i = 0; i < 8; i++) 5375 strfd.fill[i] = 0; 5376 5377 error = strcopyout(&strfd, (void *)arg, 5378 sizeof (struct strrecvfd), copyflag); 5379 } 5380 5381 if (error) { 5382 setf(fd, NULL); /* release fd entry */ 5383 mutex_enter(&stp->sd_lock); 5384 putback(stp, rdq, mp, mp->b_band); 5385 mutex_exit(&stp->sd_lock); 5386 return (error); 5387 } 5388 if (audit_active) { 5389 audit_fdrecv(fd, srf->fp); 5390 } 5391 5392 /* 5393 * Always increment f_count since the freemsg() below will 5394 * always call free_passfp() which performs a closef(). 5395 */ 5396 mutex_enter(&srf->fp->f_tlock); 5397 srf->fp->f_count++; 5398 mutex_exit(&srf->fp->f_tlock); 5399 setf(fd, srf->fp); 5400 freemsg(mp); 5401 return (0); 5402 } 5403 5404 case I_SWROPT: 5405 /* 5406 * Set/clear the write options. arg is a bit 5407 * mask with any of the following bits set... 5408 * SNDZERO - send zero length message 5409 * SNDPIPE - send sigpipe to process if 5410 * sd_werror is set and process is 5411 * doing a write or putmsg. 
5412 * The new stream head write options should reflect 5413 * what is in arg. 5414 */ 5415 if (arg & ~(SNDZERO|SNDPIPE)) 5416 return (EINVAL); 5417 5418 mutex_enter(&stp->sd_lock); 5419 stp->sd_wput_opt &= ~(SW_SIGPIPE|SW_SNDZERO); 5420 if (arg & SNDZERO) 5421 stp->sd_wput_opt |= SW_SNDZERO; 5422 if (arg & SNDPIPE) 5423 stp->sd_wput_opt |= SW_SIGPIPE; 5424 mutex_exit(&stp->sd_lock); 5425 return (0); 5426 5427 case I_GWROPT: 5428 { 5429 int wropt = 0; 5430 5431 if (stp->sd_wput_opt & SW_SNDZERO) 5432 wropt |= SNDZERO; 5433 if (stp->sd_wput_opt & SW_SIGPIPE) 5434 wropt |= SNDPIPE; 5435 return (strcopyout(&wropt, (void *)arg, sizeof (wropt), 5436 copyflag)); 5437 } 5438 5439 case I_LIST: 5440 /* 5441 * Returns all the modules found on this stream, 5442 * upto the driver. If argument is NULL, return the 5443 * number of modules (including driver). If argument 5444 * is not NULL, copy the names into the structure 5445 * provided. 5446 */ 5447 5448 { 5449 queue_t *q; 5450 char *qname; 5451 int i, nmods; 5452 struct str_mlist *mlist; 5453 STRUCT_DECL(str_list, strlist); 5454 5455 if (arg == NULL) { /* Return number of modules plus driver */ 5456 if (stp->sd_vnode->v_type == VFIFO) 5457 *rvalp = stp->sd_pushcnt; 5458 else 5459 *rvalp = stp->sd_pushcnt + 1; 5460 return (0); 5461 } 5462 5463 STRUCT_INIT(strlist, flag); 5464 5465 error = strcopyin((void *)arg, STRUCT_BUF(strlist), 5466 STRUCT_SIZE(strlist), copyflag); 5467 if (error != 0) 5468 return (error); 5469 5470 mlist = STRUCT_FGETP(strlist, sl_modlist); 5471 nmods = STRUCT_FGET(strlist, sl_nmods); 5472 if (nmods <= 0) 5473 return (EINVAL); 5474 5475 claimstr(stp->sd_wrq); 5476 q = stp->sd_wrq; 5477 for (i = 0; i < nmods && _SAMESTR(q); i++, q = q->q_next) { 5478 qname = Q2NAME(q->q_next); 5479 error = strcopyout(qname, &mlist[i], strlen(qname) + 1, 5480 copyflag); 5481 if (error != 0) { 5482 releasestr(stp->sd_wrq); 5483 return (error); 5484 } 5485 } 5486 releasestr(stp->sd_wrq); 5487 return (strcopyout(&i, (void 
*)arg, sizeof (int), copyflag)); 5488 } 5489 5490 case I_CKBAND: 5491 { 5492 queue_t *q; 5493 qband_t *qbp; 5494 5495 if ((arg < 0) || (arg >= NBAND)) 5496 return (EINVAL); 5497 q = _RD(stp->sd_wrq); 5498 mutex_enter(QLOCK(q)); 5499 if (arg > (int)q->q_nband) { 5500 *rvalp = 0; 5501 } else { 5502 if (arg == 0) { 5503 if (q->q_first) 5504 *rvalp = 1; 5505 else 5506 *rvalp = 0; 5507 } else { 5508 qbp = q->q_bandp; 5509 while (--arg > 0) 5510 qbp = qbp->qb_next; 5511 if (qbp->qb_first) 5512 *rvalp = 1; 5513 else 5514 *rvalp = 0; 5515 } 5516 } 5517 mutex_exit(QLOCK(q)); 5518 return (0); 5519 } 5520 5521 case I_GETBAND: 5522 { 5523 int intpri; 5524 queue_t *q; 5525 5526 q = _RD(stp->sd_wrq); 5527 mutex_enter(QLOCK(q)); 5528 mp = q->q_first; 5529 if (!mp) { 5530 mutex_exit(QLOCK(q)); 5531 return (ENODATA); 5532 } 5533 intpri = (int)mp->b_band; 5534 error = strcopyout(&intpri, (void *)arg, sizeof (int), 5535 copyflag); 5536 mutex_exit(QLOCK(q)); 5537 return (error); 5538 } 5539 5540 case I_ATMARK: 5541 { 5542 queue_t *q; 5543 5544 if (arg & ~(ANYMARK|LASTMARK)) 5545 return (EINVAL); 5546 q = _RD(stp->sd_wrq); 5547 mutex_enter(&stp->sd_lock); 5548 if ((stp->sd_flag & STRATMARK) && (arg == ANYMARK)) { 5549 *rvalp = 1; 5550 } else { 5551 mutex_enter(QLOCK(q)); 5552 mp = q->q_first; 5553 5554 if (mp == NULL) 5555 *rvalp = 0; 5556 else if ((arg == ANYMARK) && (mp->b_flag & MSGMARK)) 5557 *rvalp = 1; 5558 else if ((arg == LASTMARK) && (mp == stp->sd_mark)) 5559 *rvalp = 1; 5560 else 5561 *rvalp = 0; 5562 mutex_exit(QLOCK(q)); 5563 } 5564 mutex_exit(&stp->sd_lock); 5565 return (0); 5566 } 5567 5568 case I_CANPUT: 5569 { 5570 char band; 5571 5572 if ((arg < 0) || (arg >= NBAND)) 5573 return (EINVAL); 5574 band = (char)arg; 5575 *rvalp = bcanputnext(stp->sd_wrq, band); 5576 return (0); 5577 } 5578 5579 case I_SETCLTIME: 5580 { 5581 int closetime; 5582 5583 error = strcopyin((void *)arg, &closetime, sizeof (int), 5584 copyflag); 5585 if (error) 5586 return (error); 5587 if 
(closetime < 0) 5588 return (EINVAL); 5589 5590 stp->sd_closetime = closetime; 5591 return (0); 5592 } 5593 5594 case I_GETCLTIME: 5595 { 5596 int closetime; 5597 5598 closetime = stp->sd_closetime; 5599 return (strcopyout(&closetime, (void *)arg, sizeof (int), 5600 copyflag)); 5601 } 5602 5603 case TIOCGSID: 5604 { 5605 pid_t sid; 5606 5607 mutex_enter(&stp->sd_lock); 5608 if (stp->sd_sidp == NULL) { 5609 mutex_exit(&stp->sd_lock); 5610 return (ENOTTY); 5611 } 5612 sid = stp->sd_sidp->pid_id; 5613 mutex_exit(&stp->sd_lock); 5614 return (strcopyout(&sid, (void *)arg, sizeof (pid_t), 5615 copyflag)); 5616 } 5617 5618 case TIOCSPGRP: 5619 { 5620 pid_t pgrp; 5621 proc_t *q; 5622 pid_t sid, fg_pgid, bg_pgid; 5623 5624 if (error = strcopyin((void *)arg, &pgrp, sizeof (pid_t), 5625 copyflag)) 5626 return (error); 5627 mutex_enter(&stp->sd_lock); 5628 mutex_enter(&pidlock); 5629 if (stp->sd_sidp != ttoproc(curthread)->p_sessp->s_sidp) { 5630 mutex_exit(&pidlock); 5631 mutex_exit(&stp->sd_lock); 5632 return (ENOTTY); 5633 } 5634 if (pgrp == stp->sd_pgidp->pid_id) { 5635 mutex_exit(&pidlock); 5636 mutex_exit(&stp->sd_lock); 5637 return (0); 5638 } 5639 if (pgrp <= 0 || pgrp >= maxpid) { 5640 mutex_exit(&pidlock); 5641 mutex_exit(&stp->sd_lock); 5642 return (EINVAL); 5643 } 5644 if ((q = pgfind(pgrp)) == NULL || 5645 q->p_sessp != ttoproc(curthread)->p_sessp) { 5646 mutex_exit(&pidlock); 5647 mutex_exit(&stp->sd_lock); 5648 return (EPERM); 5649 } 5650 sid = stp->sd_sidp->pid_id; 5651 fg_pgid = q->p_pgrp; 5652 bg_pgid = stp->sd_pgidp->pid_id; 5653 CL_SET_PROCESS_GROUP(curthread, sid, bg_pgid, fg_pgid); 5654 PID_RELE(stp->sd_pgidp); 5655 ctty_clear_sighuped(); 5656 stp->sd_pgidp = q->p_pgidp; 5657 PID_HOLD(stp->sd_pgidp); 5658 mutex_exit(&pidlock); 5659 mutex_exit(&stp->sd_lock); 5660 return (0); 5661 } 5662 5663 case TIOCGPGRP: 5664 { 5665 pid_t pgrp; 5666 5667 mutex_enter(&stp->sd_lock); 5668 if (stp->sd_sidp == NULL) { 5669 mutex_exit(&stp->sd_lock); 5670 return (ENOTTY); 
5671 } 5672 pgrp = stp->sd_pgidp->pid_id; 5673 mutex_exit(&stp->sd_lock); 5674 return (strcopyout(&pgrp, (void *)arg, sizeof (pid_t), 5675 copyflag)); 5676 } 5677 5678 case TIOCSCTTY: 5679 { 5680 return (strctty(stp)); 5681 } 5682 5683 case TIOCNOTTY: 5684 { 5685 /* freectty() always assumes curproc. */ 5686 if (freectty(B_FALSE) != 0) 5687 return (0); 5688 return (ENOTTY); 5689 } 5690 5691 case FIONBIO: 5692 case FIOASYNC: 5693 return (0); /* handled by the upper layer */ 5694 } 5695 } 5696 5697 /* 5698 * Custom free routine used for M_PASSFP messages. 5699 */ 5700 static void 5701 free_passfp(struct k_strrecvfd *srf) 5702 { 5703 (void) closef(srf->fp); 5704 kmem_free(srf, sizeof (struct k_strrecvfd) + sizeof (frtn_t)); 5705 } 5706 5707 /* ARGSUSED */ 5708 int 5709 do_sendfp(struct stdata *stp, struct file *fp, struct cred *cr) 5710 { 5711 queue_t *qp, *nextqp; 5712 struct k_strrecvfd *srf; 5713 mblk_t *mp; 5714 frtn_t *frtnp; 5715 size_t bufsize; 5716 queue_t *mate = NULL; 5717 syncq_t *sq = NULL; 5718 int retval = 0; 5719 5720 if (stp->sd_flag & STRHUP) 5721 return (ENXIO); 5722 5723 claimstr(stp->sd_wrq); 5724 5725 /* Fastpath, we have a pipe, and we are already mated, use it. */ 5726 if (STRMATED(stp)) { 5727 qp = _RD(stp->sd_mate->sd_wrq); 5728 claimstr(qp); 5729 mate = qp; 5730 } else { /* Not already mated. */ 5731 5732 /* 5733 * Walk the stream to the end of this one. 5734 * assumes that the claimstr() will prevent 5735 * plumbing between the stream head and the 5736 * driver from changing 5737 */ 5738 qp = stp->sd_wrq; 5739 5740 /* 5741 * Loop until we reach the end of this stream. 5742 * On completion, qp points to the write queue 5743 * at the end of the stream, or the read queue 5744 * at the stream head if this is a fifo. 5745 */ 5746 while (((qp = qp->q_next) != NULL) && _SAMESTR(qp)) 5747 ; 5748 5749 /* 5750 * Just in case we get a q_next which is NULL, but 5751 * not at the end of the stream. 
This is actually 5752 * broken, so we set an assert to catch it in 5753 * debug, and set an error and return if not debug. 5754 */ 5755 ASSERT(qp); 5756 if (qp == NULL) { 5757 releasestr(stp->sd_wrq); 5758 return (EINVAL); 5759 } 5760 5761 /* 5762 * Enter the syncq for the driver, so (hopefully) 5763 * the queue values will not change on us. 5764 * XXXX - This will only prevent the race IFF only 5765 * the write side modifies the q_next member, and 5766 * the put procedure is protected by at least 5767 * MT_PERQ. 5768 */ 5769 if ((sq = qp->q_syncq) != NULL) 5770 entersq(sq, SQ_PUT); 5771 5772 /* Now get the q_next value from this qp. */ 5773 nextqp = qp->q_next; 5774 5775 /* 5776 * If nextqp exists and the other stream is different 5777 * from this one claim the stream, set the mate, and 5778 * get the read queue at the stream head of the other 5779 * stream. Assumes that nextqp was at least valid when 5780 * we got it. Hopefully the entersq of the driver 5781 * will prevent it from changing on us. 5782 */ 5783 if ((nextqp != NULL) && (STREAM(nextqp) != stp)) { 5784 ASSERT(qp->q_qinfo->qi_srvp); 5785 ASSERT(_OTHERQ(qp)->q_qinfo->qi_srvp); 5786 ASSERT(_OTHERQ(qp->q_next)->q_qinfo->qi_srvp); 5787 claimstr(nextqp); 5788 5789 /* Make sure we still have a q_next */ 5790 if (nextqp != qp->q_next) { 5791 releasestr(stp->sd_wrq); 5792 releasestr(nextqp); 5793 return (EINVAL); 5794 } 5795 5796 qp = _RD(STREAM(nextqp)->sd_wrq); 5797 mate = qp; 5798 } 5799 /* If we entered the synq above, leave it. 
*/ 5800 if (sq != NULL) 5801 leavesq(sq, SQ_PUT); 5802 } /* STRMATED(STP) */ 5803 5804 /* XXX prevents substitution of the ops vector */ 5805 if (qp->q_qinfo != &strdata && qp->q_qinfo != &fifo_strdata) { 5806 retval = EINVAL; 5807 goto out; 5808 } 5809 5810 if (qp->q_flag & QFULL) { 5811 retval = EAGAIN; 5812 goto out; 5813 } 5814 5815 /* 5816 * Since M_PASSFP messages include a file descriptor, we use 5817 * esballoc() and specify a custom free routine (free_passfp()) that 5818 * will close the descriptor as part of freeing the message. For 5819 * convenience, we stash the frtn_t right after the data block. 5820 */ 5821 bufsize = sizeof (struct k_strrecvfd) + sizeof (frtn_t); 5822 srf = kmem_alloc(bufsize, KM_NOSLEEP); 5823 if (srf == NULL) { 5824 retval = EAGAIN; 5825 goto out; 5826 } 5827 5828 frtnp = (frtn_t *)(srf + 1); 5829 frtnp->free_arg = (caddr_t)srf; 5830 frtnp->free_func = free_passfp; 5831 5832 mp = esballoc((uchar_t *)srf, bufsize, BPRI_MED, frtnp); 5833 if (mp == NULL) { 5834 kmem_free(srf, bufsize); 5835 retval = EAGAIN; 5836 goto out; 5837 } 5838 mp->b_wptr += sizeof (struct k_strrecvfd); 5839 mp->b_datap->db_type = M_PASSFP; 5840 5841 srf->fp = fp; 5842 srf->uid = crgetuid(curthread->t_cred); 5843 srf->gid = crgetgid(curthread->t_cred); 5844 mutex_enter(&fp->f_tlock); 5845 fp->f_count++; 5846 mutex_exit(&fp->f_tlock); 5847 5848 put(qp, mp); 5849 out: 5850 releasestr(stp->sd_wrq); 5851 if (mate) 5852 releasestr(mate); 5853 return (retval); 5854 } 5855 5856 /* 5857 * Send an ioctl message downstream and wait for acknowledgement. 5858 * flags may be set to either U_TO_K or K_TO_K and a combination 5859 * of STR_NOERROR or STR_NOSIG 5860 * STR_NOSIG: Signals are essentially ignored or held and have 5861 * no effect for the duration of the call. 5862 * STR_NOERROR: Ignores stream head read, write and hup errors. 
5863 * Additionally, if an existing ioctl times out, it is assumed 5864 * lost and and this ioctl will continue as if the previous ioctl had 5865 * finished. ETIME may be returned if this ioctl times out (i.e. 5866 * ic_timout is not INFTIM). Non-stream head errors may be returned if 5867 * the ioc_error indicates that the driver/module had problems, 5868 * an EFAULT was found when accessing user data, a lack of 5869 * resources, etc. 5870 */ 5871 int 5872 strdoioctl( 5873 struct stdata *stp, 5874 struct strioctl *strioc, 5875 int fflags, /* file flags with model info */ 5876 int flag, 5877 cred_t *crp, 5878 int *rvalp) 5879 { 5880 mblk_t *bp; 5881 struct iocblk *iocbp; 5882 struct copyreq *reqp; 5883 struct copyresp *resp; 5884 int id; 5885 int transparent = 0; 5886 int error = 0; 5887 int len = 0; 5888 caddr_t taddr; 5889 int copyflag = (flag & (U_TO_K | K_TO_K)); 5890 int sigflag = (flag & STR_NOSIG); 5891 int errs; 5892 uint_t waitflags; 5893 5894 ASSERT(copyflag == U_TO_K || copyflag == K_TO_K); 5895 ASSERT((fflags & FMODELS) != 0); 5896 5897 TRACE_2(TR_FAC_STREAMS_FR, 5898 TR_STRDOIOCTL, 5899 "strdoioctl:stp %p strioc %p", stp, strioc); 5900 if (strioc->ic_len == TRANSPARENT) { /* send arg in M_DATA block */ 5901 transparent = 1; 5902 strioc->ic_len = sizeof (intptr_t); 5903 } 5904 5905 if (strioc->ic_len < 0 || (strmsgsz > 0 && strioc->ic_len > strmsgsz)) 5906 return (EINVAL); 5907 5908 if ((bp = allocb_cred_wait(sizeof (union ioctypes), sigflag, &error, 5909 crp, curproc->p_pid)) == NULL) 5910 return (error); 5911 5912 bzero(bp->b_wptr, sizeof (union ioctypes)); 5913 5914 iocbp = (struct iocblk *)bp->b_wptr; 5915 iocbp->ioc_count = strioc->ic_len; 5916 iocbp->ioc_cmd = strioc->ic_cmd; 5917 iocbp->ioc_flag = (fflags & FMODELS); 5918 5919 crhold(crp); 5920 iocbp->ioc_cr = crp; 5921 DB_TYPE(bp) = M_IOCTL; 5922 bp->b_wptr += sizeof (struct iocblk); 5923 5924 if (flag & STR_NOERROR) 5925 errs = STPLEX; 5926 else 5927 errs = STRHUP|STRDERR|STWRERR|STPLEX; 5928 
5929 /* 5930 * If there is data to copy into ioctl block, do so. 5931 */ 5932 if (iocbp->ioc_count > 0) { 5933 if (transparent) 5934 /* 5935 * Note: STR_NOERROR does not have an effect 5936 * in putiocd() 5937 */ 5938 id = K_TO_K | sigflag; 5939 else 5940 id = flag; 5941 if ((error = putiocd(bp, strioc->ic_dp, id, crp)) != 0) { 5942 freemsg(bp); 5943 crfree(crp); 5944 return (error); 5945 } 5946 5947 /* 5948 * We could have slept copying in user pages. 5949 * Recheck the stream head state (the other end 5950 * of a pipe could have gone away). 5951 */ 5952 if (stp->sd_flag & errs) { 5953 mutex_enter(&stp->sd_lock); 5954 error = strgeterr(stp, errs, 0); 5955 mutex_exit(&stp->sd_lock); 5956 if (error != 0) { 5957 freemsg(bp); 5958 crfree(crp); 5959 return (error); 5960 } 5961 } 5962 } 5963 if (transparent) 5964 iocbp->ioc_count = TRANSPARENT; 5965 5966 /* 5967 * Block for up to STRTIMOUT milliseconds if there is an outstanding 5968 * ioctl for this stream already running. All processes 5969 * sleeping here will be awakened as a result of an ACK 5970 * or NAK being received for the outstanding ioctl, or 5971 * as a result of the timer expiring on the outstanding 5972 * ioctl (a failure), or as a result of any waiting 5973 * process's timer expiring (also a failure). 5974 */ 5975 5976 error = 0; 5977 mutex_enter(&stp->sd_lock); 5978 while (stp->sd_flag & (IOCWAIT | IOCWAITNE)) { 5979 clock_t cv_rval; 5980 5981 TRACE_0(TR_FAC_STREAMS_FR, 5982 TR_STRDOIOCTL_WAIT, 5983 "strdoioctl sleeps - IOCWAIT"); 5984 cv_rval = str_cv_wait(&stp->sd_iocmonitor, &stp->sd_lock, 5985 STRTIMOUT, sigflag); 5986 if (cv_rval <= 0) { 5987 if (cv_rval == 0) { 5988 error = EINTR; 5989 } else { 5990 if (flag & STR_NOERROR) { 5991 /* 5992 * Terminating current ioctl in 5993 * progress -- assume it got lost and 5994 * wake up the other thread so that the 5995 * operation completes. 
5996 */ 5997 if (!(stp->sd_flag & IOCWAITNE)) { 5998 stp->sd_flag |= IOCWAITNE; 5999 cv_broadcast(&stp->sd_monitor); 6000 } 6001 /* 6002 * Otherwise, there's a running 6003 * STR_NOERROR -- we have no choice 6004 * here but to wait forever (or until 6005 * interrupted). 6006 */ 6007 } else { 6008 /* 6009 * pending ioctl has caused 6010 * us to time out 6011 */ 6012 error = ETIME; 6013 } 6014 } 6015 } else if ((stp->sd_flag & errs)) { 6016 error = strgeterr(stp, errs, 0); 6017 } 6018 if (error) { 6019 mutex_exit(&stp->sd_lock); 6020 freemsg(bp); 6021 crfree(crp); 6022 return (error); 6023 } 6024 } 6025 6026 /* 6027 * Have control of ioctl mechanism. 6028 * Send down ioctl packet and wait for response. 6029 */ 6030 if (stp->sd_iocblk != (mblk_t *)-1) { 6031 freemsg(stp->sd_iocblk); 6032 } 6033 stp->sd_iocblk = NULL; 6034 6035 /* 6036 * If this is marked with 'noerror' (internal; mostly 6037 * I_{P,}{UN,}LINK), then make sure nobody else is able to get 6038 * in here by setting IOCWAITNE. 6039 */ 6040 waitflags = IOCWAIT; 6041 if (flag & STR_NOERROR) 6042 waitflags |= IOCWAITNE; 6043 6044 stp->sd_flag |= waitflags; 6045 6046 /* 6047 * Assign sequence number. 6048 */ 6049 iocbp->ioc_id = stp->sd_iocid = getiocseqno(); 6050 6051 mutex_exit(&stp->sd_lock); 6052 6053 TRACE_1(TR_FAC_STREAMS_FR, 6054 TR_STRDOIOCTL_PUT, "strdoioctl put: stp %p", stp); 6055 stream_willservice(stp); 6056 putnext(stp->sd_wrq, bp); 6057 stream_runservice(stp); 6058 6059 /* 6060 * Timed wait for acknowledgment. The wait time is limited by the 6061 * timeout value, which must be a positive integer (number of 6062 * milliseconds) to wait, or 0 (use default value of STRTIMOUT 6063 * milliseconds), or -1 (wait forever). This will be awakened 6064 * either by an ACK/NAK message arriving, the timer expiring, or 6065 * the timer expiring on another ioctl waiting for control of the 6066 * mechanism. 
6067 */ 6068 waitioc: 6069 mutex_enter(&stp->sd_lock); 6070 6071 6072 /* 6073 * If the reply has already arrived, don't sleep. If awakened from 6074 * the sleep, fail only if the reply has not arrived by then. 6075 * Otherwise, process the reply. 6076 */ 6077 while (!stp->sd_iocblk) { 6078 clock_t cv_rval; 6079 6080 if (stp->sd_flag & errs) { 6081 error = strgeterr(stp, errs, 0); 6082 if (error != 0) { 6083 stp->sd_flag &= ~waitflags; 6084 cv_broadcast(&stp->sd_iocmonitor); 6085 mutex_exit(&stp->sd_lock); 6086 crfree(crp); 6087 return (error); 6088 } 6089 } 6090 6091 TRACE_0(TR_FAC_STREAMS_FR, 6092 TR_STRDOIOCTL_WAIT2, 6093 "strdoioctl sleeps awaiting reply"); 6094 ASSERT(error == 0); 6095 6096 cv_rval = str_cv_wait(&stp->sd_monitor, &stp->sd_lock, 6097 (strioc->ic_timout ? 6098 strioc->ic_timout * 1000 : STRTIMOUT), sigflag); 6099 6100 /* 6101 * There are four possible cases here: interrupt, timeout, 6102 * wakeup by IOCWAITNE (above), or wakeup by strrput_nondata (a 6103 * valid M_IOCTL reply). 6104 * 6105 * If we've been awakened by a STR_NOERROR ioctl on some other 6106 * thread, then sd_iocblk will still be NULL, and IOCWAITNE 6107 * will be set. Pretend as if we just timed out. Note that 6108 * this other thread waited at least STRTIMOUT before trying to 6109 * awaken our thread, so this is indistinguishable (even for 6110 * INFTIM) from the case where we failed with ETIME waiting on 6111 * IOCWAIT in the prior loop. 6112 */ 6113 if (cv_rval > 0 && !(flag & STR_NOERROR) && 6114 stp->sd_iocblk == NULL && (stp->sd_flag & IOCWAITNE)) { 6115 cv_rval = -1; 6116 } 6117 6118 /* 6119 * note: STR_NOERROR does not protect 6120 * us here.. use ic_timout < 0 6121 */ 6122 if (cv_rval <= 0) { 6123 if (cv_rval == 0) { 6124 error = EINTR; 6125 } else { 6126 error = ETIME; 6127 } 6128 /* 6129 * A message could have come in after we were scheduled 6130 * but before we were actually run. 
6131 */ 6132 bp = stp->sd_iocblk; 6133 stp->sd_iocblk = NULL; 6134 if (bp != NULL) { 6135 if ((bp->b_datap->db_type == M_COPYIN) || 6136 (bp->b_datap->db_type == M_COPYOUT)) { 6137 mutex_exit(&stp->sd_lock); 6138 if (bp->b_cont) { 6139 freemsg(bp->b_cont); 6140 bp->b_cont = NULL; 6141 } 6142 bp->b_datap->db_type = M_IOCDATA; 6143 bp->b_wptr = bp->b_rptr + 6144 sizeof (struct copyresp); 6145 resp = (struct copyresp *)bp->b_rptr; 6146 resp->cp_rval = 6147 (caddr_t)1; /* failure */ 6148 stream_willservice(stp); 6149 putnext(stp->sd_wrq, bp); 6150 stream_runservice(stp); 6151 mutex_enter(&stp->sd_lock); 6152 } else { 6153 freemsg(bp); 6154 } 6155 } 6156 stp->sd_flag &= ~waitflags; 6157 cv_broadcast(&stp->sd_iocmonitor); 6158 mutex_exit(&stp->sd_lock); 6159 crfree(crp); 6160 return (error); 6161 } 6162 } 6163 bp = stp->sd_iocblk; 6164 /* 6165 * Note: it is strictly impossible to get here with sd_iocblk set to 6166 * -1. This is because the initial loop above doesn't allow any new 6167 * ioctls into the fray until all others have passed this point. 6168 */ 6169 ASSERT(bp != NULL && bp != (mblk_t *)-1); 6170 TRACE_1(TR_FAC_STREAMS_FR, 6171 TR_STRDOIOCTL_ACK, "strdoioctl got reply: bp %p", bp); 6172 if ((bp->b_datap->db_type == M_IOCACK) || 6173 (bp->b_datap->db_type == M_IOCNAK)) { 6174 /* for detection of duplicate ioctl replies */ 6175 stp->sd_iocblk = (mblk_t *)-1; 6176 stp->sd_flag &= ~waitflags; 6177 cv_broadcast(&stp->sd_iocmonitor); 6178 mutex_exit(&stp->sd_lock); 6179 } else { 6180 /* 6181 * flags not cleared here because we're still doing 6182 * copy in/out for ioctl. 6183 */ 6184 stp->sd_iocblk = NULL; 6185 mutex_exit(&stp->sd_lock); 6186 } 6187 6188 6189 /* 6190 * Have received acknowledgment. 6191 */ 6192 6193 switch (bp->b_datap->db_type) { 6194 case M_IOCACK: 6195 /* 6196 * Positive ack. 6197 */ 6198 iocbp = (struct iocblk *)bp->b_rptr; 6199 6200 /* 6201 * Set error if indicated. 
6202 */ 6203 if (iocbp->ioc_error) { 6204 error = iocbp->ioc_error; 6205 break; 6206 } 6207 6208 /* 6209 * Set return value. 6210 */ 6211 *rvalp = iocbp->ioc_rval; 6212 6213 /* 6214 * Data may have been returned in ACK message (ioc_count > 0). 6215 * If so, copy it out to the user's buffer. 6216 */ 6217 if (iocbp->ioc_count && !transparent) { 6218 if (error = getiocd(bp, strioc->ic_dp, copyflag)) 6219 break; 6220 } 6221 if (!transparent) { 6222 if (len) /* an M_COPYOUT was used with I_STR */ 6223 strioc->ic_len = len; 6224 else 6225 strioc->ic_len = (int)iocbp->ioc_count; 6226 } 6227 break; 6228 6229 case M_IOCNAK: 6230 /* 6231 * Negative ack. 6232 * 6233 * The only thing to do is set error as specified 6234 * in neg ack packet. 6235 */ 6236 iocbp = (struct iocblk *)bp->b_rptr; 6237 6238 error = (iocbp->ioc_error ? iocbp->ioc_error : EINVAL); 6239 break; 6240 6241 case M_COPYIN: 6242 /* 6243 * Driver or module has requested user ioctl data. 6244 */ 6245 reqp = (struct copyreq *)bp->b_rptr; 6246 6247 /* 6248 * M_COPYIN should *never* have a message attached, though 6249 * it's harmless if it does -- thus, panic on a DEBUG 6250 * kernel and just free it on a non-DEBUG build. 
6251 */ 6252 ASSERT(bp->b_cont == NULL); 6253 if (bp->b_cont != NULL) { 6254 freemsg(bp->b_cont); 6255 bp->b_cont = NULL; 6256 } 6257 6258 error = putiocd(bp, reqp->cq_addr, flag, crp); 6259 if (error && bp->b_cont) { 6260 freemsg(bp->b_cont); 6261 bp->b_cont = NULL; 6262 } 6263 6264 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp); 6265 bp->b_datap->db_type = M_IOCDATA; 6266 6267 mblk_setcred(bp, crp, curproc->p_pid); 6268 resp = (struct copyresp *)bp->b_rptr; 6269 resp->cp_rval = (caddr_t)(uintptr_t)error; 6270 resp->cp_flag = (fflags & FMODELS); 6271 6272 stream_willservice(stp); 6273 putnext(stp->sd_wrq, bp); 6274 stream_runservice(stp); 6275 6276 if (error) { 6277 mutex_enter(&stp->sd_lock); 6278 stp->sd_flag &= ~waitflags; 6279 cv_broadcast(&stp->sd_iocmonitor); 6280 mutex_exit(&stp->sd_lock); 6281 crfree(crp); 6282 return (error); 6283 } 6284 6285 goto waitioc; 6286 6287 case M_COPYOUT: 6288 /* 6289 * Driver or module has ioctl data for a user. 6290 */ 6291 reqp = (struct copyreq *)bp->b_rptr; 6292 ASSERT(bp->b_cont != NULL); 6293 6294 /* 6295 * Always (transparent or non-transparent ) 6296 * use the address specified in the request 6297 */ 6298 taddr = reqp->cq_addr; 6299 if (!transparent) 6300 len = (int)reqp->cq_size; 6301 6302 /* copyout data to the provided address */ 6303 error = getiocd(bp, taddr, copyflag); 6304 6305 freemsg(bp->b_cont); 6306 bp->b_cont = NULL; 6307 6308 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp); 6309 bp->b_datap->db_type = M_IOCDATA; 6310 6311 mblk_setcred(bp, crp, curproc->p_pid); 6312 resp = (struct copyresp *)bp->b_rptr; 6313 resp->cp_rval = (caddr_t)(uintptr_t)error; 6314 resp->cp_flag = (fflags & FMODELS); 6315 6316 stream_willservice(stp); 6317 putnext(stp->sd_wrq, bp); 6318 stream_runservice(stp); 6319 6320 if (error) { 6321 mutex_enter(&stp->sd_lock); 6322 stp->sd_flag &= ~waitflags; 6323 cv_broadcast(&stp->sd_iocmonitor); 6324 mutex_exit(&stp->sd_lock); 6325 crfree(crp); 6326 return (error); 6327 } 6328 goto 
waitioc; 6329 6330 default: 6331 ASSERT(0); 6332 mutex_enter(&stp->sd_lock); 6333 stp->sd_flag &= ~waitflags; 6334 cv_broadcast(&stp->sd_iocmonitor); 6335 mutex_exit(&stp->sd_lock); 6336 break; 6337 } 6338 6339 freemsg(bp); 6340 crfree(crp); 6341 return (error); 6342 } 6343 6344 /* 6345 * Send an M_CMD message downstream and wait for a reply. This is a ptools 6346 * special used to retrieve information from modules/drivers a stream without 6347 * being subjected to flow control or interfering with pending messages on the 6348 * stream (e.g. an ioctl in flight). 6349 */ 6350 int 6351 strdocmd(struct stdata *stp, struct strcmd *scp, cred_t *crp) 6352 { 6353 mblk_t *mp; 6354 struct cmdblk *cmdp; 6355 int error = 0; 6356 int errs = STRHUP|STRDERR|STWRERR|STPLEX; 6357 clock_t rval, timeout = STRTIMOUT; 6358 6359 if (scp->sc_len < 0 || scp->sc_len > sizeof (scp->sc_buf) || 6360 scp->sc_timeout < -1) 6361 return (EINVAL); 6362 6363 if (scp->sc_timeout > 0) 6364 timeout = scp->sc_timeout * MILLISEC; 6365 6366 if ((mp = allocb_cred(sizeof (struct cmdblk), crp, 6367 curproc->p_pid)) == NULL) 6368 return (ENOMEM); 6369 6370 crhold(crp); 6371 6372 cmdp = (struct cmdblk *)mp->b_wptr; 6373 cmdp->cb_cr = crp; 6374 cmdp->cb_cmd = scp->sc_cmd; 6375 cmdp->cb_len = scp->sc_len; 6376 cmdp->cb_error = 0; 6377 mp->b_wptr += sizeof (struct cmdblk); 6378 6379 DB_TYPE(mp) = M_CMD; 6380 DB_CPID(mp) = curproc->p_pid; 6381 6382 /* 6383 * Copy in the payload. 
6384 */ 6385 if (cmdp->cb_len > 0) { 6386 mp->b_cont = allocb_cred(sizeof (scp->sc_buf), crp, 6387 curproc->p_pid); 6388 if (mp->b_cont == NULL) { 6389 error = ENOMEM; 6390 goto out; 6391 } 6392 6393 /* cb_len comes from sc_len, which has already been checked */ 6394 ASSERT(cmdp->cb_len <= sizeof (scp->sc_buf)); 6395 (void) bcopy(scp->sc_buf, mp->b_cont->b_wptr, cmdp->cb_len); 6396 mp->b_cont->b_wptr += cmdp->cb_len; 6397 DB_CPID(mp->b_cont) = curproc->p_pid; 6398 } 6399 6400 /* 6401 * Since this mechanism is strictly for ptools, and since only one 6402 * process can be grabbed at a time, we simply fail if there's 6403 * currently an operation pending. 6404 */ 6405 mutex_enter(&stp->sd_lock); 6406 if (stp->sd_flag & STRCMDWAIT) { 6407 mutex_exit(&stp->sd_lock); 6408 error = EBUSY; 6409 goto out; 6410 } 6411 stp->sd_flag |= STRCMDWAIT; 6412 ASSERT(stp->sd_cmdblk == NULL); 6413 mutex_exit(&stp->sd_lock); 6414 6415 putnext(stp->sd_wrq, mp); 6416 mp = NULL; 6417 6418 /* 6419 * Timed wait for acknowledgment. If the reply has already arrived, 6420 * don't sleep. If awakened from the sleep, fail only if the reply 6421 * has not arrived by then. Otherwise, process the reply. 6422 */ 6423 mutex_enter(&stp->sd_lock); 6424 while (stp->sd_cmdblk == NULL) { 6425 if (stp->sd_flag & errs) { 6426 if ((error = strgeterr(stp, errs, 0)) != 0) 6427 goto waitout; 6428 } 6429 6430 rval = str_cv_wait(&stp->sd_monitor, &stp->sd_lock, timeout, 0); 6431 if (stp->sd_cmdblk != NULL) 6432 break; 6433 6434 if (rval <= 0) { 6435 error = (rval == 0) ? EINTR : ETIME; 6436 goto waitout; 6437 } 6438 } 6439 6440 /* 6441 * We received a reply. 
6442 */ 6443 mp = stp->sd_cmdblk; 6444 stp->sd_cmdblk = NULL; 6445 ASSERT(mp != NULL && DB_TYPE(mp) == M_CMD); 6446 ASSERT(stp->sd_flag & STRCMDWAIT); 6447 stp->sd_flag &= ~STRCMDWAIT; 6448 mutex_exit(&stp->sd_lock); 6449 6450 cmdp = (struct cmdblk *)mp->b_rptr; 6451 if ((error = cmdp->cb_error) != 0) 6452 goto out; 6453 6454 /* 6455 * Data may have been returned in the reply (cb_len > 0). 6456 * If so, copy it out to the user's buffer. 6457 */ 6458 if (cmdp->cb_len > 0) { 6459 if (mp->b_cont == NULL || MBLKL(mp->b_cont) < cmdp->cb_len) { 6460 error = EPROTO; 6461 goto out; 6462 } 6463 6464 cmdp->cb_len = MIN(cmdp->cb_len, sizeof (scp->sc_buf)); 6465 (void) bcopy(mp->b_cont->b_rptr, scp->sc_buf, cmdp->cb_len); 6466 } 6467 scp->sc_len = cmdp->cb_len; 6468 out: 6469 freemsg(mp); 6470 crfree(crp); 6471 return (error); 6472 waitout: 6473 ASSERT(stp->sd_cmdblk == NULL); 6474 stp->sd_flag &= ~STRCMDWAIT; 6475 mutex_exit(&stp->sd_lock); 6476 crfree(crp); 6477 return (error); 6478 } 6479 6480 /* 6481 * For the SunOS keyboard driver. 6482 * Return the next available "ioctl" sequence number. 6483 * Exported, so that streams modules can send "ioctl" messages 6484 * downstream from their open routine. 6485 */ 6486 int 6487 getiocseqno(void) 6488 { 6489 int i; 6490 6491 mutex_enter(&strresources); 6492 i = ++ioc_id; 6493 mutex_exit(&strresources); 6494 return (i); 6495 } 6496 6497 /* 6498 * Get the next message from the read queue. If the message is 6499 * priority, STRPRI will have been set by strrput(). This flag 6500 * should be reset only when the entire message at the front of the 6501 * queue as been consumed. 6502 * 6503 * NOTE: strgetmsg and kstrgetmsg have much of the logic in common. 
6504 */ 6505 int 6506 strgetmsg( 6507 struct vnode *vp, 6508 struct strbuf *mctl, 6509 struct strbuf *mdata, 6510 unsigned char *prip, 6511 int *flagsp, 6512 int fmode, 6513 rval_t *rvp) 6514 { 6515 struct stdata *stp; 6516 mblk_t *bp, *nbp; 6517 mblk_t *savemp = NULL; 6518 mblk_t *savemptail = NULL; 6519 uint_t old_sd_flag; 6520 int flg; 6521 int more = 0; 6522 int error = 0; 6523 char first = 1; 6524 uint_t mark; /* Contains MSG*MARK and _LASTMARK */ 6525 #define _LASTMARK 0x8000 /* Distinct from MSG*MARK */ 6526 unsigned char pri = 0; 6527 queue_t *q; 6528 int pr = 0; /* Partial read successful */ 6529 struct uio uios; 6530 struct uio *uiop = &uios; 6531 struct iovec iovs; 6532 unsigned char type; 6533 6534 TRACE_1(TR_FAC_STREAMS_FR, TR_STRGETMSG_ENTER, 6535 "strgetmsg:%p", vp); 6536 6537 ASSERT(vp->v_stream); 6538 stp = vp->v_stream; 6539 rvp->r_val1 = 0; 6540 6541 mutex_enter(&stp->sd_lock); 6542 6543 if ((error = i_straccess(stp, JCREAD)) != 0) { 6544 mutex_exit(&stp->sd_lock); 6545 return (error); 6546 } 6547 6548 if (stp->sd_flag & (STRDERR|STPLEX)) { 6549 error = strgeterr(stp, STRDERR|STPLEX, 0); 6550 if (error != 0) { 6551 mutex_exit(&stp->sd_lock); 6552 return (error); 6553 } 6554 } 6555 mutex_exit(&stp->sd_lock); 6556 6557 switch (*flagsp) { 6558 case MSG_HIPRI: 6559 if (*prip != 0) 6560 return (EINVAL); 6561 break; 6562 6563 case MSG_ANY: 6564 case MSG_BAND: 6565 break; 6566 6567 default: 6568 return (EINVAL); 6569 } 6570 /* 6571 * Setup uio and iov for data part 6572 */ 6573 iovs.iov_base = mdata->buf; 6574 iovs.iov_len = mdata->maxlen; 6575 uios.uio_iov = &iovs; 6576 uios.uio_iovcnt = 1; 6577 uios.uio_loffset = 0; 6578 uios.uio_segflg = UIO_USERSPACE; 6579 uios.uio_fmode = 0; 6580 uios.uio_extflg = UIO_COPY_CACHED; 6581 uios.uio_resid = mdata->maxlen; 6582 uios.uio_offset = 0; 6583 6584 q = _RD(stp->sd_wrq); 6585 mutex_enter(&stp->sd_lock); 6586 old_sd_flag = stp->sd_flag; 6587 mark = 0; 6588 for (;;) { 6589 int done = 0; 6590 mblk_t *q_first = 
q->q_first; 6591 6592 /* 6593 * Get the next message of appropriate priority 6594 * from the stream head. If the caller is interested 6595 * in band or hipri messages, then they should already 6596 * be enqueued at the stream head. On the other hand 6597 * if the caller wants normal (band 0) messages, they 6598 * might be deferred in a synchronous stream and they 6599 * will need to be pulled up. 6600 * 6601 * After we have dequeued a message, we might find that 6602 * it was a deferred M_SIG that was enqueued at the 6603 * stream head. It must now be posted as part of the 6604 * read by calling strsignal_nolock(). 6605 * 6606 * Also note that strrput does not enqueue an M_PCSIG, 6607 * and there cannot be more than one hipri message, 6608 * so there was no need to have the M_PCSIG case. 6609 * 6610 * At some time it might be nice to try and wrap the 6611 * functionality of kstrgetmsg() and strgetmsg() into 6612 * a common routine so to reduce the amount of replicated 6613 * code (since they are extremely similar). 6614 */ 6615 if (!(*flagsp & (MSG_HIPRI|MSG_BAND))) { 6616 /* Asking for normal, band0 data */ 6617 bp = strget(stp, q, uiop, first, &error); 6618 ASSERT(MUTEX_HELD(&stp->sd_lock)); 6619 if (bp != NULL) { 6620 if (DB_TYPE(bp) == M_SIG) { 6621 strsignal_nolock(stp, *bp->b_rptr, 6622 bp->b_band); 6623 freemsg(bp); 6624 continue; 6625 } else { 6626 break; 6627 } 6628 } 6629 if (error != 0) 6630 goto getmout; 6631 6632 /* 6633 * We can't depend on the value of STRPRI here because 6634 * the stream head may be in transit. 
Therefore, we 6635 * must look at the type of the first message to 6636 * determine if a high priority messages is waiting 6637 */ 6638 } else if ((*flagsp & MSG_HIPRI) && q_first != NULL && 6639 DB_TYPE(q_first) >= QPCTL && 6640 (bp = getq_noenab(q, 0)) != NULL) { 6641 /* Asked for HIPRI and got one */ 6642 ASSERT(DB_TYPE(bp) >= QPCTL); 6643 break; 6644 } else if ((*flagsp & MSG_BAND) && q_first != NULL && 6645 ((q_first->b_band >= *prip) || DB_TYPE(q_first) >= QPCTL) && 6646 (bp = getq_noenab(q, 0)) != NULL) { 6647 /* 6648 * Asked for at least band "prip" and got either at 6649 * least that band or a hipri message. 6650 */ 6651 ASSERT(bp->b_band >= *prip || DB_TYPE(bp) >= QPCTL); 6652 if (DB_TYPE(bp) == M_SIG) { 6653 strsignal_nolock(stp, *bp->b_rptr, bp->b_band); 6654 freemsg(bp); 6655 continue; 6656 } else { 6657 break; 6658 } 6659 } 6660 6661 /* No data. Time to sleep? */ 6662 qbackenable(q, 0); 6663 6664 /* 6665 * If STRHUP or STREOF, return 0 length control and data. 6666 * If resid is 0, then a read(fd,buf,0) was done. Do not 6667 * sleep to satisfy this request because by default we have 6668 * zero bytes to return. 
6669 */ 6670 if ((stp->sd_flag & (STRHUP|STREOF)) || (mctl->maxlen == 0 && 6671 mdata->maxlen == 0)) { 6672 mctl->len = mdata->len = 0; 6673 *flagsp = 0; 6674 mutex_exit(&stp->sd_lock); 6675 return (0); 6676 } 6677 TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_WAIT, 6678 "strgetmsg calls strwaitq:%p, %p", 6679 vp, uiop); 6680 if (((error = strwaitq(stp, GETWAIT, (ssize_t)0, fmode, -1, 6681 &done)) != 0) || done) { 6682 TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_DONE, 6683 "strgetmsg error or done:%p, %p", 6684 vp, uiop); 6685 mutex_exit(&stp->sd_lock); 6686 return (error); 6687 } 6688 TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_AWAKE, 6689 "strgetmsg awakes:%p, %p", vp, uiop); 6690 if ((error = i_straccess(stp, JCREAD)) != 0) { 6691 mutex_exit(&stp->sd_lock); 6692 return (error); 6693 } 6694 first = 0; 6695 } 6696 ASSERT(bp != NULL); 6697 /* 6698 * Extract any mark information. If the message is not completely 6699 * consumed this information will be put in the mblk 6700 * that is putback. 6701 * If MSGMARKNEXT is set and the message is completely consumed 6702 * the STRATMARK flag will be set below. Likewise, if 6703 * MSGNOTMARKNEXT is set and the message is 6704 * completely consumed STRNOTATMARK will be set. 6705 */ 6706 mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT); 6707 ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) != 6708 (MSGMARKNEXT|MSGNOTMARKNEXT)); 6709 if (mark != 0 && bp == stp->sd_mark) { 6710 mark |= _LASTMARK; 6711 stp->sd_mark = NULL; 6712 } 6713 /* 6714 * keep track of the original message type and priority 6715 */ 6716 pri = bp->b_band; 6717 type = bp->b_datap->db_type; 6718 if (type == M_PASSFP) { 6719 if ((mark & _LASTMARK) && (stp->sd_mark == NULL)) 6720 stp->sd_mark = bp; 6721 bp->b_flag |= mark & ~_LASTMARK; 6722 putback(stp, q, bp, pri); 6723 qbackenable(q, pri); 6724 mutex_exit(&stp->sd_lock); 6725 return (EBADMSG); 6726 } 6727 ASSERT(type != M_SIG); 6728 6729 /* 6730 * Set this flag so strrput will not generate signals. 
Need to 6731 * make sure this flag is cleared before leaving this routine 6732 * else signals will stop being sent. 6733 */ 6734 stp->sd_flag |= STRGETINPROG; 6735 mutex_exit(&stp->sd_lock); 6736 6737 if (STREAM_NEEDSERVICE(stp)) 6738 stream_runservice(stp); 6739 6740 /* 6741 * Set HIPRI flag if message is priority. 6742 */ 6743 if (type >= QPCTL) 6744 flg = MSG_HIPRI; 6745 else 6746 flg = MSG_BAND; 6747 6748 /* 6749 * First process PROTO or PCPROTO blocks, if any. 6750 */ 6751 if (mctl->maxlen >= 0 && type != M_DATA) { 6752 size_t n, bcnt; 6753 char *ubuf; 6754 6755 bcnt = mctl->maxlen; 6756 ubuf = mctl->buf; 6757 while (bp != NULL && bp->b_datap->db_type != M_DATA) { 6758 if ((n = MIN(bcnt, bp->b_wptr - bp->b_rptr)) != 0 && 6759 copyout(bp->b_rptr, ubuf, n)) { 6760 error = EFAULT; 6761 mutex_enter(&stp->sd_lock); 6762 /* 6763 * clear stream head pri flag based on 6764 * first message type 6765 */ 6766 if (type >= QPCTL) { 6767 ASSERT(type == M_PCPROTO); 6768 stp->sd_flag &= ~STRPRI; 6769 } 6770 more = 0; 6771 freemsg(bp); 6772 goto getmout; 6773 } 6774 ubuf += n; 6775 bp->b_rptr += n; 6776 if (bp->b_rptr >= bp->b_wptr) { 6777 nbp = bp; 6778 bp = bp->b_cont; 6779 freeb(nbp); 6780 } 6781 ASSERT(n <= bcnt); 6782 bcnt -= n; 6783 if (bcnt == 0) 6784 break; 6785 } 6786 mctl->len = mctl->maxlen - bcnt; 6787 } else 6788 mctl->len = -1; 6789 6790 if (bp && bp->b_datap->db_type != M_DATA) { 6791 /* 6792 * More PROTO blocks in msg. 6793 */ 6794 more |= MORECTL; 6795 savemp = bp; 6796 while (bp && bp->b_datap->db_type != M_DATA) { 6797 savemptail = bp; 6798 bp = bp->b_cont; 6799 } 6800 savemptail->b_cont = NULL; 6801 } 6802 6803 /* 6804 * Now process DATA blocks, if any. 6805 */ 6806 if (mdata->maxlen >= 0 && bp) { 6807 /* 6808 * struiocopyout will consume a potential zero-length 6809 * M_DATA even if uio_resid is zero. 
6810 */ 6811 size_t oldresid = uiop->uio_resid; 6812 6813 bp = struiocopyout(bp, uiop, &error); 6814 if (error != 0) { 6815 mutex_enter(&stp->sd_lock); 6816 /* 6817 * clear stream head hi pri flag based on 6818 * first message 6819 */ 6820 if (type >= QPCTL) { 6821 ASSERT(type == M_PCPROTO); 6822 stp->sd_flag &= ~STRPRI; 6823 } 6824 more = 0; 6825 freemsg(savemp); 6826 goto getmout; 6827 } 6828 /* 6829 * (pr == 1) indicates a partial read. 6830 */ 6831 if (oldresid > uiop->uio_resid) 6832 pr = 1; 6833 mdata->len = mdata->maxlen - uiop->uio_resid; 6834 } else 6835 mdata->len = -1; 6836 6837 if (bp) { /* more data blocks in msg */ 6838 more |= MOREDATA; 6839 if (savemp) 6840 savemptail->b_cont = bp; 6841 else 6842 savemp = bp; 6843 } 6844 6845 mutex_enter(&stp->sd_lock); 6846 if (savemp) { 6847 if (pr && (savemp->b_datap->db_type == M_DATA) && 6848 msgnodata(savemp)) { 6849 /* 6850 * Avoid queuing a zero-length tail part of 6851 * a message. pr=1 indicates that we read some of 6852 * the message. 6853 */ 6854 freemsg(savemp); 6855 more &= ~MOREDATA; 6856 /* 6857 * clear stream head hi pri flag based on 6858 * first message 6859 */ 6860 if (type >= QPCTL) { 6861 ASSERT(type == M_PCPROTO); 6862 stp->sd_flag &= ~STRPRI; 6863 } 6864 } else { 6865 savemp->b_band = pri; 6866 /* 6867 * If the first message was HIPRI and the one we're 6868 * putting back isn't, then clear STRPRI, otherwise 6869 * set STRPRI again. Note that we must set STRPRI 6870 * again since the flush logic in strrput_nondata() 6871 * may have cleared it while we had sd_lock dropped. 6872 */ 6873 if (type >= QPCTL) { 6874 ASSERT(type == M_PCPROTO); 6875 if (queclass(savemp) < QPCTL) 6876 stp->sd_flag &= ~STRPRI; 6877 else 6878 stp->sd_flag |= STRPRI; 6879 } else if (queclass(savemp) >= QPCTL) { 6880 /* 6881 * The first message was not a HIPRI message, 6882 * but the one we are about to putback is. 
6883 * For simplicitly, we do not allow for HIPRI 6884 * messages to be embedded in the message 6885 * body, so just force it to same type as 6886 * first message. 6887 */ 6888 ASSERT(type == M_DATA || type == M_PROTO); 6889 ASSERT(savemp->b_datap->db_type == M_PCPROTO); 6890 savemp->b_datap->db_type = type; 6891 } 6892 if (mark != 0) { 6893 savemp->b_flag |= mark & ~_LASTMARK; 6894 if ((mark & _LASTMARK) && 6895 (stp->sd_mark == NULL)) { 6896 /* 6897 * If another marked message arrived 6898 * while sd_lock was not held sd_mark 6899 * would be non-NULL. 6900 */ 6901 stp->sd_mark = savemp; 6902 } 6903 } 6904 putback(stp, q, savemp, pri); 6905 } 6906 } else { 6907 /* 6908 * The complete message was consumed. 6909 * 6910 * If another M_PCPROTO arrived while sd_lock was not held 6911 * it would have been discarded since STRPRI was still set. 6912 * 6913 * Move the MSG*MARKNEXT information 6914 * to the stream head just in case 6915 * the read queue becomes empty. 6916 * clear stream head hi pri flag based on 6917 * first message 6918 * 6919 * If the stream head was at the mark 6920 * (STRATMARK) before we dropped sd_lock above 6921 * and some data was consumed then we have 6922 * moved past the mark thus STRATMARK is 6923 * cleared. However, if a message arrived in 6924 * strrput during the copyout above causing 6925 * STRATMARK to be set we can not clear that 6926 * flag. 
6927 */ 6928 if (type >= QPCTL) { 6929 ASSERT(type == M_PCPROTO); 6930 stp->sd_flag &= ~STRPRI; 6931 } 6932 if (mark & (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) { 6933 if (mark & MSGMARKNEXT) { 6934 stp->sd_flag &= ~STRNOTATMARK; 6935 stp->sd_flag |= STRATMARK; 6936 } else if (mark & MSGNOTMARKNEXT) { 6937 stp->sd_flag &= ~STRATMARK; 6938 stp->sd_flag |= STRNOTATMARK; 6939 } else { 6940 stp->sd_flag &= ~(STRATMARK|STRNOTATMARK); 6941 } 6942 } else if (pr && (old_sd_flag & STRATMARK)) { 6943 stp->sd_flag &= ~STRATMARK; 6944 } 6945 } 6946 6947 *flagsp = flg; 6948 *prip = pri; 6949 6950 /* 6951 * Getmsg cleanup processing - if the state of the queue has changed 6952 * some signals may need to be sent and/or poll awakened. 6953 */ 6954 getmout: 6955 qbackenable(q, pri); 6956 6957 /* 6958 * We dropped the stream head lock above. Send all M_SIG messages 6959 * before processing stream head for SIGPOLL messages. 6960 */ 6961 ASSERT(MUTEX_HELD(&stp->sd_lock)); 6962 while ((bp = q->q_first) != NULL && 6963 (bp->b_datap->db_type == M_SIG)) { 6964 /* 6965 * sd_lock is held so the content of the read queue can not 6966 * change. 6967 */ 6968 bp = getq(q); 6969 ASSERT(bp != NULL && bp->b_datap->db_type == M_SIG); 6970 6971 strsignal_nolock(stp, *bp->b_rptr, bp->b_band); 6972 mutex_exit(&stp->sd_lock); 6973 freemsg(bp); 6974 if (STREAM_NEEDSERVICE(stp)) 6975 stream_runservice(stp); 6976 mutex_enter(&stp->sd_lock); 6977 } 6978 6979 /* 6980 * stream head cannot change while we make the determination 6981 * whether or not to send a signal. Drop the flag to allow strrput 6982 * to send firstmsgsigs again. 6983 */ 6984 stp->sd_flag &= ~STRGETINPROG; 6985 6986 /* 6987 * If the type of message at the front of the queue changed 6988 * due to the receive the appropriate signals and pollwakeup events 6989 * are generated. The type of changes are: 6990 * Processed a hipri message, q_first is not hipri. 6991 * Processed a band X message, and q_first is band Y. 
6992 * The generated signals and pollwakeups are identical to what 6993 * strrput() generates should the message that is now on q_first 6994 * arrive to an empty read queue. 6995 * 6996 * Note: only strrput will send a signal for a hipri message. 6997 */ 6998 if ((bp = q->q_first) != NULL && !(stp->sd_flag & STRPRI)) { 6999 strsigset_t signals = 0; 7000 strpollset_t pollwakeups = 0; 7001 7002 if (flg & MSG_HIPRI) { 7003 /* 7004 * Removed a hipri message. Regular data at 7005 * the front of the queue. 7006 */ 7007 if (bp->b_band == 0) { 7008 signals = S_INPUT | S_RDNORM; 7009 pollwakeups = POLLIN | POLLRDNORM; 7010 } else { 7011 signals = S_INPUT | S_RDBAND; 7012 pollwakeups = POLLIN | POLLRDBAND; 7013 } 7014 } else if (pri != bp->b_band) { 7015 /* 7016 * The band is different for the new q_first. 7017 */ 7018 if (bp->b_band == 0) { 7019 signals = S_RDNORM; 7020 pollwakeups = POLLIN | POLLRDNORM; 7021 } else { 7022 signals = S_RDBAND; 7023 pollwakeups = POLLIN | POLLRDBAND; 7024 } 7025 } 7026 7027 if (pollwakeups != 0) { 7028 if (pollwakeups == (POLLIN | POLLRDNORM)) { 7029 if (!(stp->sd_rput_opt & SR_POLLIN)) 7030 goto no_pollwake; 7031 stp->sd_rput_opt &= ~SR_POLLIN; 7032 } 7033 mutex_exit(&stp->sd_lock); 7034 pollwakeup(&stp->sd_pollist, pollwakeups); 7035 mutex_enter(&stp->sd_lock); 7036 } 7037 no_pollwake: 7038 7039 if (stp->sd_sigflags & signals) 7040 strsendsig(stp->sd_siglist, signals, bp->b_band, 0); 7041 } 7042 mutex_exit(&stp->sd_lock); 7043 7044 rvp->r_val1 = more; 7045 return (error); 7046 #undef _LASTMARK 7047 } 7048 7049 /* 7050 * Get the next message from the read queue. If the message is 7051 * priority, STRPRI will have been set by strrput(). This flag 7052 * should be reset only when the entire message at the front of the 7053 * queue as been consumed. 7054 * 7055 * If uiop is NULL all data is returned in mctlp. 7056 * Note that a NULL uiop implies that FNDELAY and FNONBLOCK are assumed 7057 * not enabled. 
7058 * The timeout parameter is in milliseconds; -1 for infinity. 7059 * This routine handles the consolidation private flags: 7060 * MSG_IGNERROR Ignore any stream head error except STPLEX. 7061 * MSG_DELAYERROR Defer the error check until the queue is empty. 7062 * MSG_HOLDSIG Hold signals while waiting for data. 7063 * MSG_IPEEK Only peek at messages. 7064 * MSG_DISCARDTAIL Discard the tail M_DATA part of the message 7065 * that doesn't fit. 7066 * MSG_NOMARK If the message is marked leave it on the queue. 7067 * 7068 * NOTE: strgetmsg and kstrgetmsg have much of the logic in common. 7069 */ 7070 int 7071 kstrgetmsg( 7072 struct vnode *vp, 7073 mblk_t **mctlp, 7074 struct uio *uiop, 7075 unsigned char *prip, 7076 int *flagsp, 7077 clock_t timout, 7078 rval_t *rvp) 7079 { 7080 struct stdata *stp; 7081 mblk_t *bp, *nbp; 7082 mblk_t *savemp = NULL; 7083 mblk_t *savemptail = NULL; 7084 int flags; 7085 uint_t old_sd_flag; 7086 int flg; 7087 int more = 0; 7088 int error = 0; 7089 char first = 1; 7090 uint_t mark; /* Contains MSG*MARK and _LASTMARK */ 7091 #define _LASTMARK 0x8000 /* Distinct from MSG*MARK */ 7092 unsigned char pri = 0; 7093 queue_t *q; 7094 int pr = 0; /* Partial read successful */ 7095 unsigned char type; 7096 7097 TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_ENTER, 7098 "kstrgetmsg:%p", vp); 7099 7100 ASSERT(vp->v_stream); 7101 stp = vp->v_stream; 7102 rvp->r_val1 = 0; 7103 7104 mutex_enter(&stp->sd_lock); 7105 7106 if ((error = i_straccess(stp, JCREAD)) != 0) { 7107 mutex_exit(&stp->sd_lock); 7108 return (error); 7109 } 7110 7111 flags = *flagsp; 7112 if (stp->sd_flag & (STRDERR|STPLEX)) { 7113 if ((stp->sd_flag & STPLEX) || 7114 (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == 0) { 7115 error = strgeterr(stp, STRDERR|STPLEX, 7116 (flags & MSG_IPEEK)); 7117 if (error != 0) { 7118 mutex_exit(&stp->sd_lock); 7119 return (error); 7120 } 7121 } 7122 } 7123 mutex_exit(&stp->sd_lock); 7124 7125 switch (flags & (MSG_HIPRI|MSG_ANY|MSG_BAND)) { 7126 case MSG_HIPRI: 
7127 if (*prip != 0) 7128 return (EINVAL); 7129 break; 7130 7131 case MSG_ANY: 7132 case MSG_BAND: 7133 break; 7134 7135 default: 7136 return (EINVAL); 7137 } 7138 7139 retry: 7140 q = _RD(stp->sd_wrq); 7141 mutex_enter(&stp->sd_lock); 7142 old_sd_flag = stp->sd_flag; 7143 mark = 0; 7144 for (;;) { 7145 int done = 0; 7146 int waitflag; 7147 int fmode; 7148 mblk_t *q_first = q->q_first; 7149 7150 /* 7151 * This section of the code operates just like the code 7152 * in strgetmsg(). There is a comment there about what 7153 * is going on here. 7154 */ 7155 if (!(flags & (MSG_HIPRI|MSG_BAND))) { 7156 /* Asking for normal, band0 data */ 7157 bp = strget(stp, q, uiop, first, &error); 7158 ASSERT(MUTEX_HELD(&stp->sd_lock)); 7159 if (bp != NULL) { 7160 if (DB_TYPE(bp) == M_SIG) { 7161 strsignal_nolock(stp, *bp->b_rptr, 7162 bp->b_band); 7163 freemsg(bp); 7164 continue; 7165 } else { 7166 break; 7167 } 7168 } 7169 if (error != 0) { 7170 goto getmout; 7171 } 7172 /* 7173 * We can't depend on the value of STRPRI here because 7174 * the stream head may be in transit. Therefore, we 7175 * must look at the type of the first message to 7176 * determine if a high priority messages is waiting 7177 */ 7178 } else if ((flags & MSG_HIPRI) && q_first != NULL && 7179 DB_TYPE(q_first) >= QPCTL && 7180 (bp = getq_noenab(q, 0)) != NULL) { 7181 ASSERT(DB_TYPE(bp) >= QPCTL); 7182 break; 7183 } else if ((flags & MSG_BAND) && q_first != NULL && 7184 ((q_first->b_band >= *prip) || DB_TYPE(q_first) >= QPCTL) && 7185 (bp = getq_noenab(q, 0)) != NULL) { 7186 /* 7187 * Asked for at least band "prip" and got either at 7188 * least that band or a hipri message. 7189 */ 7190 ASSERT(bp->b_band >= *prip || DB_TYPE(bp) >= QPCTL); 7191 if (DB_TYPE(bp) == M_SIG) { 7192 strsignal_nolock(stp, *bp->b_rptr, bp->b_band); 7193 freemsg(bp); 7194 continue; 7195 } else { 7196 break; 7197 } 7198 } 7199 7200 /* No data. Time to sleep? */ 7201 qbackenable(q, 0); 7202 7203 /* 7204 * Delayed error notification? 
7205 */ 7206 if ((stp->sd_flag & (STRDERR|STPLEX)) && 7207 (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == MSG_DELAYERROR) { 7208 error = strgeterr(stp, STRDERR|STPLEX, 7209 (flags & MSG_IPEEK)); 7210 if (error != 0) { 7211 mutex_exit(&stp->sd_lock); 7212 return (error); 7213 } 7214 } 7215 7216 /* 7217 * If STRHUP or STREOF, return 0 length control and data. 7218 * If a read(fd,buf,0) has been done, do not sleep, just 7219 * return. 7220 * 7221 * If mctlp == NULL and uiop == NULL, then the code will 7222 * do the strwaitq. This is an understood way of saying 7223 * sleep "polling" until a message is received. 7224 */ 7225 if ((stp->sd_flag & (STRHUP|STREOF)) || 7226 (uiop != NULL && uiop->uio_resid == 0)) { 7227 if (mctlp != NULL) 7228 *mctlp = NULL; 7229 *flagsp = 0; 7230 mutex_exit(&stp->sd_lock); 7231 return (0); 7232 } 7233 7234 waitflag = GETWAIT; 7235 if (flags & 7236 (MSG_HOLDSIG|MSG_IGNERROR|MSG_IPEEK|MSG_DELAYERROR)) { 7237 if (flags & MSG_HOLDSIG) 7238 waitflag |= STR_NOSIG; 7239 if (flags & MSG_IGNERROR) 7240 waitflag |= STR_NOERROR; 7241 if (flags & MSG_IPEEK) 7242 waitflag |= STR_PEEK; 7243 if (flags & MSG_DELAYERROR) 7244 waitflag |= STR_DELAYERR; 7245 } 7246 if (uiop != NULL) 7247 fmode = uiop->uio_fmode; 7248 else 7249 fmode = 0; 7250 7251 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_WAIT, 7252 "kstrgetmsg calls strwaitq:%p, %p", 7253 vp, uiop); 7254 if (((error = strwaitq(stp, waitflag, (ssize_t)0, 7255 fmode, timout, &done))) != 0 || done) { 7256 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_DONE, 7257 "kstrgetmsg error or done:%p, %p", 7258 vp, uiop); 7259 mutex_exit(&stp->sd_lock); 7260 return (error); 7261 } 7262 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_AWAKE, 7263 "kstrgetmsg awakes:%p, %p", vp, uiop); 7264 if ((error = i_straccess(stp, JCREAD)) != 0) { 7265 mutex_exit(&stp->sd_lock); 7266 return (error); 7267 } 7268 first = 0; 7269 } 7270 ASSERT(bp != NULL); 7271 /* 7272 * Extract any mark information. 
If the message is not completely 7273 * consumed this information will be put in the mblk 7274 * that is putback. 7275 * If MSGMARKNEXT is set and the message is completely consumed 7276 * the STRATMARK flag will be set below. Likewise, if 7277 * MSGNOTMARKNEXT is set and the message is 7278 * completely consumed STRNOTATMARK will be set. 7279 */ 7280 mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT); 7281 ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) != 7282 (MSGMARKNEXT|MSGNOTMARKNEXT)); 7283 pri = bp->b_band; 7284 if (mark != 0) { 7285 /* 7286 * If the caller doesn't want the mark return. 7287 * Used to implement MSG_WAITALL in sockets. 7288 */ 7289 if (flags & MSG_NOMARK) { 7290 putback(stp, q, bp, pri); 7291 qbackenable(q, pri); 7292 mutex_exit(&stp->sd_lock); 7293 return (EWOULDBLOCK); 7294 } 7295 if (bp == stp->sd_mark) { 7296 mark |= _LASTMARK; 7297 stp->sd_mark = NULL; 7298 } 7299 } 7300 7301 /* 7302 * keep track of the first message type 7303 */ 7304 type = bp->b_datap->db_type; 7305 7306 if (bp->b_datap->db_type == M_PASSFP) { 7307 if ((mark & _LASTMARK) && (stp->sd_mark == NULL)) 7308 stp->sd_mark = bp; 7309 bp->b_flag |= mark & ~_LASTMARK; 7310 putback(stp, q, bp, pri); 7311 qbackenable(q, pri); 7312 mutex_exit(&stp->sd_lock); 7313 return (EBADMSG); 7314 } 7315 ASSERT(type != M_SIG); 7316 7317 if (flags & MSG_IPEEK) { 7318 /* 7319 * Clear any struioflag - we do the uiomove over again 7320 * when peeking since it simplifies the code. 7321 * 7322 * Dup the message and put the original back on the queue. 7323 * If dupmsg() fails, try again with copymsg() to see if 7324 * there is indeed a shortage of memory. dupmsg() may fail 7325 * if db_ref in any of the messages reaches its limit. 7326 */ 7327 7328 if ((nbp = dupmsg(bp)) == NULL && (nbp = copymsg(bp)) == NULL) { 7329 /* 7330 * Restore the state of the stream head since we 7331 * need to drop sd_lock (strwaitbuf is sleeping). 
7332 */ 7333 size_t size = msgdsize(bp); 7334 7335 if ((mark & _LASTMARK) && (stp->sd_mark == NULL)) 7336 stp->sd_mark = bp; 7337 bp->b_flag |= mark & ~_LASTMARK; 7338 putback(stp, q, bp, pri); 7339 mutex_exit(&stp->sd_lock); 7340 error = strwaitbuf(size, BPRI_HI); 7341 if (error) { 7342 /* 7343 * There is no net change to the queue thus 7344 * no need to qbackenable. 7345 */ 7346 return (error); 7347 } 7348 goto retry; 7349 } 7350 7351 if ((mark & _LASTMARK) && (stp->sd_mark == NULL)) 7352 stp->sd_mark = bp; 7353 bp->b_flag |= mark & ~_LASTMARK; 7354 putback(stp, q, bp, pri); 7355 bp = nbp; 7356 } 7357 7358 /* 7359 * Set this flag so strrput will not generate signals. Need to 7360 * make sure this flag is cleared before leaving this routine 7361 * else signals will stop being sent. 7362 */ 7363 stp->sd_flag |= STRGETINPROG; 7364 mutex_exit(&stp->sd_lock); 7365 7366 if ((stp->sd_rputdatafunc != NULL) && (DB_TYPE(bp) == M_DATA)) { 7367 mblk_t *tmp, *prevmp; 7368 7369 /* 7370 * Put first non-data mblk back to stream head and 7371 * cut the mblk chain so sd_rputdatafunc only sees 7372 * M_DATA mblks. We can skip the first mblk since it 7373 * is M_DATA according to the condition above. 7374 */ 7375 for (prevmp = bp, tmp = bp->b_cont; tmp != NULL; 7376 prevmp = tmp, tmp = tmp->b_cont) { 7377 if (DB_TYPE(tmp) != M_DATA) { 7378 prevmp->b_cont = NULL; 7379 mutex_enter(&stp->sd_lock); 7380 putback(stp, q, tmp, tmp->b_band); 7381 mutex_exit(&stp->sd_lock); 7382 break; 7383 } 7384 } 7385 7386 bp = (stp->sd_rputdatafunc)(stp->sd_vnode, bp, 7387 NULL, NULL, NULL, NULL); 7388 7389 if (bp == NULL) 7390 goto retry; 7391 } 7392 7393 if (STREAM_NEEDSERVICE(stp)) 7394 stream_runservice(stp); 7395 7396 /* 7397 * Set HIPRI flag if message is priority. 7398 */ 7399 if (type >= QPCTL) 7400 flg = MSG_HIPRI; 7401 else 7402 flg = MSG_BAND; 7403 7404 /* 7405 * First process PROTO or PCPROTO blocks, if any. 
7406 */ 7407 if (mctlp != NULL && type != M_DATA) { 7408 mblk_t *nbp; 7409 7410 *mctlp = bp; 7411 while (bp->b_cont && bp->b_cont->b_datap->db_type != M_DATA) 7412 bp = bp->b_cont; 7413 nbp = bp->b_cont; 7414 bp->b_cont = NULL; 7415 bp = nbp; 7416 } 7417 7418 if (bp && bp->b_datap->db_type != M_DATA) { 7419 /* 7420 * More PROTO blocks in msg. Will only happen if mctlp is NULL. 7421 */ 7422 more |= MORECTL; 7423 savemp = bp; 7424 while (bp && bp->b_datap->db_type != M_DATA) { 7425 savemptail = bp; 7426 bp = bp->b_cont; 7427 } 7428 savemptail->b_cont = NULL; 7429 } 7430 7431 /* 7432 * Now process DATA blocks, if any. 7433 */ 7434 if (uiop == NULL) { 7435 /* Append data to tail of mctlp */ 7436 7437 if (mctlp != NULL) { 7438 mblk_t **mpp = mctlp; 7439 7440 while (*mpp != NULL) 7441 mpp = &((*mpp)->b_cont); 7442 *mpp = bp; 7443 bp = NULL; 7444 } 7445 } else if (uiop->uio_resid >= 0 && bp) { 7446 size_t oldresid = uiop->uio_resid; 7447 7448 /* 7449 * If a streams message is likely to consist 7450 * of many small mblks, it is pulled up into 7451 * one continuous chunk of memory. 7452 * The size of the first mblk may be bogus because 7453 * successive read() calls on the socket reduce 7454 * the size of this mblk until it is exhausted 7455 * and then the code walks on to the next. Thus 7456 * the size of the mblk may not be the original size 7457 * that was passed up, it's simply a remainder 7458 * and hence can be very small without any 7459 * implication that the packet is badly fragmented. 7460 * So the size of the possible second mblk is 7461 * used to spot a badly fragmented packet. 7462 * see longer comment at top of page 7463 * by mblk_pull_len declaration. 
7464 */ 7465 7466 if (bp->b_cont != NULL && MBLKL(bp->b_cont) < mblk_pull_len) { 7467 (void) pullupmsg(bp, -1); 7468 } 7469 7470 bp = struiocopyout(bp, uiop, &error); 7471 if (error != 0) { 7472 if (mctlp != NULL) { 7473 freemsg(*mctlp); 7474 *mctlp = NULL; 7475 } else 7476 freemsg(savemp); 7477 mutex_enter(&stp->sd_lock); 7478 /* 7479 * clear stream head hi pri flag based on 7480 * first message 7481 */ 7482 if (!(flags & MSG_IPEEK) && (type >= QPCTL)) { 7483 ASSERT(type == M_PCPROTO); 7484 stp->sd_flag &= ~STRPRI; 7485 } 7486 more = 0; 7487 goto getmout; 7488 } 7489 /* 7490 * (pr == 1) indicates a partial read. 7491 */ 7492 if (oldresid > uiop->uio_resid) 7493 pr = 1; 7494 } 7495 7496 if (bp) { /* more data blocks in msg */ 7497 more |= MOREDATA; 7498 if (savemp) 7499 savemptail->b_cont = bp; 7500 else 7501 savemp = bp; 7502 } 7503 7504 mutex_enter(&stp->sd_lock); 7505 if (savemp) { 7506 if (flags & (MSG_IPEEK|MSG_DISCARDTAIL)) { 7507 /* 7508 * When MSG_DISCARDTAIL is set or 7509 * when peeking discard any tail. When peeking this 7510 * is the tail of the dup that was copied out - the 7511 * message has already been putback on the queue. 7512 * Return MOREDATA to the caller even though the data 7513 * is discarded. This is used by sockets (to 7514 * set MSG_TRUNC). 7515 */ 7516 freemsg(savemp); 7517 if (!(flags & MSG_IPEEK) && (type >= QPCTL)) { 7518 ASSERT(type == M_PCPROTO); 7519 stp->sd_flag &= ~STRPRI; 7520 } 7521 } else if (pr && (savemp->b_datap->db_type == M_DATA) && 7522 msgnodata(savemp)) { 7523 /* 7524 * Avoid queuing a zero-length tail part of 7525 * a message. pr=1 indicates that we read some of 7526 * the message. 7527 */ 7528 freemsg(savemp); 7529 more &= ~MOREDATA; 7530 if (type >= QPCTL) { 7531 ASSERT(type == M_PCPROTO); 7532 stp->sd_flag &= ~STRPRI; 7533 } 7534 } else { 7535 savemp->b_band = pri; 7536 /* 7537 * If the first message was HIPRI and the one we're 7538 * putting back isn't, then clear STRPRI, otherwise 7539 * set STRPRI again. 
Note that we must set STRPRI 7540 * again since the flush logic in strrput_nondata() 7541 * may have cleared it while we had sd_lock dropped. 7542 */ 7543 7544 if (type >= QPCTL) { 7545 ASSERT(type == M_PCPROTO); 7546 if (queclass(savemp) < QPCTL) 7547 stp->sd_flag &= ~STRPRI; 7548 else 7549 stp->sd_flag |= STRPRI; 7550 } else if (queclass(savemp) >= QPCTL) { 7551 /* 7552 * The first message was not a HIPRI message, 7553 * but the one we are about to putback is. 7554 * For simplicitly, we do not allow for HIPRI 7555 * messages to be embedded in the message 7556 * body, so just force it to same type as 7557 * first message. 7558 */ 7559 ASSERT(type == M_DATA || type == M_PROTO); 7560 ASSERT(savemp->b_datap->db_type == M_PCPROTO); 7561 savemp->b_datap->db_type = type; 7562 } 7563 if (mark != 0) { 7564 if ((mark & _LASTMARK) && 7565 (stp->sd_mark == NULL)) { 7566 /* 7567 * If another marked message arrived 7568 * while sd_lock was not held sd_mark 7569 * would be non-NULL. 7570 */ 7571 stp->sd_mark = savemp; 7572 } 7573 savemp->b_flag |= mark & ~_LASTMARK; 7574 } 7575 putback(stp, q, savemp, pri); 7576 } 7577 } else if (!(flags & MSG_IPEEK)) { 7578 /* 7579 * The complete message was consumed. 7580 * 7581 * If another M_PCPROTO arrived while sd_lock was not held 7582 * it would have been discarded since STRPRI was still set. 7583 * 7584 * Move the MSG*MARKNEXT information 7585 * to the stream head just in case 7586 * the read queue becomes empty. 7587 * clear stream head hi pri flag based on 7588 * first message 7589 * 7590 * If the stream head was at the mark 7591 * (STRATMARK) before we dropped sd_lock above 7592 * and some data was consumed then we have 7593 * moved past the mark thus STRATMARK is 7594 * cleared. However, if a message arrived in 7595 * strrput during the copyout above causing 7596 * STRATMARK to be set we can not clear that 7597 * flag. 7598 * XXX A "perimeter" would help by single-threading strrput, 7599 * strread, strgetmsg and kstrgetmsg. 
7600 */ 7601 if (type >= QPCTL) { 7602 ASSERT(type == M_PCPROTO); 7603 stp->sd_flag &= ~STRPRI; 7604 } 7605 if (mark & (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) { 7606 if (mark & MSGMARKNEXT) { 7607 stp->sd_flag &= ~STRNOTATMARK; 7608 stp->sd_flag |= STRATMARK; 7609 } else if (mark & MSGNOTMARKNEXT) { 7610 stp->sd_flag &= ~STRATMARK; 7611 stp->sd_flag |= STRNOTATMARK; 7612 } else { 7613 stp->sd_flag &= ~(STRATMARK|STRNOTATMARK); 7614 } 7615 } else if (pr && (old_sd_flag & STRATMARK)) { 7616 stp->sd_flag &= ~STRATMARK; 7617 } 7618 } 7619 7620 *flagsp = flg; 7621 *prip = pri; 7622 7623 /* 7624 * Getmsg cleanup processing - if the state of the queue has changed 7625 * some signals may need to be sent and/or poll awakened. 7626 */ 7627 getmout: 7628 qbackenable(q, pri); 7629 7630 /* 7631 * We dropped the stream head lock above. Send all M_SIG messages 7632 * before processing stream head for SIGPOLL messages. 7633 */ 7634 ASSERT(MUTEX_HELD(&stp->sd_lock)); 7635 while ((bp = q->q_first) != NULL && 7636 (bp->b_datap->db_type == M_SIG)) { 7637 /* 7638 * sd_lock is held so the content of the read queue can not 7639 * change. 7640 */ 7641 bp = getq(q); 7642 ASSERT(bp != NULL && bp->b_datap->db_type == M_SIG); 7643 7644 strsignal_nolock(stp, *bp->b_rptr, bp->b_band); 7645 mutex_exit(&stp->sd_lock); 7646 freemsg(bp); 7647 if (STREAM_NEEDSERVICE(stp)) 7648 stream_runservice(stp); 7649 mutex_enter(&stp->sd_lock); 7650 } 7651 7652 /* 7653 * stream head cannot change while we make the determination 7654 * whether or not to send a signal. Drop the flag to allow strrput 7655 * to send firstmsgsigs again. 7656 */ 7657 stp->sd_flag &= ~STRGETINPROG; 7658 7659 /* 7660 * If the type of message at the front of the queue changed 7661 * due to the receive the appropriate signals and pollwakeup events 7662 * are generated. The type of changes are: 7663 * Processed a hipri message, q_first is not hipri. 7664 * Processed a band X message, and q_first is band Y. 
7665 * The generated signals and pollwakeups are identical to what 7666 * strrput() generates should the message that is now on q_first 7667 * arrive to an empty read queue. 7668 * 7669 * Note: only strrput will send a signal for a hipri message. 7670 */ 7671 if ((bp = q->q_first) != NULL && !(stp->sd_flag & STRPRI)) { 7672 strsigset_t signals = 0; 7673 strpollset_t pollwakeups = 0; 7674 7675 if (flg & MSG_HIPRI) { 7676 /* 7677 * Removed a hipri message. Regular data at 7678 * the front of the queue. 7679 */ 7680 if (bp->b_band == 0) { 7681 signals = S_INPUT | S_RDNORM; 7682 pollwakeups = POLLIN | POLLRDNORM; 7683 } else { 7684 signals = S_INPUT | S_RDBAND; 7685 pollwakeups = POLLIN | POLLRDBAND; 7686 } 7687 } else if (pri != bp->b_band) { 7688 /* 7689 * The band is different for the new q_first. 7690 */ 7691 if (bp->b_band == 0) { 7692 signals = S_RDNORM; 7693 pollwakeups = POLLIN | POLLRDNORM; 7694 } else { 7695 signals = S_RDBAND; 7696 pollwakeups = POLLIN | POLLRDBAND; 7697 } 7698 } 7699 7700 if (pollwakeups != 0) { 7701 if (pollwakeups == (POLLIN | POLLRDNORM)) { 7702 if (!(stp->sd_rput_opt & SR_POLLIN)) 7703 goto no_pollwake; 7704 stp->sd_rput_opt &= ~SR_POLLIN; 7705 } 7706 mutex_exit(&stp->sd_lock); 7707 pollwakeup(&stp->sd_pollist, pollwakeups); 7708 mutex_enter(&stp->sd_lock); 7709 } 7710 no_pollwake: 7711 7712 if (stp->sd_sigflags & signals) 7713 strsendsig(stp->sd_siglist, signals, bp->b_band, 0); 7714 } 7715 mutex_exit(&stp->sd_lock); 7716 7717 rvp->r_val1 = more; 7718 return (error); 7719 #undef _LASTMARK 7720 } 7721 7722 /* 7723 * Put a message downstream. 7724 * 7725 * NOTE: strputmsg and kstrputmsg have much of the logic in common. 
 *
 * vp is the vnode of the stream (vp->v_stream must be set).  mctl and
 * mdata describe the control and data parts; a negative mdata->len means
 * there is no data part and disables the minimum packet size check.
 * flag must be MSG_HIPRI (which requires pri == 0 and mctl->len >= 0) or
 * MSG_BAND, optionally or'ed with MSG_XPG4.  MSG_XPG4 is stripped here
 * and handed to strwriteable().
 *
 * mdata->len is overwritten with the size value handed back by strput().
 *
 * Returns 0 or an errno value: EINVAL for an illegal flag/pri combination,
 * ERANGE when the sizes fall outside the cached packet size limits or
 * mctl->len exceeds strctlsz, otherwise whatever i_straccess(),
 * strwriteable(), strmakectl(), strput() or strwaitq() report.  ENOMEM
 * from strput() is reported as EAGAIN.
 */
int
strputmsg(
	struct vnode *vp,
	struct strbuf *mctl,
	struct strbuf *mdata,
	unsigned char pri,
	int flag,
	int fmode)
{
	struct stdata *stp;
	queue_t *wqp;
	mblk_t *mp;
	ssize_t msgsize;
	ssize_t rmin, rmax;
	int error;
	struct uio uios;
	struct uio *uiop = &uios;
	struct iovec iovs;
	int xpg4 = 0;

	ASSERT(vp->v_stream);
	stp = vp->v_stream;
	wqp = stp->sd_wrq;

	/*
	 * If it is an XPG4 application, we need to send
	 * SIGPIPE below
	 */

	xpg4 = (flag & MSG_XPG4) ? 1 : 0;
	flag &= ~MSG_XPG4;

	if (audit_active)
		audit_strputmsg(vp, mctl, mdata, pri, flag, fmode);

	mutex_enter(&stp->sd_lock);

	if ((error = i_straccess(stp, JCWRITE)) != 0) {
		mutex_exit(&stp->sd_lock);
		return (error);
	}

	if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
		error = strwriteable(stp, B_FALSE, xpg4);
		if (error != 0) {
			mutex_exit(&stp->sd_lock);
			return (error);
		}
	}

	mutex_exit(&stp->sd_lock);

	/*
	 * Check for legal flag value.
	 */
	switch (flag) {
	case MSG_HIPRI:
		if ((mctl->len < 0) || (pri != 0))
			return (EINVAL);
		break;
	case MSG_BAND:
		break;

	default:
		return (EINVAL);
	}

	TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_IN,
	    "strputmsg in:stp %p", stp);

	/* get these values from those cached in the stream head */
	rmin = stp->sd_qn_minpsz;
	rmax = stp->sd_qn_maxpsz;

	/*
	 * Make sure ctl and data sizes together fall within the
	 * limits of the max and min receive packet sizes and do
	 * not exceed system limit.
	 */
	ASSERT((rmax >= 0) || (rmax == INFPSZ));
	if (rmax == 0) {
		return (ERANGE);
	}
	/*
	 * Use the MAXIMUM of sd_maxblk and q_maxpsz.
	 * Needed to prevent partial failures in the strmakedata loop.
	 */
	if (stp->sd_maxblk != INFPSZ && rmax != INFPSZ && rmax < stp->sd_maxblk)
		rmax = stp->sd_maxblk;

	if ((msgsize = mdata->len) < 0) {
		msgsize = 0;
		rmin = 0;	/* no range check for NULL data part */
	}
	if ((msgsize < rmin) ||
	    ((msgsize > rmax) && (rmax != INFPSZ)) ||
	    (mctl->len > strctlsz)) {
		return (ERANGE);
	}

	/*
	 * Setup uio and iov for data part
	 */
	iovs.iov_base = mdata->buf;
	iovs.iov_len = msgsize;
	uios.uio_iov = &iovs;
	uios.uio_iovcnt = 1;
	uios.uio_loffset = 0;
	uios.uio_segflg = UIO_USERSPACE;
	uios.uio_fmode = fmode;
	uios.uio_extflg = UIO_COPY_DEFAULT;
	uios.uio_resid = msgsize;
	uios.uio_offset = 0;

	/* Ignore flow control in strput for HIPRI */
	if (flag & MSG_HIPRI)
		flag |= MSG_IGNFLOW;

	/*
	 * Each pass builds a fresh control mblk and tries to send it.
	 * The loop repeats only when strput() reports EWOULDBLOCK.
	 */
	for (;;) {
		int done = 0;

		/*
		 * strput will always free the ctl mblk - even when strput
		 * fails.
		 */
		if ((error = strmakectl(mctl, flag, fmode, &mp)) != 0) {
			TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT,
			    "strputmsg out:stp %p out %d error %d",
			    stp, 1, error);
			return (error);
		}
		/*
		 * Verify that the whole message can be transferred by
		 * strput.
		 */
		ASSERT(stp->sd_maxblk == INFPSZ ||
		    stp->sd_maxblk >= mdata->len);

		/* strput() takes the size by reference; copy it back out. */
		msgsize = mdata->len;
		error = strput(stp, mp, uiop, &msgsize, 0, pri, flag);
		mdata->len = msgsize;

		if (error == 0)
			break;

		if (error != EWOULDBLOCK)
			goto out;

		mutex_enter(&stp->sd_lock);
		/*
		 * Check for a missed wakeup.
		 * Needed since strput did not hold sd_lock across
		 * the canputnext.
		 */
		if (bcanputnext(wqp, pri)) {
			/* Try again */
			mutex_exit(&stp->sd_lock);
			continue;
		}
		TRACE_2(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAIT,
		    "strputmsg wait:stp %p waits pri %d", stp, pri);
		if (((error = strwaitq(stp, WRITEWAIT, (ssize_t)0, fmode, -1,
		    &done)) != 0) || done) {
			mutex_exit(&stp->sd_lock);
			TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT,
			    "strputmsg out:q %p out %d error %d",
			    stp, 0, error);
			return (error);
		}
		TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAKE,
		    "strputmsg wake:stp %p wakes", stp);
		/* Re-check access after having slept in strwaitq(). */
		if ((error = i_straccess(stp, JCWRITE)) != 0) {
			mutex_exit(&stp->sd_lock);
			return (error);
		}
		mutex_exit(&stp->sd_lock);
	}
out:
	/*
	 * For historic reasons, applications expect EAGAIN
	 * when data mblk could not be allocated. so change
	 * ENOMEM back to EAGAIN
	 */
	if (error == ENOMEM)
		error = EAGAIN;
	TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT,
	    "strputmsg out:stp %p out %d error %d", stp, 2, error);
	return (error);
}

/*
 * Put a message downstream.
 * Can send only an M_PROTO/M_PCPROTO by passing in a NULL uiop.
 * The fmode flag (NDELAY, NONBLOCK) is the or of the flags in the uio
 * and the fmode parameter.
 *
 * This routine handles the consolidation private flags:
 *	MSG_IGNERROR	Ignore any stream head error except STPLEX.
 *	MSG_HOLDSIG	Hold signals while waiting for data.
 *	MSG_IGNFLOW	Don't check streams flow control.
 *
 * NOTE: strputmsg and kstrputmsg have much of the logic in common.
 *
 * mctl is the control message to send; passing NULL returns EINVAL.
 * After that check mctl is consumed on every path: it is either handed
 * to strput() or released with freemsg() before returning, so the caller
 * must not touch it again.
 *
 * msgsize is the size of the data part described by uiop; it is ignored
 * (forced to -1) when uiop is NULL.  flag must contain MSG_HIPRI (which
 * requires pri == 0) or MSG_BAND.
 *
 * Returns 0 or an errno value: EINVAL for a NULL mctl or an illegal
 * flag/pri combination, ERANGE when msgsize falls outside the cached
 * packet size limits, otherwise whatever i_straccess(), strwriteable(),
 * strwaitbuf(), strput() or strwaitq() report.  ENOMEM from strput() is
 * reported as EAGAIN.
 */
int
kstrputmsg(
	struct vnode *vp,
	mblk_t *mctl,
	struct uio *uiop,
	ssize_t msgsize,
	unsigned char pri,
	int flag,
	int fmode)
{
	struct stdata *stp;
	queue_t *wqp;
	ssize_t rmin, rmax;
	int error;

	ASSERT(vp->v_stream);
	stp = vp->v_stream;
	wqp = stp->sd_wrq;
	if (audit_active)
		audit_strputmsg(vp, NULL, NULL, pri, flag, fmode);
	if (mctl == NULL)
		return (EINVAL);

	mutex_enter(&stp->sd_lock);

	if ((error = i_straccess(stp, JCWRITE)) != 0) {
		mutex_exit(&stp->sd_lock);
		freemsg(mctl);
		return (error);
	}

	/* MSG_IGNERROR suppresses the error check unless STPLEX is set. */
	if ((stp->sd_flag & STPLEX) || !(flag & MSG_IGNERROR)) {
		if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
			error = strwriteable(stp, B_FALSE, B_TRUE);
			if (error != 0) {
				mutex_exit(&stp->sd_lock);
				freemsg(mctl);
				return (error);
			}
		}
	}

	mutex_exit(&stp->sd_lock);

	/*
	 * Check for legal flag value.
	 */
	switch (flag & (MSG_HIPRI|MSG_BAND|MSG_ANY)) {
	case MSG_HIPRI:
		if (pri != 0) {
			freemsg(mctl);
			return (EINVAL);
		}
		break;
	case MSG_BAND:
		break;
	default:
		freemsg(mctl);
		return (EINVAL);
	}

	TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_IN,
	    "kstrputmsg in:stp %p", stp);

	/* get these values from those cached in the stream head */
	rmin = stp->sd_qn_minpsz;
	rmax = stp->sd_qn_maxpsz;

	/*
	 * Make sure ctl and data sizes together fall within the
	 * limits of the max and min receive packet sizes and do
	 * not exceed system limit.
	 */
	ASSERT((rmax >= 0) || (rmax == INFPSZ));
	if (rmax == 0) {
		freemsg(mctl);
		return (ERANGE);
	}
	/*
	 * Use the MAXIMUM of sd_maxblk and q_maxpsz.
	 * Needed to prevent partial failures in the strmakedata loop.
	 */
	if (stp->sd_maxblk != INFPSZ && rmax != INFPSZ && rmax < stp->sd_maxblk)
		rmax = stp->sd_maxblk;

	if (uiop == NULL) {
		msgsize = -1;
		rmin = -1;	/* no range check for NULL data part */
	} else {
		/* Use uio flags as well as the fmode parameter flags */
		fmode |= uiop->uio_fmode;

		if ((msgsize < rmin) ||
		    ((msgsize > rmax) && (rmax != INFPSZ))) {
			freemsg(mctl);
			return (ERANGE);
		}
	}

	/* Ignore flow control in strput for HIPRI */
	if (flag & MSG_HIPRI)
		flag |= MSG_IGNFLOW;

	for (;;) {
		int done = 0;
		int waitflag;
		mblk_t *mp;

		/*
		 * strput will always free the ctl mblk - even when strput
		 * fails. If MSG_IGNFLOW is set then any error returned
		 * will cause us to break the loop, so we don't need a copy
		 * of the message. If MSG_IGNFLOW is not set, then we can
		 * get hit by flow control and be forced to try again. In
		 * this case we need to have a copy of the message. We
		 * do this using copymsg since the message may get modified
		 * by something below us.
		 *
		 * We've observed that many TPI providers do not check db_ref
		 * on the control messages but blindly reuse them for the
		 * T_OK_ACK/T_ERROR_ACK. Thus using copymsg is more
		 * friendly to such providers than using dupmsg. Also, note
		 * that sockfs uses MSG_IGNFLOW for all TPI control messages.
		 * Only data messages are subject to flow control, hence
		 * subject to this copymsg.
		 */
		if (flag & MSG_IGNFLOW) {
			/*
			 * Hand the original over to strput(); clearing mctl
			 * makes the later freemsg(mctl) calls operate on
			 * NULL instead of the message strput() now owns.
			 */
			mp = mctl;
			mctl = NULL;
		} else {
			do {
				/*
				 * If a message has a free pointer, the message
				 * must be dupmsg to maintain this pointer.
				 * Code using this facility must be sure
				 * that modules below will not change the
				 * contents of the dblk without checking db_ref
				 * first. If db_ref is > 1, then the module
				 * needs to do a copymsg first. Otherwise,
				 * the contents of the dblk may become
				 * inconsistent because the freemsg/freeb below
				 * may end up calling atomic_add_32_nv.
				 * The atomic_add_32_nv in freeb (accessing
				 * all of db_ref, db_type, db_flags, and
				 * db_struioflag) does not prevent other threads
				 * from concurrently trying to modify e.g.
				 * db_type.
				 */
				if (mctl->b_datap->db_frtnp != NULL)
					mp = dupmsg(mctl);
				else
					mp = copymsg(mctl);

				if (mp != NULL)
					break;

				/* Wait for memory, then try the copy again. */
				error = strwaitbuf(msgdsize(mctl), BPRI_MED);
				if (error) {
					freemsg(mctl);
					return (error);
				}
			} while (mp == NULL);
		}
		/*
		 * Verify that all of msgsize can be transferred by
		 * strput.
		 */
		ASSERT(stp->sd_maxblk == INFPSZ || stp->sd_maxblk >= msgsize);
		error = strput(stp, mp, uiop, &msgsize, 0, pri, flag);
		if (error == 0)
			break;

		if (error != EWOULDBLOCK)
			goto out;

		/*
		 * IF MSG_IGNFLOW is set we should have broken out of loop
		 * above.
		 */
		ASSERT(!(flag & MSG_IGNFLOW));
		mutex_enter(&stp->sd_lock);
		/*
		 * Check for a missed wakeup.
		 * Needed since strput did not hold sd_lock across
		 * the canputnext.
		 */
		if (bcanputnext(wqp, pri)) {
			/* Try again */
			mutex_exit(&stp->sd_lock);
			continue;
		}
		TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAIT,
		    "kstrputmsg wait:stp %p waits pri %d", stp, pri);

		/* Translate the consolidation private flags for strwaitq(). */
		waitflag = WRITEWAIT;
		if (flag & (MSG_HOLDSIG|MSG_IGNERROR)) {
			if (flag & MSG_HOLDSIG)
				waitflag |= STR_NOSIG;
			if (flag & MSG_IGNERROR)
				waitflag |= STR_NOERROR;
		}
		if (((error = strwaitq(stp, waitflag,
		    (ssize_t)0, fmode, -1, &done)) != 0) || done) {
			mutex_exit(&stp->sd_lock);
			TRACE_3(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_OUT,
			    "kstrputmsg out:stp %p out %d error %d",
			    stp, 0, error);
			freemsg(mctl);
			return (error);
		}
		TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAKE,
		    "kstrputmsg wake:stp %p wakes", stp);
		/* Re-check access after having slept in strwaitq(). */
		if ((error = i_straccess(stp, JCWRITE)) != 0) {
			mutex_exit(&stp->sd_lock);
			freemsg(mctl);
			return (error);
		}
		mutex_exit(&stp->sd_lock);
	}
out:
	/* Release the caller's original (NULL if it was given to strput). */
	freemsg(mctl);
	/*
	 * For historic reasons, applications expect EAGAIN
	 * when data mblk could not be allocated. so change
	 * ENOMEM back to EAGAIN
	 */
	if (error == ENOMEM)
		error = EAGAIN;
	TRACE_3(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_OUT,
	    "kstrputmsg out:stp %p out %d error %d", stp, 2, error);
	return (error);
}

/*
 * Determines whether the necessary conditions are set on a stream
 * for it to be readable, writeable, or have exceptions.
 *
 * strpoll handles the consolidation private events:
 *	POLLNOERR	Do not return POLLERR even if there are stream
 *			head errors.
 *			Used by sockfs.
 *	POLLRDDATA	Do not return POLLIN unless at least one message on
 *			the queue contains one or more M_DATA mblks. Thus
 *			when this flag is set a queue with only
 *			M_PROTO/M_PCPROTO mblks does not return POLLIN.
 *			Used by sockfs to ignore T_EXDATA_IND messages.
 *
 * Note: POLLRDDATA assumes that synch streams only return messages with
 * an M_DATA attached (i.e. not messages consisting of only
 * an M_PROTO/M_PCPROTO part).
 *
 * The events found are stored in *reventsp.  If none were found and
 * anyyet is zero, *phpp is pointed at the stream's pollhead and SR_POLLIN
 * is set in sd_rput_opt.
 *
 * Returns EINVAL (with *reventsp set to POLLNVAL) when STPLEX is set on
 * the stream head, otherwise 0.
 */
int
strpoll(
	struct stdata *stp,
	short events_arg,
	int anyyet,
	short *reventsp,
	struct pollhead **phpp)
{
	int events = (ushort_t)events_arg;
	int retevents = 0;
	mblk_t *mp;
	qband_t *qbp;
	/* Snapshot of sd_flag taken without sd_lock held. */
	long sd_flags = stp->sd_flag;
	/* Non-zero once sd_lock has been taken and must be released. */
	int headlocked = 0;

	/*
	 * For performance, a single 'if' tests for most possible edge
	 * conditions in one shot
	 */
	if (sd_flags & (STPLEX | STRDERR | STWRERR)) {
		if (sd_flags & STPLEX) {
			*reventsp = POLLNVAL;
			return (EINVAL);
		}
		if (((events & (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) &&
		    (sd_flags & STRDERR)) ||
		    ((events & (POLLOUT | POLLWRNORM | POLLWRBAND)) &&
		    (sd_flags & STWRERR))) {
			if (!(events & POLLNOERR)) {
				*reventsp = POLLERR;
				return (0);
			}
		}
	}
	if (sd_flags & STRHUP) {
		retevents |= POLLHUP;
	} else if (events & (POLLWRNORM | POLLWRBAND)) {
		queue_t *tq;
		queue_t	*qp = stp->sd_wrq;

		claimstr(qp);
		/* Find next module forward that has a service procedure */
		tq = qp->q_next->q_nfsrv;
		ASSERT(tq != NULL);

		polllock(&stp->sd_pollist, QLOCK(tq));
		if (events & POLLWRNORM) {
			queue_t *sqp;

			if (tq->q_flag & QFULL)
				/* ensure backq svc procedure runs */
				tq->q_flag |= QWANTW;
			else if ((sqp = stp->sd_struiowrq) != NULL) {
				/* Check sync stream barrier write q */
				mutex_exit(QLOCK(tq));
				polllock(&stp->sd_pollist, QLOCK(sqp));
				if (sqp->q_flag & QFULL)
					/* ensure pollwakeup() is done */
					sqp->q_flag |= QWANTWSYNC;
				else
					retevents |= POLLOUT;
				/* More write events to process ??? */
				if (! (events & POLLWRBAND)) {
					mutex_exit(QLOCK(sqp));
					releasestr(qp);
					goto chkrd;
				}
				/* Swap back to tq's lock for the band check. */
				mutex_exit(QLOCK(sqp));
				polllock(&stp->sd_pollist, QLOCK(tq));
			} else
				retevents |= POLLOUT;
		}
		if (events & POLLWRBAND) {
			qbp = tq->q_bandp;
			if (qbp) {
				/*
				 * Ask for a wakeup on every full band and
				 * report POLLWRBAND if any band is not full.
				 */
				while (qbp) {
					if (qbp->qb_flag & QB_FULL)
						qbp->qb_flag |= QB_WANTW;
					else
						retevents |= POLLWRBAND;
					qbp = qbp->qb_next;
				}
			} else {
				retevents |= POLLWRBAND;
			}
		}
		mutex_exit(QLOCK(tq));
		releasestr(qp);
	}
chkrd:
	if (sd_flags & STRPRI) {
		retevents |= (events & POLLPRI);
	} else if (events & (POLLRDNORM | POLLRDBAND | POLLIN)) {
		queue_t	*qp = _RD(stp->sd_wrq);
		int normevents = (events & (POLLIN | POLLRDNORM));

		/*
		 * Note: Need to do polllock() here since ps_lock may be
		 * held.  See bug 4191544.
		 */
		polllock(&stp->sd_pollist, &stp->sd_lock);
		headlocked = 1;
		mp = qp->q_first;
		while (mp) {
			/*
			 * For POLLRDDATA we scan b_cont and b_next until we
			 * find an M_DATA.
			 */
			if ((events & POLLRDDATA) &&
			    mp->b_datap->db_type != M_DATA) {
				mblk_t *nmp = mp->b_cont;

				while (nmp != NULL &&
				    nmp->b_datap->db_type != M_DATA)
					nmp = nmp->b_cont;
				if (nmp == NULL) {
					mp = mp->b_next;
					continue;
				}
			}
			/* First qualifying message decides the read events. */
			if (mp->b_band == 0)
				retevents |= normevents;
			else
				retevents |= (events & (POLLIN | POLLRDBAND));
			break;
		}
		if (! (retevents & normevents) &&
		    (stp->sd_wakeq & RSLEEP)) {
			/*
			 * Sync stream barrier read queue has data.
			 */
			retevents |= normevents;
		}
		/* Treat eof as normal data */
		if (sd_flags & STREOF)
			retevents |= normevents;
	}

	*reventsp = (short)retevents;
	if (retevents) {
		if (headlocked)
			mutex_exit(&stp->sd_lock);
		return (0);
	}

	/*
	 * If poll() has not found any events yet, set up event cell
	 * to wake up the poll if a requested event occurs on this
	 * stream.  Check for collisions with outstanding poll requests.
	 */
	if (!anyyet) {
		*phpp = &stp->sd_pollist;
		if (headlocked == 0) {
			polllock(&stp->sd_pollist, &stp->sd_lock);
			headlocked = 1;
		}
		stp->sd_rput_opt |= SR_POLLIN;
	}
	if (headlocked)
		mutex_exit(&stp->sd_lock);
	return (0);
}

/*
 * The purpose of putback() is to assure sleeping polls/reads
 * are awakened when there are no new messages arriving at the,
 * stream head, and a message is placed back on the read queue.
 *
 * sd_lock must be held when messages are placed back on stream
 * head.  (getq() holds sd_lock when it removes messages from
 * the queue)
 */

static void
putback(struct stdata *stp, queue_t *q, mblk_t *bp, int band)
{
	mblk_t	*qfirst;
	ASSERT(MUTEX_HELD(&stp->sd_lock));

	/*
	 * As a result of lock-step ordering around q_lock and sd_lock,
	 * it's possible for function calls like putnext() and
	 * canputnext() to get an inaccurate picture of how much
	 * data is really being processed at the stream head.
	 * We only consolidate with existing messages on the queue
	 * if the length of the message we want to put back is smaller
	 * than the queue hiwater mark.
8368 */ 8369 if ((stp->sd_rput_opt & SR_CONSOL_DATA) && 8370 (DB_TYPE(bp) == M_DATA) && ((qfirst = q->q_first) != NULL) && 8371 (DB_TYPE(qfirst) == M_DATA) && 8372 ((qfirst->b_flag & (MSGMARK|MSGDELIM)) == 0) && 8373 ((bp->b_flag & (MSGMARK|MSGDELIM|MSGMARKNEXT)) == 0) && 8374 (mp_cont_len(bp, NULL) < q->q_hiwat)) { 8375 /* 8376 * We use the same logic as defined in strrput() 8377 * but in reverse as we are putting back onto the 8378 * queue and want to retain byte ordering. 8379 * Consolidate M_DATA messages with M_DATA ONLY. 8380 * strrput() allows the consolidation of M_DATA onto 8381 * M_PROTO | M_PCPROTO but not the other way round. 8382 * 8383 * The consolidation does not take place if the message 8384 * we are returning to the queue is marked with either 8385 * of the marks or the delim flag or if q_first 8386 * is marked with MSGMARK. The MSGMARK check is needed to 8387 * handle the odd semantics of MSGMARK where essentially 8388 * the whole message is to be treated as marked. 8389 * Carry any MSGMARKNEXT and MSGNOTMARKNEXT from q_first 8390 * to the front of the b_cont chain. 8391 */ 8392 rmvq_noenab(q, qfirst); 8393 8394 /* 8395 * The first message in the b_cont list 8396 * tracks MSGMARKNEXT and MSGNOTMARKNEXT. 8397 * We need to handle the case where we 8398 * are appending: 8399 * 8400 * 1) a MSGMARKNEXT to a MSGNOTMARKNEXT. 8401 * 2) a MSGMARKNEXT to a plain message. 8402 * 3) a MSGNOTMARKNEXT to a plain message 8403 * 4) a MSGNOTMARKNEXT to a MSGNOTMARKNEXT 8404 * message. 8405 * 8406 * Thus we never append a MSGMARKNEXT or 8407 * MSGNOTMARKNEXT to a MSGMARKNEXT message. 
8408 */ 8409 if (qfirst->b_flag & MSGMARKNEXT) { 8410 bp->b_flag |= MSGMARKNEXT; 8411 bp->b_flag &= ~MSGNOTMARKNEXT; 8412 qfirst->b_flag &= ~MSGMARKNEXT; 8413 } else if (qfirst->b_flag & MSGNOTMARKNEXT) { 8414 bp->b_flag |= MSGNOTMARKNEXT; 8415 qfirst->b_flag &= ~MSGNOTMARKNEXT; 8416 } 8417 8418 linkb(bp, qfirst); 8419 } 8420 (void) putbq(q, bp); 8421 8422 /* 8423 * A message may have come in when the sd_lock was dropped in the 8424 * calling routine. If this is the case and STR*ATMARK info was 8425 * received, need to move that from the stream head to the q_last 8426 * so that SIOCATMARK can return the proper value. 8427 */ 8428 if (stp->sd_flag & (STRATMARK | STRNOTATMARK)) { 8429 unsigned short *flagp = &q->q_last->b_flag; 8430 uint_t b_flag = (uint_t)*flagp; 8431 8432 if (stp->sd_flag & STRATMARK) { 8433 b_flag &= ~MSGNOTMARKNEXT; 8434 b_flag |= MSGMARKNEXT; 8435 stp->sd_flag &= ~STRATMARK; 8436 } else { 8437 b_flag &= ~MSGMARKNEXT; 8438 b_flag |= MSGNOTMARKNEXT; 8439 stp->sd_flag &= ~STRNOTATMARK; 8440 } 8441 *flagp = (unsigned short) b_flag; 8442 } 8443 8444 #ifdef DEBUG 8445 /* 8446 * Make sure that the flags are not messed up. 
8447 */ 8448 { 8449 mblk_t *mp; 8450 mp = q->q_last; 8451 while (mp != NULL) { 8452 ASSERT((mp->b_flag & (MSGMARKNEXT|MSGNOTMARKNEXT)) != 8453 (MSGMARKNEXT|MSGNOTMARKNEXT)); 8454 mp = mp->b_cont; 8455 } 8456 } 8457 #endif 8458 if (q->q_first == bp) { 8459 short pollevents; 8460 8461 if (stp->sd_flag & RSLEEP) { 8462 stp->sd_flag &= ~RSLEEP; 8463 cv_broadcast(&q->q_wait); 8464 } 8465 if (stp->sd_flag & STRPRI) { 8466 pollevents = POLLPRI; 8467 } else { 8468 if (band == 0) { 8469 if (!(stp->sd_rput_opt & SR_POLLIN)) 8470 return; 8471 stp->sd_rput_opt &= ~SR_POLLIN; 8472 pollevents = POLLIN | POLLRDNORM; 8473 } else { 8474 pollevents = POLLIN | POLLRDBAND; 8475 } 8476 } 8477 mutex_exit(&stp->sd_lock); 8478 pollwakeup(&stp->sd_pollist, pollevents); 8479 mutex_enter(&stp->sd_lock); 8480 } 8481 } 8482 8483 /* 8484 * Return the held vnode attached to the stream head of a 8485 * given queue 8486 * It is the responsibility of the calling routine to ensure 8487 * that the queue does not go away (e.g. pop). 8488 */ 8489 vnode_t * 8490 strq2vp(queue_t *qp) 8491 { 8492 vnode_t *vp; 8493 vp = STREAM(qp)->sd_vnode; 8494 ASSERT(vp != NULL); 8495 VN_HOLD(vp); 8496 return (vp); 8497 } 8498 8499 /* 8500 * return the stream head write queue for the given vp 8501 * It is the responsibility of the calling routine to ensure 8502 * that the stream or vnode do not close. 8503 */ 8504 queue_t * 8505 strvp2wq(vnode_t *vp) 8506 { 8507 ASSERT(vp->v_stream != NULL); 8508 return (vp->v_stream->sd_wrq); 8509 } 8510 8511 /* 8512 * pollwakeup stream head 8513 * It is the responsibility of the calling routine to ensure 8514 * that the stream or vnode do not close. 8515 */ 8516 void 8517 strpollwakeup(vnode_t *vp, short event) 8518 { 8519 ASSERT(vp->v_stream); 8520 pollwakeup(&vp->v_stream->sd_pollist, event); 8521 } 8522 8523 /* 8524 * Mate the stream heads of two vnodes together. 
If the two vnodes are the 8525 * same, we just make the write-side point at the read-side -- otherwise, 8526 * we do a full mate. Only works on vnodes associated with streams that are 8527 * still being built and thus have only a stream head. 8528 */ 8529 void 8530 strmate(vnode_t *vp1, vnode_t *vp2) 8531 { 8532 queue_t *wrq1 = strvp2wq(vp1); 8533 queue_t *wrq2 = strvp2wq(vp2); 8534 8535 /* 8536 * Verify that there are no modules on the stream yet. We also 8537 * rely on the stream head always having a service procedure to 8538 * avoid tweaking q_nfsrv. 8539 */ 8540 ASSERT(wrq1->q_next == NULL && wrq2->q_next == NULL); 8541 ASSERT(wrq1->q_qinfo->qi_srvp != NULL); 8542 ASSERT(wrq2->q_qinfo->qi_srvp != NULL); 8543 8544 /* 8545 * If the queues are the same, just twist; otherwise do a full mate. 8546 */ 8547 if (wrq1 == wrq2) { 8548 wrq1->q_next = _RD(wrq1); 8549 } else { 8550 wrq1->q_next = _RD(wrq2); 8551 wrq2->q_next = _RD(wrq1); 8552 STREAM(wrq1)->sd_mate = STREAM(wrq2); 8553 STREAM(wrq1)->sd_flag |= STRMATE; 8554 STREAM(wrq2)->sd_mate = STREAM(wrq1); 8555 STREAM(wrq2)->sd_flag |= STRMATE; 8556 } 8557 } 8558 8559 /* 8560 * XXX will go away when console is correctly fixed. 8561 * Clean up the console PIDS, from previous I_SETSIG, 8562 * called only for cnopen which never calls strclean(). 8563 */ 8564 void 8565 str_cn_clean(struct vnode *vp) 8566 { 8567 strsig_t *ssp, *pssp, *tssp; 8568 struct stdata *stp; 8569 struct pid *pidp; 8570 int update = 0; 8571 8572 ASSERT(vp->v_stream); 8573 stp = vp->v_stream; 8574 pssp = NULL; 8575 mutex_enter(&stp->sd_lock); 8576 ssp = stp->sd_siglist; 8577 while (ssp) { 8578 mutex_enter(&pidlock); 8579 pidp = ssp->ss_pidp; 8580 /* 8581 * Get rid of PID if the proc is gone. 
8582 */ 8583 if (pidp->pid_prinactive) { 8584 tssp = ssp->ss_next; 8585 if (pssp) 8586 pssp->ss_next = tssp; 8587 else 8588 stp->sd_siglist = tssp; 8589 ASSERT(pidp->pid_ref <= 1); 8590 PID_RELE(ssp->ss_pidp); 8591 mutex_exit(&pidlock); 8592 kmem_free(ssp, sizeof (strsig_t)); 8593 update = 1; 8594 ssp = tssp; 8595 continue; 8596 } else 8597 mutex_exit(&pidlock); 8598 pssp = ssp; 8599 ssp = ssp->ss_next; 8600 } 8601 if (update) { 8602 stp->sd_sigflags = 0; 8603 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 8604 stp->sd_sigflags |= ssp->ss_events; 8605 } 8606 mutex_exit(&stp->sd_lock); 8607 } 8608 8609 /* 8610 * Return B_TRUE if there is data in the message, B_FALSE otherwise. 8611 */ 8612 static boolean_t 8613 msghasdata(mblk_t *bp) 8614 { 8615 for (; bp; bp = bp->b_cont) 8616 if (bp->b_datap->db_type == M_DATA) { 8617 ASSERT(bp->b_wptr >= bp->b_rptr); 8618 if (bp->b_wptr > bp->b_rptr) 8619 return (B_TRUE); 8620 } 8621 return (B_FALSE); 8622 } 8623