1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 22 /* All Rights Reserved */ 23 24 25 /* 26 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 27 * Use is subject to license terms. 
28 */ 29 30 #include <sys/types.h> 31 #include <sys/sysmacros.h> 32 #include <sys/param.h> 33 #include <sys/errno.h> 34 #include <sys/signal.h> 35 #include <sys/stat.h> 36 #include <sys/proc.h> 37 #include <sys/cred.h> 38 #include <sys/user.h> 39 #include <sys/vnode.h> 40 #include <sys/file.h> 41 #include <sys/stream.h> 42 #include <sys/strsubr.h> 43 #include <sys/stropts.h> 44 #include <sys/tihdr.h> 45 #include <sys/var.h> 46 #include <sys/poll.h> 47 #include <sys/termio.h> 48 #include <sys/ttold.h> 49 #include <sys/systm.h> 50 #include <sys/uio.h> 51 #include <sys/cmn_err.h> 52 #include <sys/sad.h> 53 #include <sys/netstack.h> 54 #include <sys/priocntl.h> 55 #include <sys/jioctl.h> 56 #include <sys/procset.h> 57 #include <sys/session.h> 58 #include <sys/kmem.h> 59 #include <sys/filio.h> 60 #include <sys/vtrace.h> 61 #include <sys/debug.h> 62 #include <sys/strredir.h> 63 #include <sys/fs/fifonode.h> 64 #include <sys/fs/snode.h> 65 #include <sys/strlog.h> 66 #include <sys/strsun.h> 67 #include <sys/project.h> 68 #include <sys/kbio.h> 69 #include <sys/msio.h> 70 #include <sys/tty.h> 71 #include <sys/ptyvar.h> 72 #include <sys/vuid_event.h> 73 #include <sys/modctl.h> 74 #include <sys/sunddi.h> 75 #include <sys/sunldi_impl.h> 76 #include <sys/autoconf.h> 77 #include <sys/policy.h> 78 #include <sys/dld.h> 79 #include <sys/zone.h> 80 #include <c2/audit.h> 81 82 /* 83 * This define helps improve the readability of streams code while 84 * still maintaining a very old streams performance enhancement. The 85 * performance enhancement basically involved having all callers 86 * of straccess() perform the first check that straccess() will do 87 * locally before actually calling straccess(). (There by reducing 88 * the number of unnecessary calls to straccess().) 89 */ 90 #define i_straccess(x, y) ((stp->sd_sidp == NULL) ? 0 : \ 91 (stp->sd_vnode->v_type == VFIFO) ? 0 : \ 92 straccess((x), (y))) 93 94 /* 95 * what is mblk_pull_len? 
*
 * If a streams message consists of many short mblks,
 * a performance degradation occurs from copyout overhead.
 * To decrease the per mblk overhead, messages that are
 * likely to consist of many small mblks are pulled up into
 * one continuous chunk of memory.
 *
 * To avoid the processing overhead of examining every
 * mblk, a quick heuristic is used.  If the first mblk in
 * the message is shorter than mblk_pull_len, it is likely
 * that the rest of the mblks will be short as well.
 *
 * This heuristic was decided upon after performance tests
 * indicated that anything more complex slowed down the main
 * code path.
 *
 * MBLK_PULL_LEN is the compiled-in default; mblk_pull_len is the
 * (non-static) variable that holds the threshold in bytes.
 */
#define	MBLK_PULL_LEN 64
uint32_t mblk_pull_len = MBLK_PULL_LEN;

/*
 * The sgttyb_handling flag controls the handling of the old BSD
 * TIOCGETP, TIOCSETP, and TIOCSETN ioctls as follows:
 *
 * 0 - Emit no warnings at all and retain old, broken behavior.
 * 1 - Emit no warnings and silently handle new semantics.
 * 2 - Send cmn_err(CE_NOTE) when either TIOCSETP or TIOCSETN is used
 *     (once per system invocation).  Handle with new semantics.
 * 3 - Send SIGSYS when any TIOCGETP, TIOCSETP, or TIOCSETN call is
 *     made (so that offenders drop core and are easy to debug).
 *
 * The "new semantics" are that TIOCGETP returns B38400 for
 * sg_[io]speed if the corresponding value is over B38400, and that
 * TIOCSET[PN] accept B38400 in these cases to mean "retain current
 * bit rate."
*/
int sgttyb_handling = 1;		/* default: new semantics, no warnings */
/*
 * NOTE(review): presumably records that the once-per-boot CE_NOTE for
 * sgttyb_handling == 2 has already been issued; its users are not in
 * this part of the file -- confirm.
 */
static boolean_t sgttyb_complaint;

/*
 * don't push drcompat module by default on Style-2 streams
 * (stropen() only pushes DRMODNAME when this is non-zero)
 */
static int push_drcompat = 0;

/*
 * id value used to distinguish between different ioctl messages
 */
static uint32_t ioc_id;

/* Forward declarations for file-local routines. */
static void putback(struct stdata *, queue_t *, mblk_t *, int);
static void strcleanall(struct vnode *);
static int strwsrv(queue_t *);
static int strdocmd(struct stdata *, struct strcmd *, cred_t *);

/*
 * qinit and module_info structures for stream head read and write queues
 *
 * strm_info/stwm_info and strdata/stwdata are installed by stropen() for
 * ordinary streams; the fifo* variants are installed instead when the
 * vnode is a VFIFO.  In every pair the read side uses strrput() and the
 * write side uses strwsrv().
 */
struct module_info strm_info = { 0, "strrhead", 0, INFPSZ, STRHIGH, STRLOW };
struct module_info stwm_info = { 0, "strwhead", 0, 0, 0, 0 };
struct qinit strdata = { strrput, NULL, NULL, NULL, NULL, &strm_info };
struct qinit stwdata = { NULL, strwsrv, NULL, NULL, NULL, &stwm_info };
struct module_info fiform_info = { 0, "fifostrrhead", 0, PIPE_BUF, FIFOHIWAT,
    FIFOLOWAT };
struct module_info fifowm_info = { 0, "fifostrwhead", 0, 0, 0, 0 };
struct qinit fifo_strdata = { strrput, NULL, NULL, NULL, NULL, &fiform_info };
struct qinit fifo_stwdata = { NULL, strwsrv, NULL, NULL, NULL, &fifowm_info };

extern kmutex_t	strresources;	/* protects global resources */
extern kmutex_t muxifier;	/* single-threads multiplexor creation */

static boolean_t msghasdata(mblk_t *bp);
/* Convenience negation of msghasdata(). */
#define	msgnodata(bp) (!msghasdata(bp))

/*
 * Stream head locking notes:
 *	There are four monitors associated with the stream head:
 *	1. v_stream monitor: in stropen() and strclose() v_lock
 *		is held while the association of vnode and stream
 *		head is established or tested for.
 *	2. open/close/push/pop monitor: sd_lock is held while each
 *		thread bids for exclusive access to this monitor
 *		for opening or closing a stream.  In addition, this
 *		monitor is entered during pushes and pops.
This 176 * guarantees that during plumbing operations there 177 * is only one thread trying to change the plumbing. 178 * Any other threads present in the stream are only 179 * using the plumbing. 180 * 3. read/write monitor: in the case of read, a thread holds 181 * sd_lock while trying to get data from the stream 182 * head queue. if there is none to fulfill a read 183 * request, it sets RSLEEP and calls cv_wait_sig() down 184 * in strwaitq() to await the arrival of new data. 185 * when new data arrives in strrput(), sd_lock is acquired 186 * before testing for RSLEEP and calling cv_broadcast(). 187 * the behavior of strwrite(), strwsrv(), and WSLEEP 188 * mirror this. 189 * 4. ioctl monitor: sd_lock is gotten to ensure that only one 190 * thread is doing an ioctl at a time. 191 */ 192 193 static int 194 push_mod(queue_t *qp, dev_t *devp, struct stdata *stp, const char *name, 195 int anchor, cred_t *crp, uint_t anchor_zoneid) 196 { 197 int error; 198 fmodsw_impl_t *fp; 199 200 if (stp->sd_flag & (STRHUP|STRDERR|STWRERR)) { 201 error = (stp->sd_flag & STRHUP) ? ENXIO : EIO; 202 return (error); 203 } 204 if (stp->sd_pushcnt >= nstrpush) { 205 return (EINVAL); 206 } 207 208 if ((fp = fmodsw_find(name, FMODSW_HOLD | FMODSW_LOAD)) == NULL) { 209 stp->sd_flag |= STREOPENFAIL; 210 return (EINVAL); 211 } 212 213 /* 214 * push new module and call its open routine via qattach 215 */ 216 if ((error = qattach(qp, devp, 0, crp, fp, B_FALSE)) != 0) 217 return (error); 218 219 /* 220 * Check to see if caller wants a STREAMS anchor 221 * put at this place in the stream, and add if so. 222 */ 223 mutex_enter(&stp->sd_lock); 224 if (anchor == stp->sd_pushcnt) { 225 stp->sd_anchor = stp->sd_pushcnt; 226 stp->sd_anchorzone = anchor_zoneid; 227 } 228 mutex_exit(&stp->sd_lock); 229 230 return (0); 231 } 232 233 /* 234 * Open a stream device. 
235 */ 236 int 237 stropen(vnode_t *vp, dev_t *devp, int flag, cred_t *crp) 238 { 239 struct stdata *stp; 240 queue_t *qp; 241 int s; 242 dev_t dummydev, savedev; 243 struct autopush *ap; 244 struct dlautopush dlap; 245 int error = 0; 246 ssize_t rmin, rmax; 247 int cloneopen; 248 queue_t *brq; 249 major_t major; 250 str_stack_t *ss; 251 zoneid_t zoneid; 252 uint_t anchor; 253 254 if (AU_AUDITING()) 255 audit_stropen(vp, devp, flag, crp); 256 257 /* 258 * If the stream already exists, wait for any open in progress 259 * to complete, then call the open function of each module and 260 * driver in the stream. Otherwise create the stream. 261 */ 262 TRACE_1(TR_FAC_STREAMS_FR, TR_STROPEN, "stropen:%p", vp); 263 retry: 264 mutex_enter(&vp->v_lock); 265 if ((stp = vp->v_stream) != NULL) { 266 267 /* 268 * Waiting for stream to be created to device 269 * due to another open. 270 */ 271 mutex_exit(&vp->v_lock); 272 273 if (STRMATED(stp)) { 274 struct stdata *strmatep = stp->sd_mate; 275 276 STRLOCKMATES(stp); 277 if (strmatep->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) { 278 if (flag & (FNDELAY|FNONBLOCK)) { 279 error = EAGAIN; 280 mutex_exit(&strmatep->sd_lock); 281 goto ckreturn; 282 } 283 mutex_exit(&stp->sd_lock); 284 if (!cv_wait_sig(&strmatep->sd_monitor, 285 &strmatep->sd_lock)) { 286 error = EINTR; 287 mutex_exit(&strmatep->sd_lock); 288 mutex_enter(&stp->sd_lock); 289 goto ckreturn; 290 } 291 mutex_exit(&strmatep->sd_lock); 292 goto retry; 293 } 294 if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) { 295 if (flag & (FNDELAY|FNONBLOCK)) { 296 error = EAGAIN; 297 mutex_exit(&strmatep->sd_lock); 298 goto ckreturn; 299 } 300 mutex_exit(&strmatep->sd_lock); 301 if (!cv_wait_sig(&stp->sd_monitor, 302 &stp->sd_lock)) { 303 error = EINTR; 304 goto ckreturn; 305 } 306 mutex_exit(&stp->sd_lock); 307 goto retry; 308 } 309 310 if (stp->sd_flag & (STRDERR|STWRERR)) { 311 error = EIO; 312 mutex_exit(&strmatep->sd_lock); 313 goto ckreturn; 314 } 315 316 stp->sd_flag |= STWOPEN; 317 
STRUNLOCKMATES(stp); 318 } else { 319 mutex_enter(&stp->sd_lock); 320 if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) { 321 if (flag & (FNDELAY|FNONBLOCK)) { 322 error = EAGAIN; 323 goto ckreturn; 324 } 325 if (!cv_wait_sig(&stp->sd_monitor, 326 &stp->sd_lock)) { 327 error = EINTR; 328 goto ckreturn; 329 } 330 mutex_exit(&stp->sd_lock); 331 goto retry; /* could be clone! */ 332 } 333 334 if (stp->sd_flag & (STRDERR|STWRERR)) { 335 error = EIO; 336 goto ckreturn; 337 } 338 339 stp->sd_flag |= STWOPEN; 340 mutex_exit(&stp->sd_lock); 341 } 342 343 /* 344 * Open all modules and devices down stream to notify 345 * that another user is streaming. For modules, set the 346 * last argument to MODOPEN and do not pass any open flags. 347 * Ignore dummydev since this is not the first open. 348 */ 349 claimstr(stp->sd_wrq); 350 qp = stp->sd_wrq; 351 while (_SAMESTR(qp)) { 352 qp = qp->q_next; 353 if ((error = qreopen(_RD(qp), devp, flag, crp)) != 0) 354 break; 355 } 356 releasestr(stp->sd_wrq); 357 mutex_enter(&stp->sd_lock); 358 stp->sd_flag &= ~(STRHUP|STWOPEN|STRDERR|STWRERR); 359 stp->sd_rerror = 0; 360 stp->sd_werror = 0; 361 ckreturn: 362 cv_broadcast(&stp->sd_monitor); 363 mutex_exit(&stp->sd_lock); 364 return (error); 365 } 366 367 /* 368 * This vnode isn't streaming. SPECFS already 369 * checked for multiple vnodes pointing to the 370 * same stream, so create a stream to the driver. 371 */ 372 qp = allocq(); 373 stp = shalloc(qp); 374 375 /* 376 * Initialize stream head. shalloc() has given us 377 * exclusive access, and we have the vnode locked; 378 * we can do whatever we want with stp. 
379 */ 380 stp->sd_flag = STWOPEN; 381 stp->sd_siglist = NULL; 382 stp->sd_pollist.ph_list = NULL; 383 stp->sd_sigflags = 0; 384 stp->sd_mark = NULL; 385 stp->sd_closetime = STRTIMOUT; 386 stp->sd_sidp = NULL; 387 stp->sd_pgidp = NULL; 388 stp->sd_vnode = vp; 389 stp->sd_rerror = 0; 390 stp->sd_werror = 0; 391 stp->sd_wroff = 0; 392 stp->sd_tail = 0; 393 stp->sd_iocblk = NULL; 394 stp->sd_cmdblk = NULL; 395 stp->sd_pushcnt = 0; 396 stp->sd_qn_minpsz = 0; 397 stp->sd_qn_maxpsz = INFPSZ - 1; /* used to check for initialization */ 398 stp->sd_maxblk = INFPSZ; 399 qp->q_ptr = _WR(qp)->q_ptr = stp; 400 STREAM(qp) = STREAM(_WR(qp)) = stp; 401 vp->v_stream = stp; 402 mutex_exit(&vp->v_lock); 403 if (vp->v_type == VFIFO) { 404 stp->sd_flag |= OLDNDELAY; 405 /* 406 * This means, both for pipes and fifos 407 * strwrite will send SIGPIPE if the other 408 * end is closed. For putmsg it depends 409 * on whether it is a XPG4_2 application 410 * or not 411 */ 412 stp->sd_wput_opt = SW_SIGPIPE; 413 414 /* setq might sleep in kmem_alloc - avoid holding locks. */ 415 setq(qp, &fifo_strdata, &fifo_stwdata, NULL, QMTSAFE, 416 SQ_CI|SQ_CO, B_FALSE); 417 418 set_qend(qp); 419 stp->sd_strtab = fifo_getinfo(); 420 _WR(qp)->q_nfsrv = _WR(qp); 421 qp->q_nfsrv = qp; 422 /* 423 * Wake up others that are waiting for stream to be created. 424 */ 425 mutex_enter(&stp->sd_lock); 426 /* 427 * nothing is be pushed on stream yet, so 428 * optimized stream head packetsizes are just that 429 * of the read queue 430 */ 431 stp->sd_qn_minpsz = qp->q_minpsz; 432 stp->sd_qn_maxpsz = qp->q_maxpsz; 433 stp->sd_flag &= ~STWOPEN; 434 goto fifo_opendone; 435 } 436 /* setq might sleep in kmem_alloc - avoid holding locks. */ 437 setq(qp, &strdata, &stwdata, NULL, QMTSAFE, SQ_CI|SQ_CO, B_FALSE); 438 439 set_qend(qp); 440 441 /* 442 * Open driver and create stream to it (via qattach). 
443 */ 444 savedev = *devp; 445 cloneopen = (getmajor(*devp) == clone_major); 446 if ((error = qattach(qp, devp, flag, crp, NULL, B_FALSE)) != 0) { 447 mutex_enter(&vp->v_lock); 448 vp->v_stream = NULL; 449 mutex_exit(&vp->v_lock); 450 mutex_enter(&stp->sd_lock); 451 cv_broadcast(&stp->sd_monitor); 452 mutex_exit(&stp->sd_lock); 453 freeq(_RD(qp)); 454 shfree(stp); 455 return (error); 456 } 457 /* 458 * Set sd_strtab after open in order to handle clonable drivers 459 */ 460 stp->sd_strtab = STREAMSTAB(getmajor(*devp)); 461 462 /* 463 * Historical note: dummydev used to be be prior to the initial 464 * open (via qattach above), which made the value seen 465 * inconsistent between an I_PUSH and an autopush of a module. 466 */ 467 dummydev = *devp; 468 469 /* 470 * For clone open of old style (Q not associated) network driver, 471 * push DRMODNAME module to handle DL_ATTACH/DL_DETACH 472 */ 473 brq = _RD(_WR(qp)->q_next); 474 major = getmajor(*devp); 475 if (push_drcompat && cloneopen && NETWORK_DRV(major) && 476 ((brq->q_flag & _QASSOCIATED) == 0)) { 477 if (push_mod(qp, &dummydev, stp, DRMODNAME, 0, crp, 0) != 0) 478 cmn_err(CE_WARN, "cannot push " DRMODNAME 479 " streams module"); 480 } 481 482 if (!NETWORK_DRV(major)) { 483 savedev = *devp; 484 } else { 485 /* 486 * For network devices, process differently based on the 487 * return value from dld_autopush(): 488 * 489 * 0: the passed-in device points to a GLDv3 datalink with 490 * per-link autopush configuration; use that configuration 491 * and ignore any per-driver autopush configuration. 492 * 493 * 1: the passed-in device points to a physical GLDv3 494 * datalink without per-link autopush configuration. The 495 * passed in device was changed to refer to the actual 496 * physical device (if it's not already); we use that new 497 * device to look up any per-driver autopush configuration. 
498 * 499 * -1: neither of the above cases applied; use the initial 500 * device to look up any per-driver autopush configuration. 501 */ 502 switch (dld_autopush(&savedev, &dlap)) { 503 case 0: 504 zoneid = crgetzoneid(crp); 505 for (s = 0; s < dlap.dap_npush; s++) { 506 error = push_mod(qp, &dummydev, stp, 507 dlap.dap_aplist[s], dlap.dap_anchor, crp, 508 zoneid); 509 if (error != 0) 510 break; 511 } 512 goto opendone; 513 case 1: 514 break; 515 case -1: 516 savedev = *devp; 517 break; 518 } 519 } 520 /* 521 * Find the autopush configuration based on "savedev". Start with the 522 * global zone. If not found check in the local zone. 523 */ 524 zoneid = GLOBAL_ZONEID; 525 retryap: 526 ss = netstack_find_by_stackid(zoneid_to_netstackid(zoneid))-> 527 netstack_str; 528 if ((ap = sad_ap_find_by_dev(savedev, ss)) == NULL) { 529 netstack_rele(ss->ss_netstack); 530 if (zoneid == GLOBAL_ZONEID) { 531 /* 532 * None found. Also look in the zone's autopush table. 533 */ 534 zoneid = crgetzoneid(crp); 535 if (zoneid != GLOBAL_ZONEID) 536 goto retryap; 537 } 538 goto opendone; 539 } 540 anchor = ap->ap_anchor; 541 zoneid = crgetzoneid(crp); 542 for (s = 0; s < ap->ap_npush; s++) { 543 error = push_mod(qp, &dummydev, stp, ap->ap_list[s], 544 anchor, crp, zoneid); 545 if (error != 0) 546 break; 547 } 548 sad_ap_rele(ap, ss); 549 netstack_rele(ss->ss_netstack); 550 551 opendone: 552 553 /* 554 * let specfs know that open failed part way through 555 */ 556 if (error) { 557 mutex_enter(&stp->sd_lock); 558 stp->sd_flag |= STREOPENFAIL; 559 mutex_exit(&stp->sd_lock); 560 } 561 562 /* 563 * Wake up others that are waiting for stream to be created. 564 */ 565 mutex_enter(&stp->sd_lock); 566 stp->sd_flag &= ~STWOPEN; 567 568 /* 569 * As a performance concern we are caching the values of 570 * q_minpsz and q_maxpsz of the module below the stream 571 * head in the stream head. 
572 */ 573 mutex_enter(QLOCK(stp->sd_wrq->q_next)); 574 rmin = stp->sd_wrq->q_next->q_minpsz; 575 rmax = stp->sd_wrq->q_next->q_maxpsz; 576 mutex_exit(QLOCK(stp->sd_wrq->q_next)); 577 578 /* do this processing here as a performance concern */ 579 if (strmsgsz != 0) { 580 if (rmax == INFPSZ) 581 rmax = strmsgsz; 582 else 583 rmax = MIN(strmsgsz, rmax); 584 } 585 586 mutex_enter(QLOCK(stp->sd_wrq)); 587 stp->sd_qn_minpsz = rmin; 588 stp->sd_qn_maxpsz = rmax; 589 mutex_exit(QLOCK(stp->sd_wrq)); 590 591 fifo_opendone: 592 cv_broadcast(&stp->sd_monitor); 593 mutex_exit(&stp->sd_lock); 594 return (error); 595 } 596 597 static int strsink(queue_t *, mblk_t *); 598 static struct qinit deadrend = { 599 strsink, NULL, NULL, NULL, NULL, &strm_info, NULL 600 }; 601 static struct qinit deadwend = { 602 NULL, NULL, NULL, NULL, NULL, &stwm_info, NULL 603 }; 604 605 /* 606 * Close a stream. 607 * This is called from closef() on the last close of an open stream. 608 * Strclean() will already have removed the siglist and pollist 609 * information, so all that remains is to remove all multiplexor links 610 * for the stream, pop all the modules (and the driver), and free the 611 * stream structure. 612 */ 613 614 int 615 strclose(struct vnode *vp, int flag, cred_t *crp) 616 { 617 struct stdata *stp; 618 queue_t *qp; 619 int rval; 620 int freestp = 1; 621 queue_t *rmq; 622 623 if (AU_AUDITING()) 624 audit_strclose(vp, flag, crp); 625 626 TRACE_1(TR_FAC_STREAMS_FR, 627 TR_STRCLOSE, "strclose:%p", vp); 628 ASSERT(vp->v_stream); 629 630 stp = vp->v_stream; 631 ASSERT(!(stp->sd_flag & STPLEX)); 632 qp = stp->sd_wrq; 633 634 /* 635 * Needed so that strpoll will return non-zero for this fd. 636 * Note that with POLLNOERR STRHUP does still cause POLLHUP. 
637 */ 638 mutex_enter(&stp->sd_lock); 639 stp->sd_flag |= STRHUP; 640 mutex_exit(&stp->sd_lock); 641 642 /* 643 * If the registered process or process group did not have an 644 * open instance of this stream then strclean would not be 645 * called. Thus at the time of closing all remaining siglist entries 646 * are removed. 647 */ 648 if (stp->sd_siglist != NULL) 649 strcleanall(vp); 650 651 ASSERT(stp->sd_siglist == NULL); 652 ASSERT(stp->sd_sigflags == 0); 653 654 if (STRMATED(stp)) { 655 struct stdata *strmatep = stp->sd_mate; 656 int waited = 1; 657 658 STRLOCKMATES(stp); 659 while (waited) { 660 waited = 0; 661 while (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) { 662 mutex_exit(&strmatep->sd_lock); 663 cv_wait(&stp->sd_monitor, &stp->sd_lock); 664 mutex_exit(&stp->sd_lock); 665 STRLOCKMATES(stp); 666 waited = 1; 667 } 668 while (strmatep->sd_flag & 669 (STWOPEN|STRCLOSE|STRPLUMB)) { 670 mutex_exit(&stp->sd_lock); 671 cv_wait(&strmatep->sd_monitor, 672 &strmatep->sd_lock); 673 mutex_exit(&strmatep->sd_lock); 674 STRLOCKMATES(stp); 675 waited = 1; 676 } 677 } 678 stp->sd_flag |= STRCLOSE; 679 STRUNLOCKMATES(stp); 680 } else { 681 mutex_enter(&stp->sd_lock); 682 stp->sd_flag |= STRCLOSE; 683 mutex_exit(&stp->sd_lock); 684 } 685 686 ASSERT(qp->q_first == NULL); /* No more delayed write */ 687 688 /* Check if an I_LINK was ever done on this stream */ 689 if (stp->sd_flag & STRHASLINKS) { 690 netstack_t *ns; 691 str_stack_t *ss; 692 693 ns = netstack_find_by_cred(crp); 694 ASSERT(ns != NULL); 695 ss = ns->netstack_str; 696 ASSERT(ss != NULL); 697 698 (void) munlinkall(stp, LINKCLOSE|LINKNORMAL, crp, &rval, ss); 699 netstack_rele(ss->ss_netstack); 700 } 701 702 while (_SAMESTR(qp)) { 703 /* 704 * Holding sd_lock prevents q_next from changing in 705 * this stream. 
706 */ 707 mutex_enter(&stp->sd_lock); 708 if (!(flag & (FNDELAY|FNONBLOCK)) && (stp->sd_closetime > 0)) { 709 710 /* 711 * sleep until awakened by strwsrv() or timeout 712 */ 713 for (;;) { 714 mutex_enter(QLOCK(qp->q_next)); 715 if (!(qp->q_next->q_mblkcnt)) { 716 mutex_exit(QLOCK(qp->q_next)); 717 break; 718 } 719 stp->sd_flag |= WSLEEP; 720 721 /* ensure strwsrv gets enabled */ 722 qp->q_next->q_flag |= QWANTW; 723 mutex_exit(QLOCK(qp->q_next)); 724 /* get out if we timed out or recv'd a signal */ 725 if (str_cv_wait(&qp->q_wait, &stp->sd_lock, 726 stp->sd_closetime, 0) <= 0) { 727 break; 728 } 729 } 730 stp->sd_flag &= ~WSLEEP; 731 } 732 mutex_exit(&stp->sd_lock); 733 734 rmq = qp->q_next; 735 if (rmq->q_flag & QISDRV) { 736 ASSERT(!_SAMESTR(rmq)); 737 wait_sq_svc(_RD(qp)->q_syncq); 738 } 739 740 qdetach(_RD(rmq), 1, flag, crp, B_FALSE); 741 } 742 743 /* 744 * Since we call pollwakeup in close() now, the poll list should 745 * be empty in most cases. The only exception is the layered devices 746 * (e.g. the console drivers with redirection modules pushed on top 747 * of it). We have to do this after calling qdetach() because 748 * the redirection module won't have torn down the console 749 * redirection until after qdetach() has been invoked. 750 */ 751 if (stp->sd_pollist.ph_list != NULL) { 752 pollwakeup(&stp->sd_pollist, POLLERR); 753 pollhead_clean(&stp->sd_pollist); 754 } 755 ASSERT(stp->sd_pollist.ph_list == NULL); 756 ASSERT(stp->sd_sidp == NULL); 757 ASSERT(stp->sd_pgidp == NULL); 758 759 /* Prevent qenable from re-enabling the stream head queue */ 760 disable_svc(_RD(qp)); 761 762 /* 763 * Wait until service procedure of each queue is 764 * run, if QINSERVICE is set. 765 */ 766 wait_svc(_RD(qp)); 767 768 /* 769 * Now, flush both queues. 770 */ 771 flushq(_RD(qp), FLUSHALL); 772 flushq(qp, FLUSHALL); 773 774 /* 775 * If the write queue of the stream head is pointing to a 776 * read queue, we have a twisted stream. 
If the read queue 777 * is alive, convert the stream head queues into a dead end. 778 * If the read queue is dead, free the dead pair. 779 */ 780 if (qp->q_next && !_SAMESTR(qp)) { 781 if (qp->q_next->q_qinfo == &deadrend) { /* half-closed pipe */ 782 flushq(qp->q_next, FLUSHALL); /* ensure no message */ 783 shfree(qp->q_next->q_stream); 784 freeq(qp->q_next); 785 freeq(_RD(qp)); 786 } else if (qp->q_next == _RD(qp)) { /* fifo */ 787 freeq(_RD(qp)); 788 } else { /* pipe */ 789 freestp = 0; 790 /* 791 * The q_info pointers are never accessed when 792 * SQLOCK is held. 793 */ 794 ASSERT(qp->q_syncq == _RD(qp)->q_syncq); 795 mutex_enter(SQLOCK(qp->q_syncq)); 796 qp->q_qinfo = &deadwend; 797 _RD(qp)->q_qinfo = &deadrend; 798 mutex_exit(SQLOCK(qp->q_syncq)); 799 } 800 } else { 801 freeq(_RD(qp)); /* free stream head queue pair */ 802 } 803 804 mutex_enter(&vp->v_lock); 805 if (stp->sd_iocblk) { 806 if (stp->sd_iocblk != (mblk_t *)-1) { 807 freemsg(stp->sd_iocblk); 808 } 809 stp->sd_iocblk = NULL; 810 } 811 stp->sd_vnode = NULL; 812 vp->v_stream = NULL; 813 mutex_exit(&vp->v_lock); 814 mutex_enter(&stp->sd_lock); 815 freemsg(stp->sd_cmdblk); 816 stp->sd_cmdblk = NULL; 817 stp->sd_flag &= ~STRCLOSE; 818 cv_broadcast(&stp->sd_monitor); 819 mutex_exit(&stp->sd_lock); 820 821 if (freestp) 822 shfree(stp); 823 return (0); 824 } 825 826 static int 827 strsink(queue_t *q, mblk_t *bp) 828 { 829 struct copyresp *resp; 830 831 switch (bp->b_datap->db_type) { 832 case M_FLUSH: 833 if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) { 834 *bp->b_rptr &= ~FLUSHR; 835 bp->b_flag |= MSGNOLOOP; 836 /* 837 * Protect against the driver passing up 838 * messages after it has done a qprocsoff. 
839 */ 840 if (_OTHERQ(q)->q_next == NULL) 841 freemsg(bp); 842 else 843 qreply(q, bp); 844 } else { 845 freemsg(bp); 846 } 847 break; 848 849 case M_COPYIN: 850 case M_COPYOUT: 851 if (bp->b_cont) { 852 freemsg(bp->b_cont); 853 bp->b_cont = NULL; 854 } 855 bp->b_datap->db_type = M_IOCDATA; 856 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp); 857 resp = (struct copyresp *)bp->b_rptr; 858 resp->cp_rval = (caddr_t)1; /* failure */ 859 /* 860 * Protect against the driver passing up 861 * messages after it has done a qprocsoff. 862 */ 863 if (_OTHERQ(q)->q_next == NULL) 864 freemsg(bp); 865 else 866 qreply(q, bp); 867 break; 868 869 case M_IOCTL: 870 if (bp->b_cont) { 871 freemsg(bp->b_cont); 872 bp->b_cont = NULL; 873 } 874 bp->b_datap->db_type = M_IOCNAK; 875 /* 876 * Protect against the driver passing up 877 * messages after it has done a qprocsoff. 878 */ 879 if (_OTHERQ(q)->q_next == NULL) 880 freemsg(bp); 881 else 882 qreply(q, bp); 883 break; 884 885 default: 886 freemsg(bp); 887 break; 888 } 889 890 return (0); 891 } 892 893 /* 894 * Clean up after a process when it closes a stream. This is called 895 * from closef for all closes, whereas strclose is called only for the 896 * last close on a stream. The siglist is scanned for entries for the 897 * current process, and these are removed. 
898 */ 899 void 900 strclean(struct vnode *vp) 901 { 902 strsig_t *ssp, *pssp, *tssp; 903 stdata_t *stp; 904 int update = 0; 905 906 TRACE_1(TR_FAC_STREAMS_FR, 907 TR_STRCLEAN, "strclean:%p", vp); 908 stp = vp->v_stream; 909 pssp = NULL; 910 mutex_enter(&stp->sd_lock); 911 ssp = stp->sd_siglist; 912 while (ssp) { 913 if (ssp->ss_pidp == curproc->p_pidp) { 914 tssp = ssp->ss_next; 915 if (pssp) 916 pssp->ss_next = tssp; 917 else 918 stp->sd_siglist = tssp; 919 mutex_enter(&pidlock); 920 PID_RELE(ssp->ss_pidp); 921 mutex_exit(&pidlock); 922 kmem_free(ssp, sizeof (strsig_t)); 923 update = 1; 924 ssp = tssp; 925 } else { 926 pssp = ssp; 927 ssp = ssp->ss_next; 928 } 929 } 930 if (update) { 931 stp->sd_sigflags = 0; 932 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 933 stp->sd_sigflags |= ssp->ss_events; 934 } 935 mutex_exit(&stp->sd_lock); 936 } 937 938 /* 939 * Used on the last close to remove any remaining items on the siglist. 940 * These could be present on the siglist due to I_ESETSIG calls that 941 * use process groups or processed that do not have an open file descriptor 942 * for this stream (Such entries would not be removed by strclean). 943 */ 944 static void 945 strcleanall(struct vnode *vp) 946 { 947 strsig_t *ssp, *nssp; 948 stdata_t *stp; 949 950 stp = vp->v_stream; 951 mutex_enter(&stp->sd_lock); 952 ssp = stp->sd_siglist; 953 stp->sd_siglist = NULL; 954 while (ssp) { 955 nssp = ssp->ss_next; 956 mutex_enter(&pidlock); 957 PID_RELE(ssp->ss_pidp); 958 mutex_exit(&pidlock); 959 kmem_free(ssp, sizeof (strsig_t)); 960 ssp = nssp; 961 } 962 stp->sd_sigflags = 0; 963 mutex_exit(&stp->sd_lock); 964 } 965 966 /* 967 * Retrieve the next message from the logical stream head read queue 968 * using either rwnext (if sync stream) or getq_noenab. 969 * It is the callers responsibility to call qbackenable after 970 * it is finished with the message. The caller should not call 971 * qbackenable until after any putback calls to avoid spurious backenabling. 
972 */ 973 mblk_t * 974 strget(struct stdata *stp, queue_t *q, struct uio *uiop, int first, 975 int *errorp) 976 { 977 mblk_t *bp; 978 int error; 979 ssize_t rbytes = 0; 980 981 /* Holding sd_lock prevents the read queue from changing */ 982 ASSERT(MUTEX_HELD(&stp->sd_lock)); 983 984 if (uiop != NULL && stp->sd_struiordq != NULL && 985 q->q_first == NULL && 986 (!first || (stp->sd_wakeq & RSLEEP))) { 987 /* 988 * Stream supports rwnext() for the read side. 989 * If this is the first time we're called by e.g. strread 990 * only do the downcall if there is a deferred wakeup 991 * (registered in sd_wakeq). 992 */ 993 struiod_t uiod; 994 995 if (first) 996 stp->sd_wakeq &= ~RSLEEP; 997 998 (void) uiodup(uiop, &uiod.d_uio, uiod.d_iov, 999 sizeof (uiod.d_iov) / sizeof (*uiod.d_iov)); 1000 uiod.d_mp = 0; 1001 /* 1002 * Mark that a thread is in rwnext on the read side 1003 * to prevent strrput from nacking ioctls immediately. 1004 * When the last concurrent rwnext returns 1005 * the ioctls are nack'ed. 1006 */ 1007 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1008 stp->sd_struiodnak++; 1009 /* 1010 * Note: rwnext will drop sd_lock. 1011 */ 1012 error = rwnext(q, &uiod); 1013 ASSERT(MUTEX_NOT_HELD(&stp->sd_lock)); 1014 mutex_enter(&stp->sd_lock); 1015 stp->sd_struiodnak--; 1016 while (stp->sd_struiodnak == 0 && 1017 ((bp = stp->sd_struionak) != NULL)) { 1018 stp->sd_struionak = bp->b_next; 1019 bp->b_next = NULL; 1020 bp->b_datap->db_type = M_IOCNAK; 1021 /* 1022 * Protect against the driver passing up 1023 * messages after it has done a qprocsoff. 
1024 */ 1025 if (_OTHERQ(q)->q_next == NULL) 1026 freemsg(bp); 1027 else { 1028 mutex_exit(&stp->sd_lock); 1029 qreply(q, bp); 1030 mutex_enter(&stp->sd_lock); 1031 } 1032 } 1033 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1034 if (error == 0 || error == EWOULDBLOCK) { 1035 if ((bp = uiod.d_mp) != NULL) { 1036 *errorp = 0; 1037 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1038 return (bp); 1039 } 1040 error = 0; 1041 } else if (error == EINVAL) { 1042 /* 1043 * The stream plumbing must have 1044 * changed while we were away, so 1045 * just turn off rwnext()s. 1046 */ 1047 error = 0; 1048 } else if (error == EBUSY) { 1049 /* 1050 * The module might have data in transit using putnext 1051 * Fall back on waiting + getq. 1052 */ 1053 error = 0; 1054 } else { 1055 *errorp = error; 1056 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1057 return (NULL); 1058 } 1059 /* 1060 * Try a getq in case a rwnext() generated mblk 1061 * has bubbled up via strrput(). 1062 */ 1063 } 1064 *errorp = 0; 1065 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1066 1067 /* 1068 * If we have a valid uio, try and use this as a guide for how 1069 * many bytes to retrieve from the queue via getq_noenab(). 1070 * Doing this can avoid unneccesary counting of overlong 1071 * messages in putback(). We currently only do this for sockets 1072 * and only if there is no sd_rputdatafunc hook. 1073 * 1074 * The sd_rputdatafunc hook transforms the entire message 1075 * before any bytes in it can be given to a client. So, rbytes 1076 * must be 0 if there is a hook. 1077 */ 1078 if ((uiop != NULL) && (stp->sd_vnode->v_type == VSOCK) && 1079 (stp->sd_rputdatafunc == NULL)) 1080 rbytes = uiop->uio_resid; 1081 1082 return (getq_noenab(q, rbytes)); 1083 } 1084 1085 /* 1086 * Copy out the message pointed to by `bp' into the uio pointed to by `uiop'. 1087 * If the message does not fit in the uio the remainder of it is returned; 1088 * otherwise NULL is returned. Any embedded zero-length mblk_t's are 1089 * consumed, even if uio_resid reaches zero. 
On error, `*errorp' is set to 1090 * the error code, the message is consumed, and NULL is returned. 1091 */ 1092 static mblk_t * 1093 struiocopyout(mblk_t *bp, struct uio *uiop, int *errorp) 1094 { 1095 int error; 1096 ptrdiff_t n; 1097 mblk_t *nbp; 1098 1099 ASSERT(bp->b_wptr >= bp->b_rptr); 1100 1101 do { 1102 if ((n = MIN(uiop->uio_resid, MBLKL(bp))) != 0) { 1103 ASSERT(n > 0); 1104 1105 error = uiomove(bp->b_rptr, n, UIO_READ, uiop); 1106 if (error != 0) { 1107 freemsg(bp); 1108 *errorp = error; 1109 return (NULL); 1110 } 1111 } 1112 1113 bp->b_rptr += n; 1114 while (bp != NULL && (bp->b_rptr >= bp->b_wptr)) { 1115 nbp = bp; 1116 bp = bp->b_cont; 1117 freeb(nbp); 1118 } 1119 } while (bp != NULL && uiop->uio_resid > 0); 1120 1121 *errorp = 0; 1122 return (bp); 1123 } 1124 1125 /* 1126 * Read a stream according to the mode flags in sd_flag: 1127 * 1128 * (default mode) - Byte stream, msg boundaries are ignored 1129 * RD_MSGDIS (msg discard) - Read on msg boundaries and throw away 1130 * any data remaining in msg 1131 * RD_MSGNODIS (msg non-discard) - Read on msg boundaries and put back 1132 * any remaining data on head of read queue 1133 * 1134 * Consume readable messages on the front of the queue until 1135 * ttolwp(curthread)->lwp_count 1136 * is satisfied, the readable messages are exhausted, or a message 1137 * boundary is reached in a message mode. If no data was read and 1138 * the stream was not opened with the NDELAY flag, block until data arrives. 1139 * Otherwise return the data read and update the count. 1140 * 1141 * In default mode a 0 length message signifies end-of-file and terminates 1142 * a read in progress. The 0 length message is removed from the queue 1143 * only if it is the only message read (no data is read). 1144 * 1145 * An attempt to read an M_PROTO or M_PCPROTO message results in an 1146 * EBADMSG error return, unless either RD_PROTDAT or RD_PROTDIS are set. 1147 * If RD_PROTDAT is set, M_PROTO and M_PCPROTO messages are read as data. 
* If RD_PROTDIS is set, the M_PROTO and M_PCPROTO parts of the message
 * are unlinked from any M_DATA blocks in the message, the protos are
 * thrown away, and the data is read.
 */
/* ARGSUSED */
int
strread(struct vnode *vp, struct uio *uiop, cred_t *crp)
{
	struct stdata *stp;
	mblk_t *bp, *nbp;
	queue_t *q;
	int error = 0;
	uint_t old_sd_flag;
	int first;
	char rflg;		/* set once any data has been copied out */
	uint_t mark;		/* Contains MSG*MARK and _LASTMARK */
#define	_LASTMARK 0x8000	/* Distinct from MSG*MARK */
	short delim;
	unsigned char pri = 0;
	char waitflag;
	unsigned char type;

	TRACE_1(TR_FAC_STREAMS_FR,
	    TR_STRREAD_ENTER, "strread:%p", vp);
	ASSERT(vp->v_stream);
	stp = vp->v_stream;

	mutex_enter(&stp->sd_lock);

	if ((error = i_straccess(stp, JCREAD)) != 0) {
		mutex_exit(&stp->sd_lock);
		return (error);
	}

	if (stp->sd_flag & (STRDERR|STPLEX)) {
		error = strgeterr(stp, STRDERR|STPLEX, 0);
		if (error != 0) {
			mutex_exit(&stp->sd_lock);
			return (error);
		}
	}

	/*
	 * Loop terminates when uiop->uio_resid == 0.
	 */
	rflg = 0;
	waitflag = READWAIT;
	q = _RD(stp->sd_wrq);
	for (;;) {
		/* Every iteration starts with sd_lock held. */
		ASSERT(MUTEX_HELD(&stp->sd_lock));
		old_sd_flag = stp->sd_flag;
		mark = 0;
		delim = 0;
		first = 1;
		while ((bp = strget(stp, q, uiop, first, &error)) == NULL) {
			int done = 0;

			ASSERT(MUTEX_HELD(&stp->sd_lock));

			if (error != 0)
				goto oops;

			if (stp->sd_flag & (STRHUP|STREOF)) {
				goto oops;
			}
			/*
			 * Some data has already been copied out and the
			 * stream is not in delimited mode: return what we
			 * have instead of sleeping for more.
			 */
			if (rflg && !(stp->sd_flag & STRDELIM)) {
				goto oops;
			}
			/*
			 * If a read(fd,buf,0) has been done, there is no
			 * need to sleep. We always have zero bytes to
			 * return.
			 */
			if (uiop->uio_resid == 0) {
				goto oops;
			}

			qbackenable(q, 0);

			TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_WAIT,
			    "strread calls strwaitq:%p, %p, %p",
			    vp, uiop, crp);
			if ((error = strwaitq(stp, waitflag, uiop->uio_resid,
			    uiop->uio_fmode, -1, &done)) != 0 || done) {
				TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_DONE,
				    "strread error or done:%p, %p, %p",
				    vp, uiop, crp);
				/*
				 * With FNDELAY on the file and OLDNDELAY on
				 * the stream, EAGAIN is reported as success
				 * (a zero-byte read) rather than an error.
				 */
				if ((uiop->uio_fmode & FNDELAY) &&
				    (stp->sd_flag & OLDNDELAY) &&
				    (error == EAGAIN))
					error = 0;
				goto oops;
			}
			TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_AWAKE,
			    "strread awakes:%p, %p, %p", vp, uiop, crp);
			/* Re-run the access check after having slept. */
			if ((error = i_straccess(stp, JCREAD)) != 0) {
				goto oops;
			}
			first = 0;
		}

		ASSERT(MUTEX_HELD(&stp->sd_lock));
		ASSERT(bp);
		pri = bp->b_band;
		/*
		 * Extract any mark information. If the message is not
		 * completely consumed this information will be put in the mblk
		 * that is putback.
		 * If MSGMARKNEXT is set and the message is completely consumed
		 * the STRATMARK flag will be set below. Likewise, if
		 * MSGNOTMARKNEXT is set and the message is
		 * completely consumed STRNOTATMARK will be set.
		 *
		 * For some unknown reason strread only breaks the read at the
		 * last mark.
		 */
		mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT);
		ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
		    (MSGMARKNEXT|MSGNOTMARKNEXT));
		if (mark != 0 && bp == stp->sd_mark) {
			/*
			 * Data was already read before reaching the last
			 * marked message: put it back and end this read here.
			 */
			if (rflg) {
				putback(stp, q, bp, pri);
				goto oops;
			}
			mark |= _LASTMARK;
			stp->sd_mark = NULL;
		}
		if ((stp->sd_flag & STRDELIM) && (bp->b_flag & MSGDELIM))
			delim = 1;
		/*
		 * sd_lock is dropped while the dequeued message is processed.
		 * Each path below re-takes it before continuing the loop or
		 * jumping to oops; paths that leave without it use oops1.
		 */
		mutex_exit(&stp->sd_lock);

		if (STREAM_NEEDSERVICE(stp))
			stream_runservice(stp);

		type = bp->b_datap->db_type;

		switch (type) {

		case M_DATA:
ismdata:
			if (msgnodata(bp)) {
				if (mark || delim) {
					freemsg(bp);
				} else if (rflg) {

					/*
					 * If already read data put zero
					 * length message back on queue else
					 * free msg and return 0.
					 */
					bp->b_band = pri;
					mutex_enter(&stp->sd_lock);
					putback(stp, q, bp, pri);
					mutex_exit(&stp->sd_lock);
				} else {
					freemsg(bp);
				}
				error =  0;
				goto oops1;
			}

			rflg = 1;
			/* From here on strwaitq() is called with NOINTR. */
			waitflag |= NOINTR;
			bp = struiocopyout(bp, uiop, &error);
			if (error != 0)
				goto oops1;

			mutex_enter(&stp->sd_lock);
			if (bp) {
				/*
				 * Have remaining data in message.
				 * Free msg if in discard mode.
				 */
				if (stp->sd_read_opt & RD_MSGDIS) {
					freemsg(bp);
				} else {
					/*
					 * Restore band, mark and delimiter
					 * state on the remainder before it
					 * goes back on the queue.
					 */
					bp->b_band = pri;
					if ((mark & _LASTMARK) &&
					    (stp->sd_mark == NULL))
						stp->sd_mark = bp;
					bp->b_flag |= mark & ~_LASTMARK;
					if (delim)
						bp->b_flag |= MSGDELIM;
					if (msgnodata(bp))
						freemsg(bp);
					else
						putback(stp, q, bp, pri);
				}
			} else {
				/*
				 * Consumed the complete message.
				 * Move the MSG*MARKNEXT information
				 * to the stream head just in case
				 * the read queue becomes empty.
				 *
				 * If the stream head was at the mark
				 * (STRATMARK) before we dropped sd_lock above
				 * and some data was consumed then we have
				 * moved past the mark thus STRATMARK is
				 * cleared. However, if a message arrived in
				 * strrput during the copyout above causing
				 * STRATMARK to be set we can not clear that
				 * flag.
				 */
				if (mark &
				    (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) {
					if (mark & MSGMARKNEXT) {
						stp->sd_flag &= ~STRNOTATMARK;
						stp->sd_flag |= STRATMARK;
					} else if (mark & MSGNOTMARKNEXT) {
						stp->sd_flag &= ~STRATMARK;
						stp->sd_flag |= STRNOTATMARK;
					} else {
						stp->sd_flag &=
						    ~(STRATMARK|STRNOTATMARK);
					}
				} else if (rflg && (old_sd_flag & STRATMARK)) {
					stp->sd_flag &= ~STRATMARK;
				}
			}

			/*
			 * Check for signal messages at the front of the read
			 * queue and generate the signal(s) if appropriate.
			 * The only signal that can be on queue is M_SIG at
			 * this point.
			 */
			while ((((bp = q->q_first)) != NULL) &&
			    (bp->b_datap->db_type == M_SIG)) {
				bp = getq_noenab(q, 0);
				/*
				 * sd_lock is held so the content of the
				 * read queue can not change.
				 */
				ASSERT(bp != NULL && DB_TYPE(bp) == M_SIG);
				strsignal_nolock(stp, *bp->b_rptr, bp->b_band);
				mutex_exit(&stp->sd_lock);
				freemsg(bp);
				if (STREAM_NEEDSERVICE(stp))
					stream_runservice(stp);
				mutex_enter(&stp->sd_lock);
			}

			/*
			 * Stop when the request is satisfied, the last mark
			 * or a delimiter was hit, or a message mode is set.
			 */
			if ((uiop->uio_resid == 0) || (mark & _LASTMARK) ||
			    delim ||
			    (stp->sd_read_opt & (RD_MSGDIS|RD_MSGNODIS))) {
				goto oops;
			}
			continue;

		case M_SIG:
			strsignal(stp, *bp->b_rptr, (int32_t)bp->b_band);
			freemsg(bp);
			mutex_enter(&stp->sd_lock);
			continue;

		case M_PROTO:
		case M_PCPROTO:
			/*
			 * Only data messages are readable.
			 * Any others generate an error, unless
			 * RD_PROTDIS or RD_PROTDAT is set.
			 */
			if (stp->sd_read_opt & RD_PROTDAT) {
				/*
				 * NOTE(review): this walks b_next, not b_cont;
				 * confirm that is the intended link for a
				 * message just taken off the queue.
				 */
				for (nbp = bp; nbp; nbp = nbp->b_next) {
					if ((nbp->b_datap->db_type ==
					    M_PROTO) ||
					    (nbp->b_datap->db_type ==
					    M_PCPROTO)) {
						nbp->b_datap->db_type = M_DATA;
					} else {
						break;
					}
				}
				/*
				 * clear stream head hi pri flag based on
				 * first message
				 */
				if (type == M_PCPROTO) {
					mutex_enter(&stp->sd_lock);
					stp->sd_flag &= ~STRPRI;
					mutex_exit(&stp->sd_lock);
				}
				goto ismdata;
			} else if (stp->sd_read_opt & RD_PROTDIS) {
				/*
				 * discard non-data messages
				 */
				while (bp &&
				    ((bp->b_datap->db_type == M_PROTO) ||
				    (bp->b_datap->db_type == M_PCPROTO))) {
					nbp = unlinkb(bp);
					freeb(bp);
					bp = nbp;
				}
				/*
				 * clear stream head hi pri flag based on
				 * first message
				 */
				if (type == M_PCPROTO) {
					mutex_enter(&stp->sd_lock);
					stp->sd_flag &= ~STRPRI;
					mutex_exit(&stp->sd_lock);
				}
				if (bp) {
					bp->b_band = pri;
					goto ismdata;
				} else {
					/* Nothing but protocol blocks. */
					break;
				}
			}
			/* FALLTHRU */
		case M_PASSFP:
			if ((bp->b_datap->db_type == M_PASSFP) &&
			    (stp->sd_read_opt & RD_PROTDIS)) {
				freemsg(bp);
				break;
			}
			/*
			 * Not readable: leave the message at the head of the
			 * queue.  EBADMSG is reported only if this read has
			 * not already returned data.
			 */
			mutex_enter(&stp->sd_lock);
			putback(stp, q, bp, pri);
			mutex_exit(&stp->sd_lock);
			if (rflg == 0)
				error = EBADMSG;
			goto oops1;

		default:
			/*
			 * Garbage on stream head read queue.
			 */
			cmn_err(CE_WARN, "bad %x found at stream head\n",
			    bp->b_datap->db_type);
			freemsg(bp);
			goto oops1;
		}
		/* Cases that "break" re-take sd_lock for the next pass. */
		mutex_enter(&stp->sd_lock);
	}
oops:
	/* Reached with sd_lock held. */
	mutex_exit(&stp->sd_lock);
oops1:
	/* Reached with sd_lock already released. */
	qbackenable(q, pri);
	return (error);
#undef	_LASTMARK
}

/*
 * Default processing of M_PROTO/M_PCPROTO messages.
 * Determine which wakeups and signals are needed.
 * This can be replaced by a user-specified procedure for kernel users
 * of STREAMS.
1497 */ 1498 /* ARGSUSED */ 1499 mblk_t * 1500 strrput_proto(vnode_t *vp, mblk_t *mp, 1501 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 1502 strsigset_t *allmsgsigs, strpollset_t *pollwakeups) 1503 { 1504 *wakeups = RSLEEP; 1505 *allmsgsigs = 0; 1506 1507 switch (mp->b_datap->db_type) { 1508 case M_PROTO: 1509 if (mp->b_band == 0) { 1510 *firstmsgsigs = S_INPUT | S_RDNORM; 1511 *pollwakeups = POLLIN | POLLRDNORM; 1512 } else { 1513 *firstmsgsigs = S_INPUT | S_RDBAND; 1514 *pollwakeups = POLLIN | POLLRDBAND; 1515 } 1516 break; 1517 case M_PCPROTO: 1518 *firstmsgsigs = S_HIPRI; 1519 *pollwakeups = POLLPRI; 1520 break; 1521 } 1522 return (mp); 1523 } 1524 1525 /* 1526 * Default processing of everything but M_DATA, M_PROTO, M_PCPROTO and 1527 * M_PASSFP messages. 1528 * Determine which wakeups and signals are needed. 1529 * This can be replaced by a user-specified procedure for kernel users 1530 * of STREAMS. 1531 */ 1532 /* ARGSUSED */ 1533 mblk_t * 1534 strrput_misc(vnode_t *vp, mblk_t *mp, 1535 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 1536 strsigset_t *allmsgsigs, strpollset_t *pollwakeups) 1537 { 1538 *wakeups = 0; 1539 *firstmsgsigs = 0; 1540 *allmsgsigs = 0; 1541 *pollwakeups = 0; 1542 return (mp); 1543 } 1544 1545 /* 1546 * Stream read put procedure. Called from downstream driver/module 1547 * with messages for the stream head. Data, protocol, and in-stream 1548 * signal messages are placed on the queue, others are handled directly. 
1549 */ 1550 int 1551 strrput(queue_t *q, mblk_t *bp) 1552 { 1553 struct stdata *stp; 1554 ulong_t rput_opt; 1555 strwakeup_t wakeups; 1556 strsigset_t firstmsgsigs; /* Signals if first message on queue */ 1557 strsigset_t allmsgsigs; /* Signals for all messages */ 1558 strsigset_t signals; /* Signals events to generate */ 1559 strpollset_t pollwakeups; 1560 mblk_t *nextbp; 1561 uchar_t band = 0; 1562 int hipri_sig; 1563 1564 stp = (struct stdata *)q->q_ptr; 1565 /* 1566 * Use rput_opt for optimized access to the SR_ flags except 1567 * SR_POLLIN. That flag has to be checked under sd_lock since it 1568 * is modified by strpoll(). 1569 */ 1570 rput_opt = stp->sd_rput_opt; 1571 1572 ASSERT(qclaimed(q)); 1573 TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_ENTER, 1574 "strrput called with message type:q %p bp %p", q, bp); 1575 1576 /* 1577 * Perform initial processing and pass to the parameterized functions. 1578 */ 1579 ASSERT(bp->b_next == NULL); 1580 1581 switch (bp->b_datap->db_type) { 1582 case M_DATA: 1583 /* 1584 * sockfs is the only consumer of STREOF and when it is set, 1585 * it implies that the receiver is not interested in receiving 1586 * any more data, hence the mblk is freed to prevent unnecessary 1587 * message queueing at the stream head. 1588 */ 1589 if (stp->sd_flag == STREOF) { 1590 freemsg(bp); 1591 return (0); 1592 } 1593 if ((rput_opt & SR_IGN_ZEROLEN) && 1594 bp->b_rptr == bp->b_wptr && msgnodata(bp)) { 1595 /* 1596 * Ignore zero-length M_DATA messages. These might be 1597 * generated by some transports. 1598 * The zero-length M_DATA messages, even if they 1599 * are ignored, should effect the atmark tracking and 1600 * should wake up a thread sleeping in strwaitmark. 1601 */ 1602 mutex_enter(&stp->sd_lock); 1603 if (bp->b_flag & MSGMARKNEXT) { 1604 /* 1605 * Record the position of the mark either 1606 * in q_last or in STRATMARK. 
1607 */ 1608 if (q->q_last != NULL) { 1609 q->q_last->b_flag &= ~MSGNOTMARKNEXT; 1610 q->q_last->b_flag |= MSGMARKNEXT; 1611 } else { 1612 stp->sd_flag &= ~STRNOTATMARK; 1613 stp->sd_flag |= STRATMARK; 1614 } 1615 } else if (bp->b_flag & MSGNOTMARKNEXT) { 1616 /* 1617 * Record that this is not the position of 1618 * the mark either in q_last or in 1619 * STRNOTATMARK. 1620 */ 1621 if (q->q_last != NULL) { 1622 q->q_last->b_flag &= ~MSGMARKNEXT; 1623 q->q_last->b_flag |= MSGNOTMARKNEXT; 1624 } else { 1625 stp->sd_flag &= ~STRATMARK; 1626 stp->sd_flag |= STRNOTATMARK; 1627 } 1628 } 1629 if (stp->sd_flag & RSLEEP) { 1630 stp->sd_flag &= ~RSLEEP; 1631 cv_broadcast(&q->q_wait); 1632 } 1633 mutex_exit(&stp->sd_lock); 1634 freemsg(bp); 1635 return (0); 1636 } 1637 wakeups = RSLEEP; 1638 if (bp->b_band == 0) { 1639 firstmsgsigs = S_INPUT | S_RDNORM; 1640 pollwakeups = POLLIN | POLLRDNORM; 1641 } else { 1642 firstmsgsigs = S_INPUT | S_RDBAND; 1643 pollwakeups = POLLIN | POLLRDBAND; 1644 } 1645 if (rput_opt & SR_SIGALLDATA) 1646 allmsgsigs = firstmsgsigs; 1647 else 1648 allmsgsigs = 0; 1649 1650 mutex_enter(&stp->sd_lock); 1651 if ((rput_opt & SR_CONSOL_DATA) && 1652 (q->q_last != NULL) && 1653 (bp->b_flag & (MSGMARK|MSGDELIM)) == 0) { 1654 /* 1655 * Consolidate an M_DATA message onto an M_DATA, 1656 * M_PROTO, or M_PCPROTO by merging it with q_last. 1657 * The consolidation does not take place if 1658 * the old message is marked with either of the 1659 * marks or the delim flag or if the new 1660 * message is marked with MSGMARK. The MSGMARK 1661 * check is needed to handle the odd semantics of 1662 * MSGMARK where essentially the whole message 1663 * is to be treated as marked. 1664 * Carry any MSGMARKNEXT and MSGNOTMARKNEXT from the 1665 * new message to the front of the b_cont chain. 
1666 */ 1667 mblk_t *lbp = q->q_last; 1668 unsigned char db_type = lbp->b_datap->db_type; 1669 1670 if ((db_type == M_DATA || db_type == M_PROTO || 1671 db_type == M_PCPROTO) && 1672 !(lbp->b_flag & (MSGDELIM|MSGMARK|MSGMARKNEXT))) { 1673 rmvq_noenab(q, lbp); 1674 /* 1675 * The first message in the b_cont list 1676 * tracks MSGMARKNEXT and MSGNOTMARKNEXT. 1677 * We need to handle the case where we 1678 * are appending: 1679 * 1680 * 1) a MSGMARKNEXT to a MSGNOTMARKNEXT. 1681 * 2) a MSGMARKNEXT to a plain message. 1682 * 3) a MSGNOTMARKNEXT to a plain message 1683 * 4) a MSGNOTMARKNEXT to a MSGNOTMARKNEXT 1684 * message. 1685 * 1686 * Thus we never append a MSGMARKNEXT or 1687 * MSGNOTMARKNEXT to a MSGMARKNEXT message. 1688 */ 1689 if (bp->b_flag & MSGMARKNEXT) { 1690 lbp->b_flag |= MSGMARKNEXT; 1691 lbp->b_flag &= ~MSGNOTMARKNEXT; 1692 bp->b_flag &= ~MSGMARKNEXT; 1693 } else if (bp->b_flag & MSGNOTMARKNEXT) { 1694 lbp->b_flag |= MSGNOTMARKNEXT; 1695 bp->b_flag &= ~MSGNOTMARKNEXT; 1696 } 1697 1698 linkb(lbp, bp); 1699 bp = lbp; 1700 /* 1701 * The new message logically isn't the first 1702 * even though the q_first check below thinks 1703 * it is. Clear the firstmsgsigs to make it 1704 * not appear to be first. 
1705 */ 1706 firstmsgsigs = 0; 1707 } 1708 } 1709 break; 1710 1711 case M_PASSFP: 1712 wakeups = RSLEEP; 1713 allmsgsigs = 0; 1714 if (bp->b_band == 0) { 1715 firstmsgsigs = S_INPUT | S_RDNORM; 1716 pollwakeups = POLLIN | POLLRDNORM; 1717 } else { 1718 firstmsgsigs = S_INPUT | S_RDBAND; 1719 pollwakeups = POLLIN | POLLRDBAND; 1720 } 1721 mutex_enter(&stp->sd_lock); 1722 break; 1723 1724 case M_PROTO: 1725 case M_PCPROTO: 1726 ASSERT(stp->sd_rprotofunc != NULL); 1727 bp = (stp->sd_rprotofunc)(stp->sd_vnode, bp, 1728 &wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups); 1729 #define ALLSIG (S_INPUT|S_HIPRI|S_OUTPUT|S_MSG|S_ERROR|S_HANGUP|S_RDNORM|\ 1730 S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG) 1731 #define ALLPOLL (POLLIN|POLLPRI|POLLOUT|POLLRDNORM|POLLWRNORM|POLLRDBAND|\ 1732 POLLWRBAND) 1733 1734 ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0); 1735 ASSERT((firstmsgsigs & ~ALLSIG) == 0); 1736 ASSERT((allmsgsigs & ~ALLSIG) == 0); 1737 ASSERT((pollwakeups & ~ALLPOLL) == 0); 1738 1739 mutex_enter(&stp->sd_lock); 1740 break; 1741 1742 default: 1743 ASSERT(stp->sd_rmiscfunc != NULL); 1744 bp = (stp->sd_rmiscfunc)(stp->sd_vnode, bp, 1745 &wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups); 1746 ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0); 1747 ASSERT((firstmsgsigs & ~ALLSIG) == 0); 1748 ASSERT((allmsgsigs & ~ALLSIG) == 0); 1749 ASSERT((pollwakeups & ~ALLPOLL) == 0); 1750 #undef ALLSIG 1751 #undef ALLPOLL 1752 mutex_enter(&stp->sd_lock); 1753 break; 1754 } 1755 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1756 1757 /* By default generate superset of signals */ 1758 signals = (firstmsgsigs | allmsgsigs); 1759 1760 /* 1761 * The proto and misc functions can return multiple messages 1762 * as a b_next chain. Such messages are processed separately. 
1763 */ 1764 one_more: 1765 hipri_sig = 0; 1766 if (bp == NULL) { 1767 nextbp = NULL; 1768 } else { 1769 nextbp = bp->b_next; 1770 bp->b_next = NULL; 1771 1772 switch (bp->b_datap->db_type) { 1773 case M_PCPROTO: 1774 /* 1775 * Only one priority protocol message is allowed at the 1776 * stream head at a time. 1777 */ 1778 if (stp->sd_flag & STRPRI) { 1779 TRACE_0(TR_FAC_STREAMS_FR, TR_STRRPUT_PROTERR, 1780 "M_PCPROTO already at head"); 1781 freemsg(bp); 1782 mutex_exit(&stp->sd_lock); 1783 goto done; 1784 } 1785 stp->sd_flag |= STRPRI; 1786 hipri_sig = 1; 1787 /* FALLTHRU */ 1788 case M_DATA: 1789 case M_PROTO: 1790 case M_PASSFP: 1791 band = bp->b_band; 1792 /* 1793 * Marking doesn't work well when messages 1794 * are marked in more than one band. We only 1795 * remember the last message received, even if 1796 * it is placed on the queue ahead of other 1797 * marked messages. 1798 */ 1799 if (bp->b_flag & MSGMARK) 1800 stp->sd_mark = bp; 1801 (void) putq(q, bp); 1802 1803 /* 1804 * If message is a PCPROTO message, always use 1805 * firstmsgsigs to determine if a signal should be 1806 * sent as strrput is the only place to send 1807 * signals for PCPROTO. Other messages are based on 1808 * the STRGETINPROG flag. The flag determines if 1809 * strrput or (k)strgetmsg will be responsible for 1810 * sending the signals, in the firstmsgsigs case. 
1811 */ 1812 if ((hipri_sig == 1) || 1813 (((stp->sd_flag & STRGETINPROG) == 0) && 1814 (q->q_first == bp))) 1815 signals = (firstmsgsigs | allmsgsigs); 1816 else 1817 signals = allmsgsigs; 1818 break; 1819 1820 default: 1821 mutex_exit(&stp->sd_lock); 1822 (void) strrput_nondata(q, bp); 1823 mutex_enter(&stp->sd_lock); 1824 break; 1825 } 1826 } 1827 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1828 /* 1829 * Wake sleeping read/getmsg and cancel deferred wakeup 1830 */ 1831 if (wakeups & RSLEEP) 1832 stp->sd_wakeq &= ~RSLEEP; 1833 1834 wakeups &= stp->sd_flag; 1835 if (wakeups & RSLEEP) { 1836 stp->sd_flag &= ~RSLEEP; 1837 cv_broadcast(&q->q_wait); 1838 } 1839 if (wakeups & WSLEEP) { 1840 stp->sd_flag &= ~WSLEEP; 1841 cv_broadcast(&_WR(q)->q_wait); 1842 } 1843 1844 if (pollwakeups != 0) { 1845 if (pollwakeups == (POLLIN | POLLRDNORM)) { 1846 /* 1847 * Can't use rput_opt since it was not 1848 * read when sd_lock was held and SR_POLLIN is changed 1849 * by strpoll() under sd_lock. 1850 */ 1851 if (!(stp->sd_rput_opt & SR_POLLIN)) 1852 goto no_pollwake; 1853 stp->sd_rput_opt &= ~SR_POLLIN; 1854 } 1855 mutex_exit(&stp->sd_lock); 1856 pollwakeup(&stp->sd_pollist, pollwakeups); 1857 mutex_enter(&stp->sd_lock); 1858 } 1859 no_pollwake: 1860 1861 /* 1862 * strsendsig can handle multiple signals with a 1863 * single call. 1864 */ 1865 if (stp->sd_sigflags & signals) 1866 strsendsig(stp->sd_siglist, signals, band, 0); 1867 mutex_exit(&stp->sd_lock); 1868 1869 1870 done: 1871 if (nextbp == NULL) 1872 return (0); 1873 1874 /* 1875 * Any signals were handled the first time. 1876 * Wakeups and pollwakeups are redone to avoid any race 1877 * conditions - all the messages are not queued until the 1878 * last message has been processed by strrput. 
1879 */ 1880 bp = nextbp; 1881 signals = firstmsgsigs = allmsgsigs = 0; 1882 mutex_enter(&stp->sd_lock); 1883 goto one_more; 1884 } 1885 1886 static void 1887 log_dupioc(queue_t *rq, mblk_t *bp) 1888 { 1889 queue_t *wq, *qp; 1890 char *modnames, *mnp, *dname; 1891 size_t maxmodstr; 1892 boolean_t islast; 1893 1894 /* 1895 * Allocate a buffer large enough to hold the names of nstrpush modules 1896 * and one driver, with spaces between and NUL terminator. If we can't 1897 * get memory, then we'll just log the driver name. 1898 */ 1899 maxmodstr = nstrpush * (FMNAMESZ + 1); 1900 mnp = modnames = kmem_alloc(maxmodstr, KM_NOSLEEP); 1901 1902 /* march down write side to print log message down to the driver */ 1903 wq = WR(rq); 1904 1905 /* make sure q_next doesn't shift around while we're grabbing data */ 1906 claimstr(wq); 1907 qp = wq->q_next; 1908 do { 1909 dname = Q2NAME(qp); 1910 islast = !SAMESTR(qp) || qp->q_next == NULL; 1911 if (modnames == NULL) { 1912 /* 1913 * If we don't have memory, then get the driver name in 1914 * the log where we can see it. Note that memory 1915 * pressure is a possible cause of these sorts of bugs. 1916 */ 1917 if (islast) { 1918 modnames = dname; 1919 maxmodstr = 0; 1920 } 1921 } else { 1922 mnp += snprintf(mnp, FMNAMESZ + 1, "%s", dname); 1923 if (!islast) 1924 *mnp++ = ' '; 1925 } 1926 qp = qp->q_next; 1927 } while (!islast); 1928 releasestr(wq); 1929 /* Cannot happen unless stream head is corrupt. */ 1930 ASSERT(modnames != NULL); 1931 (void) strlog(rq->q_qinfo->qi_minfo->mi_idnum, 0, 1, 1932 SL_CONSOLE|SL_TRACE|SL_ERROR, 1933 "Warning: stream %p received duplicate %X M_IOC%s; module list: %s", 1934 rq->q_ptr, ((struct iocblk *)bp->b_rptr)->ioc_cmd, 1935 (DB_TYPE(bp) == M_IOCACK ? 
"ACK" : "NAK"), modnames); 1936 if (maxmodstr != 0) 1937 kmem_free(modnames, maxmodstr); 1938 } 1939 1940 int 1941 strrput_nondata(queue_t *q, mblk_t *bp) 1942 { 1943 struct stdata *stp; 1944 struct iocblk *iocbp; 1945 struct stroptions *sop; 1946 struct copyreq *reqp; 1947 struct copyresp *resp; 1948 unsigned char bpri; 1949 unsigned char flushed_already = 0; 1950 1951 stp = (struct stdata *)q->q_ptr; 1952 1953 ASSERT(!(stp->sd_flag & STPLEX)); 1954 ASSERT(qclaimed(q)); 1955 1956 switch (bp->b_datap->db_type) { 1957 case M_ERROR: 1958 /* 1959 * An error has occurred downstream, the errno is in the first 1960 * bytes of the message. 1961 */ 1962 if ((bp->b_wptr - bp->b_rptr) == 2) { /* New flavor */ 1963 unsigned char rw = 0; 1964 1965 mutex_enter(&stp->sd_lock); 1966 if (*bp->b_rptr != NOERROR) { /* read error */ 1967 if (*bp->b_rptr != 0) { 1968 if (stp->sd_flag & STRDERR) 1969 flushed_already |= FLUSHR; 1970 stp->sd_flag |= STRDERR; 1971 rw |= FLUSHR; 1972 } else { 1973 stp->sd_flag &= ~STRDERR; 1974 } 1975 stp->sd_rerror = *bp->b_rptr; 1976 } 1977 bp->b_rptr++; 1978 if (*bp->b_rptr != NOERROR) { /* write error */ 1979 if (*bp->b_rptr != 0) { 1980 if (stp->sd_flag & STWRERR) 1981 flushed_already |= FLUSHW; 1982 stp->sd_flag |= STWRERR; 1983 rw |= FLUSHW; 1984 } else { 1985 stp->sd_flag &= ~STWRERR; 1986 } 1987 stp->sd_werror = *bp->b_rptr; 1988 } 1989 if (rw) { 1990 TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_WAKE, 1991 "strrput cv_broadcast:q %p, bp %p", 1992 q, bp); 1993 cv_broadcast(&q->q_wait); /* readers */ 1994 cv_broadcast(&_WR(q)->q_wait); /* writers */ 1995 cv_broadcast(&stp->sd_monitor); /* ioctllers */ 1996 1997 mutex_exit(&stp->sd_lock); 1998 pollwakeup(&stp->sd_pollist, POLLERR); 1999 mutex_enter(&stp->sd_lock); 2000 2001 if (stp->sd_sigflags & S_ERROR) 2002 strsendsig(stp->sd_siglist, S_ERROR, 0, 2003 ((rw & FLUSHR) ? 
stp->sd_rerror : 2004 stp->sd_werror)); 2005 mutex_exit(&stp->sd_lock); 2006 /* 2007 * Send the M_FLUSH only 2008 * for the first M_ERROR 2009 * message on the stream 2010 */ 2011 if (flushed_already == rw) { 2012 freemsg(bp); 2013 return (0); 2014 } 2015 2016 bp->b_datap->db_type = M_FLUSH; 2017 *bp->b_rptr = rw; 2018 bp->b_wptr = bp->b_rptr + 1; 2019 /* 2020 * Protect against the driver 2021 * passing up messages after 2022 * it has done a qprocsoff 2023 */ 2024 if (_OTHERQ(q)->q_next == NULL) 2025 freemsg(bp); 2026 else 2027 qreply(q, bp); 2028 return (0); 2029 } else 2030 mutex_exit(&stp->sd_lock); 2031 } else if (*bp->b_rptr != 0) { /* Old flavor */ 2032 if (stp->sd_flag & (STRDERR|STWRERR)) 2033 flushed_already = FLUSHRW; 2034 mutex_enter(&stp->sd_lock); 2035 stp->sd_flag |= (STRDERR|STWRERR); 2036 stp->sd_rerror = *bp->b_rptr; 2037 stp->sd_werror = *bp->b_rptr; 2038 TRACE_2(TR_FAC_STREAMS_FR, 2039 TR_STRRPUT_WAKE2, 2040 "strrput wakeup #2:q %p, bp %p", q, bp); 2041 cv_broadcast(&q->q_wait); /* the readers */ 2042 cv_broadcast(&_WR(q)->q_wait); /* the writers */ 2043 cv_broadcast(&stp->sd_monitor); /* ioctllers */ 2044 2045 mutex_exit(&stp->sd_lock); 2046 pollwakeup(&stp->sd_pollist, POLLERR); 2047 mutex_enter(&stp->sd_lock); 2048 2049 if (stp->sd_sigflags & S_ERROR) 2050 strsendsig(stp->sd_siglist, S_ERROR, 0, 2051 (stp->sd_werror ? stp->sd_werror : 2052 stp->sd_rerror)); 2053 mutex_exit(&stp->sd_lock); 2054 2055 /* 2056 * Send the M_FLUSH only 2057 * for the first M_ERROR 2058 * message on the stream 2059 */ 2060 if (flushed_already != FLUSHRW) { 2061 bp->b_datap->db_type = M_FLUSH; 2062 *bp->b_rptr = FLUSHRW; 2063 /* 2064 * Protect against the driver passing up 2065 * messages after it has done a 2066 * qprocsoff. 
2067 */ 2068 if (_OTHERQ(q)->q_next == NULL) 2069 freemsg(bp); 2070 else 2071 qreply(q, bp); 2072 return (0); 2073 } 2074 } 2075 freemsg(bp); 2076 return (0); 2077 2078 case M_HANGUP: 2079 2080 freemsg(bp); 2081 mutex_enter(&stp->sd_lock); 2082 stp->sd_werror = ENXIO; 2083 stp->sd_flag |= STRHUP; 2084 stp->sd_flag &= ~(WSLEEP|RSLEEP); 2085 2086 /* 2087 * send signal if controlling tty 2088 */ 2089 2090 if (stp->sd_sidp) { 2091 prsignal(stp->sd_sidp, SIGHUP); 2092 if (stp->sd_sidp != stp->sd_pgidp) 2093 pgsignal(stp->sd_pgidp, SIGTSTP); 2094 } 2095 2096 /* 2097 * wake up read, write, and exception pollers and 2098 * reset wakeup mechanism. 2099 */ 2100 cv_broadcast(&q->q_wait); /* the readers */ 2101 cv_broadcast(&_WR(q)->q_wait); /* the writers */ 2102 cv_broadcast(&stp->sd_monitor); /* the ioctllers */ 2103 strhup(stp); 2104 mutex_exit(&stp->sd_lock); 2105 return (0); 2106 2107 case M_UNHANGUP: 2108 freemsg(bp); 2109 mutex_enter(&stp->sd_lock); 2110 stp->sd_werror = 0; 2111 stp->sd_flag &= ~STRHUP; 2112 mutex_exit(&stp->sd_lock); 2113 return (0); 2114 2115 case M_SIG: 2116 /* 2117 * Someone downstream wants to post a signal. The 2118 * signal to post is contained in the first byte of the 2119 * message. If the message would go on the front of 2120 * the queue, send a signal to the process group 2121 * (if not SIGPOLL) or to the siglist processes 2122 * (SIGPOLL). If something is already on the queue, 2123 * OR if we are delivering a delayed suspend (*sigh* 2124 * another "tty" hack) and there's no one sleeping already, 2125 * just enqueue the message. 2126 */ 2127 mutex_enter(&stp->sd_lock); 2128 if (q->q_first || (*bp->b_rptr == SIGTSTP && 2129 !(stp->sd_flag & RSLEEP))) { 2130 (void) putq(q, bp); 2131 mutex_exit(&stp->sd_lock); 2132 return (0); 2133 } 2134 mutex_exit(&stp->sd_lock); 2135 /* FALLTHRU */ 2136 2137 case M_PCSIG: 2138 /* 2139 * Don't enqueue, just post the signal. 
2140 */ 2141 strsignal(stp, *bp->b_rptr, 0L); 2142 freemsg(bp); 2143 return (0); 2144 2145 case M_CMD: 2146 if (MBLKL(bp) != sizeof (cmdblk_t)) { 2147 freemsg(bp); 2148 return (0); 2149 } 2150 2151 mutex_enter(&stp->sd_lock); 2152 if (stp->sd_flag & STRCMDWAIT) { 2153 ASSERT(stp->sd_cmdblk == NULL); 2154 stp->sd_cmdblk = bp; 2155 cv_broadcast(&stp->sd_monitor); 2156 mutex_exit(&stp->sd_lock); 2157 } else { 2158 mutex_exit(&stp->sd_lock); 2159 freemsg(bp); 2160 } 2161 return (0); 2162 2163 case M_FLUSH: 2164 /* 2165 * Flush queues. The indication of which queues to flush 2166 * is in the first byte of the message. If the read queue 2167 * is specified, then flush it. If FLUSHBAND is set, just 2168 * flush the band specified by the second byte of the message. 2169 * 2170 * If a module has issued a M_SETOPT to not flush hi 2171 * priority messages off of the stream head, then pass this 2172 * flag into the flushq code to preserve such messages. 2173 */ 2174 2175 if (*bp->b_rptr & FLUSHR) { 2176 mutex_enter(&stp->sd_lock); 2177 if (*bp->b_rptr & FLUSHBAND) { 2178 ASSERT((bp->b_wptr - bp->b_rptr) >= 2); 2179 flushband(q, *(bp->b_rptr + 1), FLUSHALL); 2180 } else 2181 flushq_common(q, FLUSHALL, 2182 stp->sd_read_opt & RFLUSHPCPROT); 2183 if ((q->q_first == NULL) || 2184 (q->q_first->b_datap->db_type < QPCTL)) 2185 stp->sd_flag &= ~STRPRI; 2186 else { 2187 ASSERT(stp->sd_flag & STRPRI); 2188 } 2189 mutex_exit(&stp->sd_lock); 2190 } 2191 if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) { 2192 *bp->b_rptr &= ~FLUSHR; 2193 bp->b_flag |= MSGNOLOOP; 2194 /* 2195 * Protect against the driver passing up 2196 * messages after it has done a qprocsoff. 2197 */ 2198 if (_OTHERQ(q)->q_next == NULL) 2199 freemsg(bp); 2200 else 2201 qreply(q, bp); 2202 return (0); 2203 } 2204 freemsg(bp); 2205 return (0); 2206 2207 case M_IOCACK: 2208 case M_IOCNAK: 2209 iocbp = (struct iocblk *)bp->b_rptr; 2210 /* 2211 * If not waiting for ACK or NAK then just free msg. 
2212 * If incorrect id sequence number then just free msg. 2213 * If already have ACK or NAK for user then this is a 2214 * duplicate, display a warning and free the msg. 2215 */ 2216 mutex_enter(&stp->sd_lock); 2217 if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk || 2218 (stp->sd_iocid != iocbp->ioc_id)) { 2219 /* 2220 * If the ACK/NAK is a dup, display a message 2221 * Dup is when sd_iocid == ioc_id, and 2222 * sd_iocblk == <valid ptr> or -1 (the former 2223 * is when an ioctl has been put on the stream 2224 * head, but has not yet been consumed, the 2225 * later is when it has been consumed). 2226 */ 2227 if ((stp->sd_iocid == iocbp->ioc_id) && 2228 (stp->sd_iocblk != NULL)) { 2229 log_dupioc(q, bp); 2230 } 2231 freemsg(bp); 2232 mutex_exit(&stp->sd_lock); 2233 return (0); 2234 } 2235 2236 /* 2237 * Assign ACK or NAK to user and wake up. 2238 */ 2239 stp->sd_iocblk = bp; 2240 cv_broadcast(&stp->sd_monitor); 2241 mutex_exit(&stp->sd_lock); 2242 return (0); 2243 2244 case M_COPYIN: 2245 case M_COPYOUT: 2246 reqp = (struct copyreq *)bp->b_rptr; 2247 2248 /* 2249 * If not waiting for ACK or NAK then just fail request. 2250 * If already have ACK, NAK, or copy request, then just 2251 * fail request. 2252 * If incorrect id sequence number then just fail request. 2253 */ 2254 mutex_enter(&stp->sd_lock); 2255 if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk || 2256 (stp->sd_iocid != reqp->cq_id)) { 2257 if (bp->b_cont) { 2258 freemsg(bp->b_cont); 2259 bp->b_cont = NULL; 2260 } 2261 bp->b_datap->db_type = M_IOCDATA; 2262 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp); 2263 resp = (struct copyresp *)bp->b_rptr; 2264 resp->cp_rval = (caddr_t)1; /* failure */ 2265 mutex_exit(&stp->sd_lock); 2266 putnext(stp->sd_wrq, bp); 2267 return (0); 2268 } 2269 2270 /* 2271 * Assign copy request to user and wake up. 
2272 */ 2273 stp->sd_iocblk = bp; 2274 cv_broadcast(&stp->sd_monitor); 2275 mutex_exit(&stp->sd_lock); 2276 return (0); 2277 2278 case M_SETOPTS: 2279 /* 2280 * Set stream head options (read option, write offset, 2281 * min/max packet size, and/or high/low water marks for 2282 * the read side only). 2283 */ 2284 2285 bpri = 0; 2286 sop = (struct stroptions *)bp->b_rptr; 2287 mutex_enter(&stp->sd_lock); 2288 if (sop->so_flags & SO_READOPT) { 2289 switch (sop->so_readopt & RMODEMASK) { 2290 case RNORM: 2291 stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS); 2292 break; 2293 2294 case RMSGD: 2295 stp->sd_read_opt = 2296 ((stp->sd_read_opt & ~RD_MSGNODIS) | 2297 RD_MSGDIS); 2298 break; 2299 2300 case RMSGN: 2301 stp->sd_read_opt = 2302 ((stp->sd_read_opt & ~RD_MSGDIS) | 2303 RD_MSGNODIS); 2304 break; 2305 } 2306 switch (sop->so_readopt & RPROTMASK) { 2307 case RPROTNORM: 2308 stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS); 2309 break; 2310 2311 case RPROTDAT: 2312 stp->sd_read_opt = 2313 ((stp->sd_read_opt & ~RD_PROTDIS) | 2314 RD_PROTDAT); 2315 break; 2316 2317 case RPROTDIS: 2318 stp->sd_read_opt = 2319 ((stp->sd_read_opt & ~RD_PROTDAT) | 2320 RD_PROTDIS); 2321 break; 2322 } 2323 switch (sop->so_readopt & RFLUSHMASK) { 2324 case RFLUSHPCPROT: 2325 /* 2326 * This sets the stream head to NOT flush 2327 * M_PCPROTO messages. 
2328 */ 2329 stp->sd_read_opt |= RFLUSHPCPROT; 2330 break; 2331 } 2332 } 2333 if (sop->so_flags & SO_ERROPT) { 2334 switch (sop->so_erropt & RERRMASK) { 2335 case RERRNORM: 2336 stp->sd_flag &= ~STRDERRNONPERSIST; 2337 break; 2338 case RERRNONPERSIST: 2339 stp->sd_flag |= STRDERRNONPERSIST; 2340 break; 2341 } 2342 switch (sop->so_erropt & WERRMASK) { 2343 case WERRNORM: 2344 stp->sd_flag &= ~STWRERRNONPERSIST; 2345 break; 2346 case WERRNONPERSIST: 2347 stp->sd_flag |= STWRERRNONPERSIST; 2348 break; 2349 } 2350 } 2351 if (sop->so_flags & SO_COPYOPT) { 2352 if (sop->so_copyopt & ZCVMSAFE) { 2353 stp->sd_copyflag |= STZCVMSAFE; 2354 stp->sd_copyflag &= ~STZCVMUNSAFE; 2355 } else if (sop->so_copyopt & ZCVMUNSAFE) { 2356 stp->sd_copyflag |= STZCVMUNSAFE; 2357 stp->sd_copyflag &= ~STZCVMSAFE; 2358 } 2359 2360 if (sop->so_copyopt & COPYCACHED) { 2361 stp->sd_copyflag |= STRCOPYCACHED; 2362 } 2363 } 2364 if (sop->so_flags & SO_WROFF) 2365 stp->sd_wroff = sop->so_wroff; 2366 if (sop->so_flags & SO_TAIL) 2367 stp->sd_tail = sop->so_tail; 2368 if (sop->so_flags & SO_MINPSZ) 2369 q->q_minpsz = sop->so_minpsz; 2370 if (sop->so_flags & SO_MAXPSZ) 2371 q->q_maxpsz = sop->so_maxpsz; 2372 if (sop->so_flags & SO_MAXBLK) 2373 stp->sd_maxblk = sop->so_maxblk; 2374 if (sop->so_flags & SO_HIWAT) { 2375 if (sop->so_flags & SO_BAND) { 2376 if (strqset(q, QHIWAT, 2377 sop->so_band, sop->so_hiwat)) { 2378 cmn_err(CE_WARN, "strrput: could not " 2379 "allocate qband\n"); 2380 } else { 2381 bpri = sop->so_band; 2382 } 2383 } else { 2384 q->q_hiwat = sop->so_hiwat; 2385 } 2386 } 2387 if (sop->so_flags & SO_LOWAT) { 2388 if (sop->so_flags & SO_BAND) { 2389 if (strqset(q, QLOWAT, 2390 sop->so_band, sop->so_lowat)) { 2391 cmn_err(CE_WARN, "strrput: could not " 2392 "allocate qband\n"); 2393 } else { 2394 bpri = sop->so_band; 2395 } 2396 } else { 2397 q->q_lowat = sop->so_lowat; 2398 } 2399 } 2400 if (sop->so_flags & SO_MREADON) 2401 stp->sd_flag |= SNDMREAD; 2402 if (sop->so_flags & SO_MREADOFF) 
2403 stp->sd_flag &= ~SNDMREAD; 2404 if (sop->so_flags & SO_NDELON) 2405 stp->sd_flag |= OLDNDELAY; 2406 if (sop->so_flags & SO_NDELOFF) 2407 stp->sd_flag &= ~OLDNDELAY; 2408 if (sop->so_flags & SO_ISTTY) 2409 stp->sd_flag |= STRISTTY; 2410 if (sop->so_flags & SO_ISNTTY) 2411 stp->sd_flag &= ~STRISTTY; 2412 if (sop->so_flags & SO_TOSTOP) 2413 stp->sd_flag |= STRTOSTOP; 2414 if (sop->so_flags & SO_TONSTOP) 2415 stp->sd_flag &= ~STRTOSTOP; 2416 if (sop->so_flags & SO_DELIM) 2417 stp->sd_flag |= STRDELIM; 2418 if (sop->so_flags & SO_NODELIM) 2419 stp->sd_flag &= ~STRDELIM; 2420 2421 mutex_exit(&stp->sd_lock); 2422 freemsg(bp); 2423 2424 /* Check backenable in case the water marks changed */ 2425 qbackenable(q, bpri); 2426 return (0); 2427 2428 /* 2429 * The following set of cases deal with situations where two stream 2430 * heads are connected to each other (twisted streams). These messages 2431 * have no meaning at the stream head. 2432 */ 2433 case M_BREAK: 2434 case M_CTL: 2435 case M_DELAY: 2436 case M_START: 2437 case M_STOP: 2438 case M_IOCDATA: 2439 case M_STARTI: 2440 case M_STOPI: 2441 freemsg(bp); 2442 return (0); 2443 2444 case M_IOCTL: 2445 /* 2446 * Always NAK this condition 2447 * (makes no sense) 2448 * If there is one or more threads in the read side 2449 * rwnext we have to defer the nacking until that thread 2450 * returns (in strget). 2451 */ 2452 mutex_enter(&stp->sd_lock); 2453 if (stp->sd_struiodnak != 0) { 2454 /* 2455 * Defer NAK to the streamhead. Queue at the end 2456 * the list. 2457 */ 2458 mblk_t *mp = stp->sd_struionak; 2459 2460 while (mp && mp->b_next) 2461 mp = mp->b_next; 2462 if (mp) 2463 mp->b_next = bp; 2464 else 2465 stp->sd_struionak = bp; 2466 bp->b_next = NULL; 2467 mutex_exit(&stp->sd_lock); 2468 return (0); 2469 } 2470 mutex_exit(&stp->sd_lock); 2471 2472 bp->b_datap->db_type = M_IOCNAK; 2473 /* 2474 * Protect against the driver passing up 2475 * messages after it has done a qprocsoff. 
2476 */ 2477 if (_OTHERQ(q)->q_next == NULL) 2478 freemsg(bp); 2479 else 2480 qreply(q, bp); 2481 return (0); 2482 2483 default: 2484 #ifdef DEBUG 2485 cmn_err(CE_WARN, 2486 "bad message type %x received at stream head\n", 2487 bp->b_datap->db_type); 2488 #endif 2489 freemsg(bp); 2490 return (0); 2491 } 2492 2493 /* NOTREACHED */ 2494 } 2495 2496 /* 2497 * Check if the stream pointed to by `stp' can be written to, and return an 2498 * error code if not. If `eiohup' is set, then return EIO if STRHUP is set. 2499 * If `sigpipeok' is set and the SW_SIGPIPE option is enabled on the stream, 2500 * then always return EPIPE and send a SIGPIPE to the invoking thread. 2501 */ 2502 static int 2503 strwriteable(struct stdata *stp, boolean_t eiohup, boolean_t sigpipeok) 2504 { 2505 int error; 2506 2507 ASSERT(MUTEX_HELD(&stp->sd_lock)); 2508 2509 /* 2510 * For modem support, POSIX states that on writes, EIO should 2511 * be returned if the stream has been hung up. 2512 */ 2513 if (eiohup && (stp->sd_flag & (STPLEX|STRHUP)) == STRHUP) 2514 error = EIO; 2515 else 2516 error = strgeterr(stp, STRHUP|STPLEX|STWRERR, 0); 2517 2518 if (error != 0) { 2519 if (!(stp->sd_flag & STPLEX) && 2520 (stp->sd_wput_opt & SW_SIGPIPE) && sigpipeok) { 2521 tsignal(curthread, SIGPIPE); 2522 error = EPIPE; 2523 } 2524 } 2525 2526 return (error); 2527 } 2528 2529 /* 2530 * Copyin and send data down a stream. 2531 * The caller will allocate and copyin any control part that precedes the 2532 * message and pass that in as mctl. 2533 * 2534 * Caller should *not* hold sd_lock. 2535 * When EWOULDBLOCK is returned the caller has to redo the canputnext 2536 * under sd_lock in order to avoid missing a backenabling wakeup. 2537 * 2538 * Use iosize = -1 to not send any M_DATA. iosize = 0 sends zero-length M_DATA. 2539 * 2540 * Set MSG_IGNFLOW in flags to ignore flow control for hipri messages. 2541 * For sync streams we can only ignore flow control by reverting to using 2542 * putnext. 
2543 * 2544 * If sd_maxblk is less than *iosize this routine might return without 2545 * transferring all of *iosize. In all cases, on return *iosize will contain 2546 * the amount of data that was transferred. 2547 */ 2548 static int 2549 strput(struct stdata *stp, mblk_t *mctl, struct uio *uiop, ssize_t *iosize, 2550 int b_flag, int pri, int flags) 2551 { 2552 struiod_t uiod; 2553 mblk_t *mp; 2554 queue_t *wqp = stp->sd_wrq; 2555 int error = 0; 2556 ssize_t count = *iosize; 2557 2558 ASSERT(MUTEX_NOT_HELD(&stp->sd_lock)); 2559 2560 if (uiop != NULL && count >= 0) 2561 flags |= stp->sd_struiowrq ? STRUIO_POSTPONE : 0; 2562 2563 if (!(flags & STRUIO_POSTPONE)) { 2564 /* 2565 * Use regular canputnext, strmakedata, putnext sequence. 2566 */ 2567 if (pri == 0) { 2568 if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) { 2569 freemsg(mctl); 2570 return (EWOULDBLOCK); 2571 } 2572 } else { 2573 if (!(flags & MSG_IGNFLOW) && !bcanputnext(wqp, pri)) { 2574 freemsg(mctl); 2575 return (EWOULDBLOCK); 2576 } 2577 } 2578 2579 if ((error = strmakedata(iosize, uiop, stp, flags, 2580 &mp)) != 0) { 2581 freemsg(mctl); 2582 /* 2583 * need to change return code to ENOMEM 2584 * so that this is not confused with 2585 * flow control, EAGAIN. 2586 */ 2587 2588 if (error == EAGAIN) 2589 return (ENOMEM); 2590 else 2591 return (error); 2592 } 2593 if (mctl != NULL) { 2594 if (mctl->b_cont == NULL) 2595 mctl->b_cont = mp; 2596 else if (mp != NULL) 2597 linkb(mctl, mp); 2598 mp = mctl; 2599 } else if (mp == NULL) 2600 return (0); 2601 2602 mp->b_flag |= b_flag; 2603 mp->b_band = (uchar_t)pri; 2604 2605 if (flags & MSG_IGNFLOW) { 2606 /* 2607 * XXX Hack: Don't get stuck running service 2608 * procedures. This is needed for sockfs when 2609 * sending the unbind message out of the rput 2610 * procedure - we don't want a put procedure 2611 * to run service procedures. 
2612 */ 2613 putnext(wqp, mp); 2614 } else { 2615 stream_willservice(stp); 2616 putnext(wqp, mp); 2617 stream_runservice(stp); 2618 } 2619 return (0); 2620 } 2621 /* 2622 * Stream supports rwnext() for the write side. 2623 */ 2624 if ((error = strmakedata(iosize, uiop, stp, flags, &mp)) != 0) { 2625 freemsg(mctl); 2626 /* 2627 * map EAGAIN to ENOMEM since EAGAIN means "flow controlled". 2628 */ 2629 return (error == EAGAIN ? ENOMEM : error); 2630 } 2631 if (mctl != NULL) { 2632 if (mctl->b_cont == NULL) 2633 mctl->b_cont = mp; 2634 else if (mp != NULL) 2635 linkb(mctl, mp); 2636 mp = mctl; 2637 } else if (mp == NULL) { 2638 return (0); 2639 } 2640 2641 mp->b_flag |= b_flag; 2642 mp->b_band = (uchar_t)pri; 2643 2644 (void) uiodup(uiop, &uiod.d_uio, uiod.d_iov, 2645 sizeof (uiod.d_iov) / sizeof (*uiod.d_iov)); 2646 uiod.d_uio.uio_offset = 0; 2647 uiod.d_mp = mp; 2648 error = rwnext(wqp, &uiod); 2649 if (! uiod.d_mp) { 2650 uioskip(uiop, *iosize); 2651 return (error); 2652 } 2653 ASSERT(mp == uiod.d_mp); 2654 if (error == EINVAL) { 2655 /* 2656 * The stream plumbing must have changed while 2657 * we were away, so just turn off rwnext()s. 2658 */ 2659 error = 0; 2660 } else if (error == EBUSY || error == EWOULDBLOCK) { 2661 /* 2662 * Couldn't enter a perimeter or took a page fault, 2663 * so fall-back to putnext(). 
2664 */ 2665 error = 0; 2666 } else { 2667 freemsg(mp); 2668 return (error); 2669 } 2670 /* Have to check canput before consuming data from the uio */ 2671 if (pri == 0) { 2672 if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) { 2673 freemsg(mp); 2674 return (EWOULDBLOCK); 2675 } 2676 } else { 2677 if (!bcanputnext(wqp, pri) && !(flags & MSG_IGNFLOW)) { 2678 freemsg(mp); 2679 return (EWOULDBLOCK); 2680 } 2681 } 2682 ASSERT(mp == uiod.d_mp); 2683 /* Copyin data from the uio */ 2684 if ((error = struioget(wqp, mp, &uiod, 0)) != 0) { 2685 freemsg(mp); 2686 return (error); 2687 } 2688 uioskip(uiop, *iosize); 2689 if (flags & MSG_IGNFLOW) { 2690 /* 2691 * XXX Hack: Don't get stuck running service procedures. 2692 * This is needed for sockfs when sending the unbind message 2693 * out of the rput procedure - we don't want a put procedure 2694 * to run service procedures. 2695 */ 2696 putnext(wqp, mp); 2697 } else { 2698 stream_willservice(stp); 2699 putnext(wqp, mp); 2700 stream_runservice(stp); 2701 } 2702 return (0); 2703 } 2704 2705 /* 2706 * Write attempts to break the write request into messages conforming 2707 * with the minimum and maximum packet sizes set downstream. 2708 * 2709 * Write will not block if downstream queue is full and 2710 * O_NDELAY is set, otherwise it will block waiting for the queue to get room. 2711 * 2712 * A write of zero bytes gets packaged into a zero length message and sent 2713 * downstream like any other message. 2714 * 2715 * If buffers of the requested sizes are not available, the write will 2716 * sleep until the buffers become available. 2717 * 2718 * Write (if specified) will supply a write offset in a message if it 2719 * makes sense. This can be specified by downstream modules as part of 2720 * a M_SETOPTS message. Write will not supply the write offset if it 2721 * cannot supply any data in a buffer. In other words, write will never 2722 * send down an empty packet due to a write offset. 
/*
 * Write attempts to break the write request into messages conforming
 * with the minimum and maximum packet sizes set downstream.
 *
 * Write will not block if downstream queue is full and
 * O_NDELAY is set, otherwise it will block waiting for the queue to get room.
 *
 * A write of zero bytes gets packaged into a zero length message and sent
 * downstream like any other message.
 *
 * If buffers of the requested sizes are not available, the write will
 * sleep until the buffers become available.
 *
 * Write (if specified) will supply a write offset in a message if it
 * makes sense. This can be specified by downstream modules as part of
 * a M_SETOPTS message.  Write will not supply the write offset if it
 * cannot supply any data in a buffer.  In other words, write will never
 * send down an empty packet due to a write offset.
 *
 * strwrite() is the plain entry point: it calls strwrite_common() with
 * no extra wait flags.
 */
/* ARGSUSED2 */
int
strwrite(struct vnode *vp, struct uio *uiop, cred_t *crp)
{
	return (strwrite_common(vp, uiop, crp, 0));
}

/*
 * Common write path for a stream.
 *
 *   vp    - vnode of the stream; vp->v_stream must be set.
 *   uiop  - describes the user data; uio_resid is consumed as messages
 *	     are sent, uio_fmode supplies the FNDELAY setting.
 *   crp   - credentials (not used here).
 *   wflag - extra flags OR-ed into the WRITEWAIT flag handed to strwaitq().
 *
 * Returns 0 on success or an errno value.  ERANGE is returned when the
 * request violates the cached min/max packet sizes; ENOMEM coming back
 * from strput() is converted to EAGAIN before returning.
 */
/* ARGSUSED2 */
int
strwrite_common(struct vnode *vp, struct uio *uiop, cred_t *crp, int wflag)
{
	struct stdata *stp;
	struct queue *wqp;
	ssize_t rmin, rmax;
	ssize_t iosize;
	int waitflag;
	int tempmode;
	int error = 0;
	int b_flag;

	ASSERT(vp->v_stream);
	stp = vp->v_stream;

	mutex_enter(&stp->sd_lock);

	/* Job control check; i_straccess() short-circuits the common cases. */
	if ((error = i_straccess(stp, JCWRITE)) != 0) {
		mutex_exit(&stp->sd_lock);
		return (error);
	}

	/* Only take the slow path when an error/hangup/mux flag is set. */
	if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
		error = strwriteable(stp, B_TRUE, B_TRUE);
		if (error != 0) {
			mutex_exit(&stp->sd_lock);
			return (error);
		}
	}

	mutex_exit(&stp->sd_lock);

	wqp = stp->sd_wrq;

	/* Use the min/max packet sizes cached in the stream head. */
	rmin = stp->sd_qn_minpsz;
	rmax = stp->sd_qn_maxpsz;

	/*
	 * Check the min/max packet size constraints.  If min packet size
	 * is non-zero, the write cannot be split into multiple messages
	 * and still guarantee the size constraints.
	 */
	TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_IN, "strwrite in:q %p", wqp);

	ASSERT((rmax >= 0) || (rmax == INFPSZ));
	/* A maximum packet size of zero means nothing can be sent. */
	if (rmax == 0) {
		return (0);
	}
	if (rmin > 0) {
		if (uiop->uio_resid < rmin) {
			TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT,
			    "strwrite out:q %p out %d error %d",
			    wqp, 0, ERANGE);
			return (ERANGE);
		}
		if ((rmax != INFPSZ) && (uiop->uio_resid > rmax)) {
			TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT,
			    "strwrite out:q %p out %d error %d",
			    wqp, 1, ERANGE);
			return (ERANGE);
		}
	}

	/*
	 * Do until count satisfied or error.
	 */
	waitflag = WRITEWAIT | wflag;
	/* With OLDNDELAY set, FNDELAY is masked out of the wait mode. */
	if (stp->sd_flag & OLDNDELAY)
		tempmode = uiop->uio_fmode & ~FNDELAY;
	else
		tempmode = uiop->uio_fmode;

	/* No upper bound: a single message may carry the whole request. */
	if (rmax == INFPSZ)
		rmax = uiop->uio_resid;

	/*
	 * Note that tempmode does not get used in strput/strmakedata
	 * but only in strwaitq. The other routines use uio_fmode
	 * unmodified.
	 */

	/* LINTED: constant in conditional context */
	while (1) {	/* breaks when uio_resid reaches zero */
		/*
		 * Determine the size of the next message to be
		 * packaged.  May have to break write into several
		 * messages based on max packet size.
		 */
		iosize = MIN(uiop->uio_resid, rmax);

		/*
		 * Put block downstream when flow control allows it.
		 * Only the message that carries the last of the data is
		 * marked MSGDELIM, and only when STRDELIM is set.
		 */
		if ((stp->sd_flag & STRDELIM) && (uiop->uio_resid == iosize))
			b_flag = MSGDELIM;
		else
			b_flag = 0;

		/* Retry strput() until it succeeds or fails for real. */
		for (;;) {
			int done = 0;

			error = strput(stp, NULL, uiop, &iosize, b_flag, 0, 0);
			if (error == 0)
				break;
			if (error != EWOULDBLOCK)
				goto out;

			mutex_enter(&stp->sd_lock);
			/*
			 * Check for a missed wakeup.
			 * Needed since strput did not hold sd_lock across
			 * the canputnext.
			 */
			if (canputnext(wqp)) {
				/* Try again */
				mutex_exit(&stp->sd_lock);
				continue;
			}
			TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAIT,
			    "strwrite wait:q %p wait", wqp);
			if ((error = strwaitq(stp, waitflag, (ssize_t)0,
			    tempmode, -1, &done)) != 0 || done) {
				mutex_exit(&stp->sd_lock);
				/*
				 * On a FIFO opened with FNDELAY, EAGAIN from
				 * the wait is reported as success.
				 */
				if ((vp->v_type == VFIFO) &&
				    (uiop->uio_fmode & FNDELAY) &&
				    (error == EAGAIN))
					error = 0;
				goto out;
			}
			TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAKE,
			    "strwrite wake:q %p awakes", wqp);
			/* Re-run the job control check after sleeping. */
			if ((error = i_straccess(stp, JCWRITE)) != 0) {
				mutex_exit(&stp->sd_lock);
				goto out;
			}
			mutex_exit(&stp->sd_lock);
		}
		/* From the second message on, waits are made with NOINTR. */
		waitflag |= NOINTR;
		TRACE_2(TR_FAC_STREAMS_FR, TR_STRWRITE_RESID,
		    "strwrite resid:q %p uiop %p", wqp, uiop);
		if (uiop->uio_resid) {
			/* Recheck for errors - needed for sockets */
			if ((stp->sd_wput_opt & SW_RECHECK_ERR) &&
			    (stp->sd_flag & (STWRERR|STRHUP|STPLEX))) {
				mutex_enter(&stp->sd_lock);
				error = strwriteable(stp, B_FALSE, B_TRUE);
				mutex_exit(&stp->sd_lock);
				/*
				 * NOTE(review): this return bypasses the
				 * `out' label, so neither the ENOMEM->EAGAIN
				 * mapping nor the exit trace runs here.
				 */
				if (error != 0)
					return (error);
			}
			continue;
		}
		break;
	}
out:
	/*
	 * For historical reasons, applications expect EAGAIN when a data
	 * mblk_t cannot be allocated, so change ENOMEM back to EAGAIN.
	 */
	if (error == ENOMEM)
		error = EAGAIN;
	TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT,
	    "strwrite out:q %p out %d error %d", wqp, 2, error);
	return (error);
}
/*
 * Stream head write service routine.
 * Its job is to wake up any sleeping writers when a queue
 * downstream needs data (part of the flow control in putq and getq).
 * It also must wake anyone sleeping on a poll().
 *
 * NOTE(review): the historical comment also said that a stream head right
 * below a mux module must invoke the put procedure of the next downstream
 * module.  No such call is made in this function, and the code asserts
 * that the stream is not STPLEX - confirm whether that sentence is stale.
 *
 * Always returns 0.
 */
int
strwsrv(queue_t *q)
{
	struct stdata *stp;
	queue_t *tq;
	qband_t *qbp;
	int i;
	qband_t *myqbp;
	int isevent;
	unsigned char	qbf[NBAND];	/* band flushing backenable flags */

	TRACE_1(TR_FAC_STREAMS_FR,
	    TR_STRWSRV, "strwsrv:q %p", q);
	stp = (struct stdata *)q->q_ptr;
	ASSERT(qclaimed(q));
	mutex_enter(&stp->sd_lock);
	ASSERT(!(stp->sd_flag & STPLEX));

	/* Wake every writer that recorded itself as sleeping (WSLEEP). */
	if (stp->sd_flag & WSLEEP) {
		stp->sd_flag &= ~WSLEEP;
		cv_broadcast(&q->q_wait);
	}
	mutex_exit(&stp->sd_lock);

	/* The other end of a stream pipe went away. */
	if ((tq = q->q_next) == NULL) {
		return (0);
	}

	/* Find the next module forward that has a service procedure */
	claimstr(q);
	tq = q->q_nfsrv;
	ASSERT(tq != NULL);

	/* Normal-priority (band 0) back-enable handling. */
	if ((q->q_flag & QBACK)) {
		if ((tq->q_flag & QFULL)) {
			/* Re-check QFULL now that the queue lock is held. */
			mutex_enter(QLOCK(tq));
			if (!(tq->q_flag & QFULL)) {
				mutex_exit(QLOCK(tq));
				goto wakeup;
			}
			/*
			 * The queue must have become full again. Set QWANTW
			 * again so strwsrv will be back enabled when
			 * the queue becomes non-full next time.
			 */
			tq->q_flag |= QWANTW;
			mutex_exit(QLOCK(tq));
		} else {
		wakeup:
			/* Notify pollers and S_WRNORM signal registrants. */
			pollwakeup(&stp->sd_pollist, POLLWRNORM);
			mutex_enter(&stp->sd_lock);
			if (stp->sd_sigflags & S_WRNORM)
				strsendsig(stp->sd_siglist, S_WRNORM, 0, 0);
			mutex_exit(&stp->sd_lock);
		}
	}

	/*
	 * Walk this queue's band list and tq's band list in step.  The
	 * counter i starts at 1 and advances once per band; qbf[i] records
	 * each back-enabled band that is no longer full downstream.
	 */
	isevent = 0;
	i = 1;
	bzero((caddr_t)qbf, NBAND);
	mutex_enter(QLOCK(tq));
	if ((myqbp = q->q_bandp) != NULL)
		for (qbp = tq->q_bandp; qbp && myqbp; qbp = qbp->qb_next) {
			ASSERT(myqbp);
			if ((myqbp->qb_flag & QB_BACK)) {
				if (qbp->qb_flag & QB_FULL) {
					/*
					 * The band must have become full again.
					 * Set QB_WANTW again so strwsrv will
					 * be back enabled when the band becomes
					 * non-full next time.
					 */
					qbp->qb_flag |= QB_WANTW;
				} else {
					isevent = 1;
					qbf[i] = 1;
				}
			}
			myqbp = myqbp->qb_next;
			i++;
		}
	mutex_exit(QLOCK(tq));

	/* Report writability for each flagged band, highest band first. */
	if (isevent) {
		for (i = tq->q_nband; i; i--) {
			if (qbf[i]) {
				pollwakeup(&stp->sd_pollist, POLLWRBAND);
				mutex_enter(&stp->sd_lock);
				if (stp->sd_sigflags & S_WRBAND)
					strsendsig(stp->sd_siglist, S_WRBAND,
					    (uchar_t)i, 0);
				mutex_exit(&stp->sd_lock);
			}
		}
	}

	/* Drop the claim taken by claimstr() above. */
	releasestr(q);
	return (0);
}
striocp->ic_timout; 3062 strioc32.ic_len = striocp->ic_len; 3063 strioc32.ic_dp = (caddr32_t)(uintptr_t)striocp->ic_dp; 3064 ASSERT((char *)(uintptr_t)strioc32.ic_dp == 3065 striocp->ic_dp); 3066 3067 if (copyout(&strioc32, to, sizeof (strioc32))) 3068 return (EFAULT); 3069 3070 } else { /* NATIVE data model */ 3071 if (copyout(from, to, sizeof (struct strioctl))) { 3072 return (EFAULT); 3073 } else { 3074 return (0); 3075 } 3076 } 3077 } else { 3078 ASSERT(copyflag & K_TO_K); 3079 bcopy(from, to, sizeof (struct strioctl)); 3080 } 3081 return (0); 3082 } 3083 3084 #else /* ! _LP64 */ 3085 3086 /* ARGSUSED2 */ 3087 static int 3088 strcopyin_strioctl(void *from, void *to, int flag, int copyflag) 3089 { 3090 return (strcopyin(from, to, sizeof (struct strioctl), copyflag)); 3091 } 3092 3093 /* ARGSUSED2 */ 3094 static int 3095 strcopyout_strioctl(void *from, void *to, int flag, int copyflag) 3096 { 3097 return (strcopyout(from, to, sizeof (struct strioctl), copyflag)); 3098 } 3099 3100 #endif /* _LP64 */ 3101 3102 /* 3103 * Determine type of job control semantics expected by user. The 3104 * possibilities are: 3105 * JCREAD - Behaves like read() on fd; send SIGTTIN 3106 * JCWRITE - Behaves like write() on fd; send SIGTTOU if TOSTOP set 3107 * JCSETP - Sets a value in the stream; send SIGTTOU, ignore TOSTOP 3108 * JCGETP - Gets a value in the stream; no signals. 3109 * See straccess in strsubr.c for usage of these values. 3110 * 3111 * This routine also returns -1 for I_STR as a special case; the 3112 * caller must call again with the real ioctl number for 3113 * classification. 
3114 */ 3115 static int 3116 job_control_type(int cmd) 3117 { 3118 switch (cmd) { 3119 case I_STR: 3120 return (-1); 3121 3122 case I_RECVFD: 3123 case I_E_RECVFD: 3124 return (JCREAD); 3125 3126 case I_FDINSERT: 3127 case I_SENDFD: 3128 return (JCWRITE); 3129 3130 case TCSETA: 3131 case TCSETAW: 3132 case TCSETAF: 3133 case TCSBRK: 3134 case TCXONC: 3135 case TCFLSH: 3136 case TCDSET: /* Obsolete */ 3137 case TIOCSWINSZ: 3138 case TCSETS: 3139 case TCSETSW: 3140 case TCSETSF: 3141 case TIOCSETD: 3142 case TIOCHPCL: 3143 case TIOCSETP: 3144 case TIOCSETN: 3145 case TIOCEXCL: 3146 case TIOCNXCL: 3147 case TIOCFLUSH: 3148 case TIOCSETC: 3149 case TIOCLBIS: 3150 case TIOCLBIC: 3151 case TIOCLSET: 3152 case TIOCSBRK: 3153 case TIOCCBRK: 3154 case TIOCSDTR: 3155 case TIOCCDTR: 3156 case TIOCSLTC: 3157 case TIOCSTOP: 3158 case TIOCSTART: 3159 case TIOCSTI: 3160 case TIOCSPGRP: 3161 case TIOCMSET: 3162 case TIOCMBIS: 3163 case TIOCMBIC: 3164 case TIOCREMOTE: 3165 case TIOCSIGNAL: 3166 case LDSETT: 3167 case LDSMAP: /* Obsolete */ 3168 case DIOCSETP: 3169 case I_FLUSH: 3170 case I_SRDOPT: 3171 case I_SETSIG: 3172 case I_SWROPT: 3173 case I_FLUSHBAND: 3174 case I_SETCLTIME: 3175 case I_SERROPT: 3176 case I_ESETSIG: 3177 case FIONBIO: 3178 case FIOASYNC: 3179 case FIOSETOWN: 3180 case JBOOT: /* Obsolete */ 3181 case JTERM: /* Obsolete */ 3182 case JTIMOM: /* Obsolete */ 3183 case JZOMBOOT: /* Obsolete */ 3184 case JAGENT: /* Obsolete */ 3185 case JTRUN: /* Obsolete */ 3186 case JXTPROTO: /* Obsolete */ 3187 case TIOCSETLD: 3188 return (JCSETP); 3189 } 3190 3191 return (JCGETP); 3192 } 3193 3194 /* 3195 * ioctl for streams 3196 */ 3197 int 3198 strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, 3199 cred_t *crp, int *rvalp) 3200 { 3201 struct stdata *stp; 3202 struct strcmd *scp; 3203 struct strioctl strioc; 3204 struct uio uio; 3205 struct iovec iov; 3206 int access; 3207 mblk_t *mp; 3208 int error = 0; 3209 int done = 0; 3210 ssize_t rmin, rmax; 3211 
queue_t *wrq; 3212 queue_t *rdq; 3213 boolean_t kioctl = B_FALSE; 3214 uint32_t auditing = AU_AUDITING(); 3215 3216 if (flag & FKIOCTL) { 3217 copyflag = K_TO_K; 3218 kioctl = B_TRUE; 3219 } 3220 ASSERT(vp->v_stream); 3221 ASSERT(copyflag == U_TO_K || copyflag == K_TO_K); 3222 stp = vp->v_stream; 3223 3224 TRACE_3(TR_FAC_STREAMS_FR, TR_IOCTL_ENTER, 3225 "strioctl:stp %p cmd %X arg %lX", stp, cmd, arg); 3226 3227 if (auditing) 3228 audit_strioctl(vp, cmd, arg, flag, copyflag, crp, rvalp); 3229 3230 /* 3231 * If the copy is kernel to kernel, make sure that the FNATIVE 3232 * flag is set. After this it would be a serious error to have 3233 * no model flag. 3234 */ 3235 if (copyflag == K_TO_K) 3236 flag = (flag & ~FMODELS) | FNATIVE; 3237 3238 ASSERT((flag & FMODELS) != 0); 3239 3240 wrq = stp->sd_wrq; 3241 rdq = _RD(wrq); 3242 3243 access = job_control_type(cmd); 3244 3245 /* We should never see these here, should be handled by iwscn */ 3246 if (cmd == SRIOCSREDIR || cmd == SRIOCISREDIR) 3247 return (EINVAL); 3248 3249 mutex_enter(&stp->sd_lock); 3250 if ((access != -1) && ((error = i_straccess(stp, access)) != 0)) { 3251 mutex_exit(&stp->sd_lock); 3252 return (error); 3253 } 3254 mutex_exit(&stp->sd_lock); 3255 3256 /* 3257 * Check for sgttyb-related ioctls first, and complain as 3258 * necessary. 
3259 */ 3260 switch (cmd) { 3261 case TIOCGETP: 3262 case TIOCSETP: 3263 case TIOCSETN: 3264 if (sgttyb_handling >= 2 && !sgttyb_complaint) { 3265 sgttyb_complaint = B_TRUE; 3266 cmn_err(CE_NOTE, 3267 "application used obsolete TIOC[GS]ET"); 3268 } 3269 if (sgttyb_handling >= 3) { 3270 tsignal(curthread, SIGSYS); 3271 return (EIO); 3272 } 3273 break; 3274 } 3275 3276 mutex_enter(&stp->sd_lock); 3277 3278 switch (cmd) { 3279 case I_RECVFD: 3280 case I_E_RECVFD: 3281 case I_PEEK: 3282 case I_NREAD: 3283 case FIONREAD: 3284 case FIORDCHK: 3285 case I_ATMARK: 3286 case FIONBIO: 3287 case FIOASYNC: 3288 if (stp->sd_flag & (STRDERR|STPLEX)) { 3289 error = strgeterr(stp, STRDERR|STPLEX, 0); 3290 if (error != 0) { 3291 mutex_exit(&stp->sd_lock); 3292 return (error); 3293 } 3294 } 3295 break; 3296 3297 default: 3298 if (stp->sd_flag & (STRDERR|STWRERR|STPLEX)) { 3299 error = strgeterr(stp, STRDERR|STWRERR|STPLEX, 0); 3300 if (error != 0) { 3301 mutex_exit(&stp->sd_lock); 3302 return (error); 3303 } 3304 } 3305 } 3306 3307 mutex_exit(&stp->sd_lock); 3308 3309 switch (cmd) { 3310 default: 3311 /* 3312 * The stream head has hardcoded knowledge of a 3313 * miscellaneous collection of terminal-, keyboard- and 3314 * mouse-related ioctls, enumerated below. This hardcoded 3315 * knowledge allows the stream head to automatically 3316 * convert transparent ioctl requests made by userland 3317 * programs into I_STR ioctls which many old STREAMS 3318 * modules and drivers require. 3319 * 3320 * No new ioctls should ever be added to this list. 3321 * Instead, the STREAMS module or driver should be written 3322 * to either handle transparent ioctls or require any 3323 * userland programs to use I_STR ioctls (by returning 3324 * EINVAL to any transparent ioctl requests). 
3325 * 3326 * More importantly, removing ioctls from this list should 3327 * be done with the utmost care, since our STREAMS modules 3328 * and drivers *count* on the stream head performing this 3329 * conversion, and thus may panic while processing 3330 * transparent ioctl request for one of these ioctls (keep 3331 * in mind that third party modules and drivers may have 3332 * similar problems). 3333 */ 3334 if (((cmd & IOCTYPE) == LDIOC) || 3335 ((cmd & IOCTYPE) == tIOC) || 3336 ((cmd & IOCTYPE) == TIOC) || 3337 ((cmd & IOCTYPE) == KIOC) || 3338 ((cmd & IOCTYPE) == MSIOC) || 3339 ((cmd & IOCTYPE) == VUIOC)) { 3340 /* 3341 * The ioctl is a tty ioctl - set up strioc buffer 3342 * and call strdoioctl() to do the work. 3343 */ 3344 if (stp->sd_flag & STRHUP) 3345 return (ENXIO); 3346 strioc.ic_cmd = cmd; 3347 strioc.ic_timout = INFTIM; 3348 3349 switch (cmd) { 3350 3351 case TCXONC: 3352 case TCSBRK: 3353 case TCFLSH: 3354 case TCDSET: 3355 { 3356 int native_arg = (int)arg; 3357 strioc.ic_len = sizeof (int); 3358 strioc.ic_dp = (char *)&native_arg; 3359 return (strdoioctl(stp, &strioc, flag, 3360 K_TO_K, crp, rvalp)); 3361 } 3362 3363 case TCSETA: 3364 case TCSETAW: 3365 case TCSETAF: 3366 strioc.ic_len = sizeof (struct termio); 3367 strioc.ic_dp = (char *)arg; 3368 return (strdoioctl(stp, &strioc, flag, 3369 copyflag, crp, rvalp)); 3370 3371 case TCSETS: 3372 case TCSETSW: 3373 case TCSETSF: 3374 strioc.ic_len = sizeof (struct termios); 3375 strioc.ic_dp = (char *)arg; 3376 return (strdoioctl(stp, &strioc, flag, 3377 copyflag, crp, rvalp)); 3378 3379 case LDSETT: 3380 strioc.ic_len = sizeof (struct termcb); 3381 strioc.ic_dp = (char *)arg; 3382 return (strdoioctl(stp, &strioc, flag, 3383 copyflag, crp, rvalp)); 3384 3385 case TIOCSETP: 3386 strioc.ic_len = sizeof (struct sgttyb); 3387 strioc.ic_dp = (char *)arg; 3388 return (strdoioctl(stp, &strioc, flag, 3389 copyflag, crp, rvalp)); 3390 3391 case TIOCSTI: 3392 if ((flag & FREAD) == 0 && 3393 secpolicy_sti(crp) != 
0) { 3394 return (EPERM); 3395 } 3396 mutex_enter(&stp->sd_lock); 3397 mutex_enter(&curproc->p_splock); 3398 if (stp->sd_sidp != curproc->p_sessp->s_sidp && 3399 secpolicy_sti(crp) != 0) { 3400 mutex_exit(&curproc->p_splock); 3401 mutex_exit(&stp->sd_lock); 3402 return (EACCES); 3403 } 3404 mutex_exit(&curproc->p_splock); 3405 mutex_exit(&stp->sd_lock); 3406 3407 strioc.ic_len = sizeof (char); 3408 strioc.ic_dp = (char *)arg; 3409 return (strdoioctl(stp, &strioc, flag, 3410 copyflag, crp, rvalp)); 3411 3412 case TIOCSWINSZ: 3413 strioc.ic_len = sizeof (struct winsize); 3414 strioc.ic_dp = (char *)arg; 3415 return (strdoioctl(stp, &strioc, flag, 3416 copyflag, crp, rvalp)); 3417 3418 case TIOCSSIZE: 3419 strioc.ic_len = sizeof (struct ttysize); 3420 strioc.ic_dp = (char *)arg; 3421 return (strdoioctl(stp, &strioc, flag, 3422 copyflag, crp, rvalp)); 3423 3424 case TIOCSSOFTCAR: 3425 case KIOCTRANS: 3426 case KIOCTRANSABLE: 3427 case KIOCCMD: 3428 case KIOCSDIRECT: 3429 case KIOCSCOMPAT: 3430 case KIOCSKABORTEN: 3431 case KIOCSRPTDELAY: 3432 case KIOCSRPTRATE: 3433 case VUIDSFORMAT: 3434 case TIOCSPPS: 3435 strioc.ic_len = sizeof (int); 3436 strioc.ic_dp = (char *)arg; 3437 return (strdoioctl(stp, &strioc, flag, 3438 copyflag, crp, rvalp)); 3439 3440 case KIOCSETKEY: 3441 case KIOCGETKEY: 3442 strioc.ic_len = sizeof (struct kiockey); 3443 strioc.ic_dp = (char *)arg; 3444 return (strdoioctl(stp, &strioc, flag, 3445 copyflag, crp, rvalp)); 3446 3447 case KIOCSKEY: 3448 case KIOCGKEY: 3449 strioc.ic_len = sizeof (struct kiockeymap); 3450 strioc.ic_dp = (char *)arg; 3451 return (strdoioctl(stp, &strioc, flag, 3452 copyflag, crp, rvalp)); 3453 3454 case KIOCSLED: 3455 /* arg is a pointer to char */ 3456 strioc.ic_len = sizeof (char); 3457 strioc.ic_dp = (char *)arg; 3458 return (strdoioctl(stp, &strioc, flag, 3459 copyflag, crp, rvalp)); 3460 3461 case MSIOSETPARMS: 3462 strioc.ic_len = sizeof (Ms_parms); 3463 strioc.ic_dp = (char *)arg; 3464 return (strdoioctl(stp, 
&strioc, flag, 3465 copyflag, crp, rvalp)); 3466 3467 case VUIDSADDR: 3468 case VUIDGADDR: 3469 strioc.ic_len = sizeof (struct vuid_addr_probe); 3470 strioc.ic_dp = (char *)arg; 3471 return (strdoioctl(stp, &strioc, flag, 3472 copyflag, crp, rvalp)); 3473 3474 /* 3475 * These M_IOCTL's don't require any data to be sent 3476 * downstream, and the driver will allocate and link 3477 * on its own mblk_t upon M_IOCACK -- thus we set 3478 * ic_len to zero and set ic_dp to arg so we know 3479 * where to copyout to later. 3480 */ 3481 case TIOCGSOFTCAR: 3482 case TIOCGWINSZ: 3483 case TIOCGSIZE: 3484 case KIOCGTRANS: 3485 case KIOCGTRANSABLE: 3486 case KIOCTYPE: 3487 case KIOCGDIRECT: 3488 case KIOCGCOMPAT: 3489 case KIOCLAYOUT: 3490 case KIOCGLED: 3491 case MSIOGETPARMS: 3492 case MSIOBUTTONS: 3493 case VUIDGFORMAT: 3494 case TIOCGPPS: 3495 case TIOCGPPSEV: 3496 case TCGETA: 3497 case TCGETS: 3498 case LDGETT: 3499 case TIOCGETP: 3500 case KIOCGRPTDELAY: 3501 case KIOCGRPTRATE: 3502 strioc.ic_len = 0; 3503 strioc.ic_dp = (char *)arg; 3504 return (strdoioctl(stp, &strioc, flag, 3505 copyflag, crp, rvalp)); 3506 } 3507 } 3508 3509 /* 3510 * Unknown cmd - send it down as a transparent ioctl. 3511 */ 3512 strioc.ic_cmd = cmd; 3513 strioc.ic_timout = INFTIM; 3514 strioc.ic_len = TRANSPARENT; 3515 strioc.ic_dp = (char *)&arg; 3516 3517 return (strdoioctl(stp, &strioc, flag, copyflag, crp, rvalp)); 3518 3519 case I_STR: 3520 /* 3521 * Stream ioctl. Read in an strioctl buffer from the user 3522 * along with any data specified and send it downstream. 3523 * Strdoioctl will wait allow only one ioctl message at 3524 * a time, and waits for the acknowledgement. 
3525 */ 3526 3527 if (stp->sd_flag & STRHUP) 3528 return (ENXIO); 3529 3530 error = strcopyin_strioctl((void *)arg, &strioc, flag, 3531 copyflag); 3532 if (error != 0) 3533 return (error); 3534 3535 if ((strioc.ic_len < 0) || (strioc.ic_timout < -1)) 3536 return (EINVAL); 3537 3538 access = job_control_type(strioc.ic_cmd); 3539 mutex_enter(&stp->sd_lock); 3540 if ((access != -1) && 3541 ((error = i_straccess(stp, access)) != 0)) { 3542 mutex_exit(&stp->sd_lock); 3543 return (error); 3544 } 3545 mutex_exit(&stp->sd_lock); 3546 3547 /* 3548 * The I_STR facility provides a trap door for malicious 3549 * code to send down bogus streamio(7I) ioctl commands to 3550 * unsuspecting STREAMS modules and drivers which expect to 3551 * only get these messages from the stream head. 3552 * Explicitly prohibit any streamio ioctls which can be 3553 * passed downstream by the stream head. Note that we do 3554 * not block all streamio ioctls because the ioctl 3555 * numberspace is not well managed and thus it's possible 3556 * that a module or driver's ioctl numbers may accidentally 3557 * collide with them. 3558 */ 3559 switch (strioc.ic_cmd) { 3560 case I_LINK: 3561 case I_PLINK: 3562 case I_UNLINK: 3563 case I_PUNLINK: 3564 case _I_GETPEERCRED: 3565 case _I_PLINK_LH: 3566 return (EINVAL); 3567 } 3568 3569 error = strdoioctl(stp, &strioc, flag, copyflag, crp, rvalp); 3570 if (error == 0) { 3571 error = strcopyout_strioctl(&strioc, (void *)arg, 3572 flag, copyflag); 3573 } 3574 return (error); 3575 3576 case _I_CMD: 3577 /* 3578 * Like I_STR, but without using M_IOC* messages and without 3579 * copyins/copyouts beyond the passed-in argument. 
3580 */ 3581 if (stp->sd_flag & STRHUP) 3582 return (ENXIO); 3583 3584 if ((scp = kmem_alloc(sizeof (strcmd_t), KM_NOSLEEP)) == NULL) 3585 return (ENOMEM); 3586 3587 if (copyin((void *)arg, scp, sizeof (strcmd_t))) { 3588 kmem_free(scp, sizeof (strcmd_t)); 3589 return (EFAULT); 3590 } 3591 3592 access = job_control_type(scp->sc_cmd); 3593 mutex_enter(&stp->sd_lock); 3594 if (access != -1 && (error = i_straccess(stp, access)) != 0) { 3595 mutex_exit(&stp->sd_lock); 3596 kmem_free(scp, sizeof (strcmd_t)); 3597 return (error); 3598 } 3599 mutex_exit(&stp->sd_lock); 3600 3601 *rvalp = 0; 3602 if ((error = strdocmd(stp, scp, crp)) == 0) { 3603 if (copyout(scp, (void *)arg, sizeof (strcmd_t))) 3604 error = EFAULT; 3605 } 3606 kmem_free(scp, sizeof (strcmd_t)); 3607 return (error); 3608 3609 case I_NREAD: 3610 /* 3611 * Return number of bytes of data in first message 3612 * in queue in "arg" and return the number of messages 3613 * in queue in return value. 3614 */ 3615 { 3616 size_t size; 3617 int retval; 3618 int count = 0; 3619 3620 mutex_enter(QLOCK(rdq)); 3621 3622 size = msgdsize(rdq->q_first); 3623 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next) 3624 count++; 3625 3626 mutex_exit(QLOCK(rdq)); 3627 if (stp->sd_struiordq) { 3628 infod_t infod; 3629 3630 infod.d_cmd = INFOD_COUNT; 3631 infod.d_count = 0; 3632 if (count == 0) { 3633 infod.d_cmd |= INFOD_FIRSTBYTES; 3634 infod.d_bytes = 0; 3635 } 3636 infod.d_res = 0; 3637 (void) infonext(rdq, &infod); 3638 count += infod.d_count; 3639 if (infod.d_res & INFOD_FIRSTBYTES) 3640 size = infod.d_bytes; 3641 } 3642 3643 /* 3644 * Drop down from size_t to the "int" required by the 3645 * interface. Cap at INT_MAX. 3646 */ 3647 retval = MIN(size, INT_MAX); 3648 error = strcopyout(&retval, (void *)arg, sizeof (retval), 3649 copyflag); 3650 if (!error) 3651 *rvalp = count; 3652 return (error); 3653 } 3654 3655 case FIONREAD: 3656 /* 3657 * Return number of bytes of data in all data messages 3658 * in queue in "arg". 
3659 */ 3660 { 3661 size_t size = 0; 3662 int retval; 3663 3664 mutex_enter(QLOCK(rdq)); 3665 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next) 3666 size += msgdsize(mp); 3667 mutex_exit(QLOCK(rdq)); 3668 3669 if (stp->sd_struiordq) { 3670 infod_t infod; 3671 3672 infod.d_cmd = INFOD_BYTES; 3673 infod.d_res = 0; 3674 infod.d_bytes = 0; 3675 (void) infonext(rdq, &infod); 3676 size += infod.d_bytes; 3677 } 3678 3679 /* 3680 * Drop down from size_t to the "int" required by the 3681 * interface. Cap at INT_MAX. 3682 */ 3683 retval = MIN(size, INT_MAX); 3684 error = strcopyout(&retval, (void *)arg, sizeof (retval), 3685 copyflag); 3686 3687 *rvalp = 0; 3688 return (error); 3689 } 3690 case FIORDCHK: 3691 /* 3692 * FIORDCHK does not use arg value (like FIONREAD), 3693 * instead a count is returned. I_NREAD value may 3694 * not be accurate but safe. The real thing to do is 3695 * to add the msgdsizes of all data messages until 3696 * a non-data message. 3697 */ 3698 { 3699 size_t size = 0; 3700 3701 mutex_enter(QLOCK(rdq)); 3702 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next) 3703 size += msgdsize(mp); 3704 mutex_exit(QLOCK(rdq)); 3705 3706 if (stp->sd_struiordq) { 3707 infod_t infod; 3708 3709 infod.d_cmd = INFOD_BYTES; 3710 infod.d_res = 0; 3711 infod.d_bytes = 0; 3712 (void) infonext(rdq, &infod); 3713 size += infod.d_bytes; 3714 } 3715 3716 /* 3717 * Since ioctl returns an int, and memory sizes under 3718 * LP64 may not fit, we return INT_MAX if the count was 3719 * actually greater. 3720 */ 3721 *rvalp = MIN(size, INT_MAX); 3722 return (0); 3723 } 3724 3725 case I_FIND: 3726 /* 3727 * Get module name. 3728 */ 3729 { 3730 char mname[FMNAMESZ + 1]; 3731 queue_t *q; 3732 3733 error = (copyflag & U_TO_K ? copyinstr : copystr)((void *)arg, 3734 mname, FMNAMESZ + 1, NULL); 3735 if (error) 3736 return ((error == ENAMETOOLONG) ? EINVAL : EFAULT); 3737 3738 /* 3739 * Return EINVAL if we're handed a bogus module name. 
3740 */ 3741 if (fmodsw_find(mname, FMODSW_LOAD) == NULL) { 3742 TRACE_0(TR_FAC_STREAMS_FR, 3743 TR_I_CANT_FIND, "couldn't I_FIND"); 3744 return (EINVAL); 3745 } 3746 3747 *rvalp = 0; 3748 3749 /* Look downstream to see if module is there. */ 3750 claimstr(stp->sd_wrq); 3751 for (q = stp->sd_wrq->q_next; q; q = q->q_next) { 3752 if (q->q_flag & QREADR) { 3753 q = NULL; 3754 break; 3755 } 3756 if (strcmp(mname, Q2NAME(q)) == 0) 3757 break; 3758 } 3759 releasestr(stp->sd_wrq); 3760 3761 *rvalp = (q ? 1 : 0); 3762 return (error); 3763 } 3764 3765 case I_PUSH: 3766 case __I_PUSH_NOCTTY: 3767 /* 3768 * Push a module. 3769 * For the case __I_PUSH_NOCTTY push a module but 3770 * do not allocate controlling tty. See bugid 4025044 3771 */ 3772 3773 { 3774 char mname[FMNAMESZ + 1]; 3775 fmodsw_impl_t *fp; 3776 dev_t dummydev; 3777 3778 if (stp->sd_flag & STRHUP) 3779 return (ENXIO); 3780 3781 /* 3782 * Get module name and look up in fmodsw. 3783 */ 3784 error = (copyflag & U_TO_K ? copyinstr : copystr)((void *)arg, 3785 mname, FMNAMESZ + 1, NULL); 3786 if (error) 3787 return ((error == ENAMETOOLONG) ? EINVAL : EFAULT); 3788 3789 if ((fp = fmodsw_find(mname, FMODSW_HOLD | FMODSW_LOAD)) == 3790 NULL) 3791 return (EINVAL); 3792 3793 TRACE_2(TR_FAC_STREAMS_FR, TR_I_PUSH, 3794 "I_PUSH:fp %p stp %p", fp, stp); 3795 3796 if (error = strstartplumb(stp, flag, cmd)) { 3797 fmodsw_rele(fp); 3798 return (error); 3799 } 3800 3801 /* 3802 * See if any more modules can be pushed on this stream. 3803 * Note that this check must be done after strstartplumb() 3804 * since otherwise multiple threads issuing I_PUSHes on 3805 * the same stream will be able to exceed nstrpush. 3806 */ 3807 mutex_enter(&stp->sd_lock); 3808 if (stp->sd_pushcnt >= nstrpush) { 3809 fmodsw_rele(fp); 3810 strendplumb(stp); 3811 mutex_exit(&stp->sd_lock); 3812 return (EINVAL); 3813 } 3814 mutex_exit(&stp->sd_lock); 3815 3816 /* 3817 * Push new module and call its open routine 3818 * via qattach(). 
Modules don't change device 3819 * numbers, so just ignore dummydev here. 3820 */ 3821 dummydev = vp->v_rdev; 3822 if ((error = qattach(rdq, &dummydev, 0, crp, fp, 3823 B_FALSE)) == 0) { 3824 if (vp->v_type == VCHR && /* sorry, no pipes allowed */ 3825 (cmd == I_PUSH) && (stp->sd_flag & STRISTTY)) { 3826 /* 3827 * try to allocate it as a controlling terminal 3828 */ 3829 (void) strctty(stp); 3830 } 3831 } 3832 3833 mutex_enter(&stp->sd_lock); 3834 3835 /* 3836 * As a performance concern we are caching the values of 3837 * q_minpsz and q_maxpsz of the module below the stream 3838 * head in the stream head. 3839 */ 3840 mutex_enter(QLOCK(stp->sd_wrq->q_next)); 3841 rmin = stp->sd_wrq->q_next->q_minpsz; 3842 rmax = stp->sd_wrq->q_next->q_maxpsz; 3843 mutex_exit(QLOCK(stp->sd_wrq->q_next)); 3844 3845 /* Do this processing here as a performance concern */ 3846 if (strmsgsz != 0) { 3847 if (rmax == INFPSZ) 3848 rmax = strmsgsz; 3849 else { 3850 if (vp->v_type == VFIFO) 3851 rmax = MIN(PIPE_BUF, rmax); 3852 else rmax = MIN(strmsgsz, rmax); 3853 } 3854 } 3855 3856 mutex_enter(QLOCK(wrq)); 3857 stp->sd_qn_minpsz = rmin; 3858 stp->sd_qn_maxpsz = rmax; 3859 mutex_exit(QLOCK(wrq)); 3860 3861 strendplumb(stp); 3862 mutex_exit(&stp->sd_lock); 3863 return (error); 3864 } 3865 3866 case I_POP: 3867 { 3868 queue_t *q; 3869 3870 if (stp->sd_flag & STRHUP) 3871 return (ENXIO); 3872 if (!wrq->q_next) /* for broken pipes */ 3873 return (EINVAL); 3874 3875 if (error = strstartplumb(stp, flag, cmd)) 3876 return (error); 3877 3878 /* 3879 * If there is an anchor on this stream and popping 3880 * the current module would attempt to pop through the 3881 * anchor, then disallow the pop unless we have sufficient 3882 * privileges; take the cheapest (non-locking) check 3883 * first. 
3884 */ 3885 if (secpolicy_ip_config(crp, B_TRUE) != 0 || 3886 (stp->sd_anchorzone != crgetzoneid(crp))) { 3887 mutex_enter(&stp->sd_lock); 3888 /* 3889 * Anchors only apply if there's at least one 3890 * module on the stream (sd_pushcnt > 0). 3891 */ 3892 if (stp->sd_pushcnt > 0 && 3893 stp->sd_pushcnt == stp->sd_anchor && 3894 stp->sd_vnode->v_type != VFIFO) { 3895 strendplumb(stp); 3896 mutex_exit(&stp->sd_lock); 3897 if (stp->sd_anchorzone != crgetzoneid(crp)) 3898 return (EINVAL); 3899 /* Audit and report error */ 3900 return (secpolicy_ip_config(crp, B_FALSE)); 3901 } 3902 mutex_exit(&stp->sd_lock); 3903 } 3904 3905 q = wrq->q_next; 3906 TRACE_2(TR_FAC_STREAMS_FR, TR_I_POP, 3907 "I_POP:%p from %p", q, stp); 3908 if (q->q_next == NULL || (q->q_flag & (QREADR|QISDRV))) { 3909 error = EINVAL; 3910 } else { 3911 qdetach(_RD(q), 1, flag, crp, B_FALSE); 3912 error = 0; 3913 } 3914 mutex_enter(&stp->sd_lock); 3915 3916 /* 3917 * As a performance concern we are caching the values of 3918 * q_minpsz and q_maxpsz of the module below the stream 3919 * head in the stream head. 3920 */ 3921 mutex_enter(QLOCK(wrq->q_next)); 3922 rmin = wrq->q_next->q_minpsz; 3923 rmax = wrq->q_next->q_maxpsz; 3924 mutex_exit(QLOCK(wrq->q_next)); 3925 3926 /* Do this processing here as a performance concern */ 3927 if (strmsgsz != 0) { 3928 if (rmax == INFPSZ) 3929 rmax = strmsgsz; 3930 else { 3931 if (vp->v_type == VFIFO) 3932 rmax = MIN(PIPE_BUF, rmax); 3933 else rmax = MIN(strmsgsz, rmax); 3934 } 3935 } 3936 3937 mutex_enter(QLOCK(wrq)); 3938 stp->sd_qn_minpsz = rmin; 3939 stp->sd_qn_maxpsz = rmax; 3940 mutex_exit(QLOCK(wrq)); 3941 3942 /* If we popped through the anchor, then reset the anchor. 
*/ 3943 if (stp->sd_pushcnt < stp->sd_anchor) { 3944 stp->sd_anchor = 0; 3945 stp->sd_anchorzone = 0; 3946 } 3947 strendplumb(stp); 3948 mutex_exit(&stp->sd_lock); 3949 return (error); 3950 } 3951 3952 case _I_MUXID2FD: 3953 { 3954 /* 3955 * Create a fd for a I_PLINK'ed lower stream with a given 3956 * muxid. With the fd, application can send down ioctls, 3957 * like I_LIST, to the previously I_PLINK'ed stream. Note 3958 * that after getting the fd, the application has to do an 3959 * I_PUNLINK on the muxid before it can do any operation 3960 * on the lower stream. This is required by spec1170. 3961 * 3962 * The fd used to do this ioctl should point to the same 3963 * controlling device used to do the I_PLINK. If it uses 3964 * a different stream or an invalid muxid, I_MUXID2FD will 3965 * fail. The error code is set to EINVAL. 3966 * 3967 * The intended use of this interface is the following. 3968 * An application I_PLINK'ed a stream and exits. The fd 3969 * to the lower stream is gone. Another application 3970 * wants to get a fd to the lower stream, it uses I_MUXID2FD. 3971 */ 3972 int muxid = (int)arg; 3973 int fd; 3974 linkinfo_t *linkp; 3975 struct file *fp; 3976 netstack_t *ns; 3977 str_stack_t *ss; 3978 3979 /* 3980 * Do not allow the wildcard muxid. This ioctl is not 3981 * intended to find arbitrary link. 
3982 */ 3983 if (muxid == 0) { 3984 return (EINVAL); 3985 } 3986 3987 ns = netstack_find_by_cred(crp); 3988 ASSERT(ns != NULL); 3989 ss = ns->netstack_str; 3990 ASSERT(ss != NULL); 3991 3992 mutex_enter(&muxifier); 3993 linkp = findlinks(vp->v_stream, muxid, LINKPERSIST, ss); 3994 if (linkp == NULL) { 3995 mutex_exit(&muxifier); 3996 netstack_rele(ss->ss_netstack); 3997 return (EINVAL); 3998 } 3999 4000 if ((fd = ufalloc(0)) == -1) { 4001 mutex_exit(&muxifier); 4002 netstack_rele(ss->ss_netstack); 4003 return (EMFILE); 4004 } 4005 fp = linkp->li_fpdown; 4006 mutex_enter(&fp->f_tlock); 4007 fp->f_count++; 4008 mutex_exit(&fp->f_tlock); 4009 mutex_exit(&muxifier); 4010 setf(fd, fp); 4011 *rvalp = fd; 4012 netstack_rele(ss->ss_netstack); 4013 return (0); 4014 } 4015 4016 case _I_INSERT: 4017 { 4018 /* 4019 * To insert a module to a given position in a stream. 4020 * In the first release, only allow privileged user 4021 * to use this ioctl. Furthermore, the insert is only allowed 4022 * below an anchor if the zoneid is the same as the zoneid 4023 * which created the anchor. 4024 * 4025 * Note that we do not plan to support this ioctl 4026 * on pipes in the first release. We want to learn more 4027 * about the implications of these ioctls before extending 4028 * their support. And we do not think these features are 4029 * valuable for pipes. 
4030 */ 4031 STRUCT_DECL(strmodconf, strmodinsert); 4032 char mod_name[FMNAMESZ + 1]; 4033 fmodsw_impl_t *fp; 4034 dev_t dummydev; 4035 queue_t *tmp_wrq; 4036 int pos; 4037 boolean_t is_insert; 4038 4039 STRUCT_INIT(strmodinsert, flag); 4040 if (stp->sd_flag & STRHUP) 4041 return (ENXIO); 4042 if (STRMATED(stp)) 4043 return (EINVAL); 4044 if ((error = secpolicy_net_config(crp, B_FALSE)) != 0) 4045 return (error); 4046 if (stp->sd_anchor != 0 && 4047 stp->sd_anchorzone != crgetzoneid(crp)) 4048 return (EINVAL); 4049 4050 error = strcopyin((void *)arg, STRUCT_BUF(strmodinsert), 4051 STRUCT_SIZE(strmodinsert), copyflag); 4052 if (error) 4053 return (error); 4054 4055 /* 4056 * Get module name and look up in fmodsw. 4057 */ 4058 error = (copyflag & U_TO_K ? copyinstr : 4059 copystr)(STRUCT_FGETP(strmodinsert, mod_name), 4060 mod_name, FMNAMESZ + 1, NULL); 4061 if (error) 4062 return ((error == ENAMETOOLONG) ? EINVAL : EFAULT); 4063 4064 if ((fp = fmodsw_find(mod_name, FMODSW_HOLD | FMODSW_LOAD)) == 4065 NULL) 4066 return (EINVAL); 4067 4068 if (error = strstartplumb(stp, flag, cmd)) { 4069 fmodsw_rele(fp); 4070 return (error); 4071 } 4072 4073 /* 4074 * Is this _I_INSERT just like an I_PUSH? We need to know 4075 * this because we do some optimizations if this is a 4076 * module being pushed. 4077 */ 4078 pos = STRUCT_FGET(strmodinsert, pos); 4079 is_insert = (pos != 0); 4080 4081 /* 4082 * Make sure pos is valid. Even though it is not an I_PUSH, 4083 * we impose the same limit on the number of modules in a 4084 * stream. 4085 */ 4086 mutex_enter(&stp->sd_lock); 4087 if (stp->sd_pushcnt >= nstrpush || pos < 0 || 4088 pos > stp->sd_pushcnt) { 4089 fmodsw_rele(fp); 4090 strendplumb(stp); 4091 mutex_exit(&stp->sd_lock); 4092 return (EINVAL); 4093 } 4094 if (stp->sd_anchor != 0) { 4095 /* 4096 * Is this insert below the anchor? 4097 * Pushcnt hasn't been increased yet hence 4098 * we test for greater than here, and greater or 4099 * equal after qattach. 
4100 */ 4101 if (pos > (stp->sd_pushcnt - stp->sd_anchor) && 4102 stp->sd_anchorzone != crgetzoneid(crp)) { 4103 fmodsw_rele(fp); 4104 strendplumb(stp); 4105 mutex_exit(&stp->sd_lock); 4106 return (EPERM); 4107 } 4108 } 4109 4110 mutex_exit(&stp->sd_lock); 4111 4112 /* 4113 * First find the correct position this module to 4114 * be inserted. We don't need to call claimstr() 4115 * as the stream should not be changing at this point. 4116 * 4117 * Insert new module and call its open routine 4118 * via qattach(). Modules don't change device 4119 * numbers, so just ignore dummydev here. 4120 */ 4121 for (tmp_wrq = stp->sd_wrq; pos > 0; 4122 tmp_wrq = tmp_wrq->q_next, pos--) { 4123 ASSERT(SAMESTR(tmp_wrq)); 4124 } 4125 dummydev = vp->v_rdev; 4126 if ((error = qattach(_RD(tmp_wrq), &dummydev, 0, crp, 4127 fp, is_insert)) != 0) { 4128 mutex_enter(&stp->sd_lock); 4129 strendplumb(stp); 4130 mutex_exit(&stp->sd_lock); 4131 return (error); 4132 } 4133 4134 mutex_enter(&stp->sd_lock); 4135 4136 /* 4137 * As a performance concern we are caching the values of 4138 * q_minpsz and q_maxpsz of the module below the stream 4139 * head in the stream head. 4140 */ 4141 if (!is_insert) { 4142 mutex_enter(QLOCK(stp->sd_wrq->q_next)); 4143 rmin = stp->sd_wrq->q_next->q_minpsz; 4144 rmax = stp->sd_wrq->q_next->q_maxpsz; 4145 mutex_exit(QLOCK(stp->sd_wrq->q_next)); 4146 4147 /* Do this processing here as a performance concern */ 4148 if (strmsgsz != 0) { 4149 if (rmax == INFPSZ) { 4150 rmax = strmsgsz; 4151 } else { 4152 rmax = MIN(strmsgsz, rmax); 4153 } 4154 } 4155 4156 mutex_enter(QLOCK(wrq)); 4157 stp->sd_qn_minpsz = rmin; 4158 stp->sd_qn_maxpsz = rmax; 4159 mutex_exit(QLOCK(wrq)); 4160 } 4161 4162 /* 4163 * Need to update the anchor value if this module is 4164 * inserted below the anchor point. 
4165 */ 4166 if (stp->sd_anchor != 0) { 4167 pos = STRUCT_FGET(strmodinsert, pos); 4168 if (pos >= (stp->sd_pushcnt - stp->sd_anchor)) 4169 stp->sd_anchor++; 4170 } 4171 4172 strendplumb(stp); 4173 mutex_exit(&stp->sd_lock); 4174 return (0); 4175 } 4176 4177 case _I_REMOVE: 4178 { 4179 /* 4180 * To remove a module with a given name in a stream. The 4181 * caller of this ioctl needs to provide both the name and 4182 * the position of the module to be removed. This eliminates 4183 * the ambiguity of removal if a module is inserted/pushed 4184 * multiple times in a stream. In the first release, only 4185 * allow privileged user to use this ioctl. 4186 * Furthermore, the remove is only allowed 4187 * below an anchor if the zoneid is the same as the zoneid 4188 * which created the anchor. 4189 * 4190 * Note that we do not plan to support this ioctl 4191 * on pipes in the first release. We want to learn more 4192 * about the implications of these ioctls before extending 4193 * their support. And we do not think these features are 4194 * valuable for pipes. 4195 * 4196 * Also note that _I_REMOVE cannot be used to remove a 4197 * driver or the stream head. 4198 */ 4199 STRUCT_DECL(strmodconf, strmodremove); 4200 queue_t *q; 4201 int pos; 4202 char mod_name[FMNAMESZ + 1]; 4203 boolean_t is_remove; 4204 4205 STRUCT_INIT(strmodremove, flag); 4206 if (stp->sd_flag & STRHUP) 4207 return (ENXIO); 4208 if (STRMATED(stp)) 4209 return (EINVAL); 4210 if ((error = secpolicy_net_config(crp, B_FALSE)) != 0) 4211 return (error); 4212 if (stp->sd_anchor != 0 && 4213 stp->sd_anchorzone != crgetzoneid(crp)) 4214 return (EINVAL); 4215 4216 error = strcopyin((void *)arg, STRUCT_BUF(strmodremove), 4217 STRUCT_SIZE(strmodremove), copyflag); 4218 if (error) 4219 return (error); 4220 4221 error = (copyflag & U_TO_K ? copyinstr : 4222 copystr)(STRUCT_FGETP(strmodremove, mod_name), 4223 mod_name, FMNAMESZ + 1, NULL); 4224 if (error) 4225 return ((error == ENAMETOOLONG) ? 
EINVAL : EFAULT); 4226 4227 if ((error = strstartplumb(stp, flag, cmd)) != 0) 4228 return (error); 4229 4230 /* 4231 * Match the name of given module to the name of module at 4232 * the given position. 4233 */ 4234 pos = STRUCT_FGET(strmodremove, pos); 4235 4236 is_remove = (pos != 0); 4237 for (q = stp->sd_wrq->q_next; SAMESTR(q) && pos > 0; 4238 q = q->q_next, pos--) 4239 ; 4240 if (pos > 0 || !SAMESTR(q) || 4241 strcmp(Q2NAME(q), mod_name) != 0) { 4242 mutex_enter(&stp->sd_lock); 4243 strendplumb(stp); 4244 mutex_exit(&stp->sd_lock); 4245 return (EINVAL); 4246 } 4247 4248 /* 4249 * If the position is at or below an anchor, then the zoneid 4250 * must match the zoneid that created the anchor. 4251 */ 4252 if (stp->sd_anchor != 0) { 4253 pos = STRUCT_FGET(strmodremove, pos); 4254 if (pos >= (stp->sd_pushcnt - stp->sd_anchor) && 4255 stp->sd_anchorzone != crgetzoneid(crp)) { 4256 mutex_enter(&stp->sd_lock); 4257 strendplumb(stp); 4258 mutex_exit(&stp->sd_lock); 4259 return (EPERM); 4260 } 4261 } 4262 4263 4264 ASSERT(!(q->q_flag & QREADR)); 4265 qdetach(_RD(q), 1, flag, crp, is_remove); 4266 4267 mutex_enter(&stp->sd_lock); 4268 4269 /* 4270 * As a performance concern we are caching the values of 4271 * q_minpsz and q_maxpsz of the module below the stream 4272 * head in the stream head. 4273 */ 4274 if (!is_remove) { 4275 mutex_enter(QLOCK(wrq->q_next)); 4276 rmin = wrq->q_next->q_minpsz; 4277 rmax = wrq->q_next->q_maxpsz; 4278 mutex_exit(QLOCK(wrq->q_next)); 4279 4280 /* Do this processing here as a performance concern */ 4281 if (strmsgsz != 0) { 4282 if (rmax == INFPSZ) 4283 rmax = strmsgsz; 4284 else { 4285 if (vp->v_type == VFIFO) 4286 rmax = MIN(PIPE_BUF, rmax); 4287 else rmax = MIN(strmsgsz, rmax); 4288 } 4289 } 4290 4291 mutex_enter(QLOCK(wrq)); 4292 stp->sd_qn_minpsz = rmin; 4293 stp->sd_qn_maxpsz = rmax; 4294 mutex_exit(QLOCK(wrq)); 4295 } 4296 4297 /* 4298 * Need to update the anchor value if this module is removed 4299 * at or below the anchor point. 
If the removed module is at 4300 * the anchor point, remove the anchor for this stream if 4301 * there is no module above the anchor point. Otherwise, if 4302 * the removed module is below the anchor point, decrement the 4303 * anchor point by 1. 4304 */ 4305 if (stp->sd_anchor != 0) { 4306 pos = STRUCT_FGET(strmodremove, pos); 4307 if (pos == stp->sd_pushcnt - stp->sd_anchor + 1) 4308 stp->sd_anchor = 0; 4309 else if (pos > (stp->sd_pushcnt - stp->sd_anchor + 1)) 4310 stp->sd_anchor--; 4311 } 4312 4313 strendplumb(stp); 4314 mutex_exit(&stp->sd_lock); 4315 return (0); 4316 } 4317 4318 case I_ANCHOR: 4319 /* 4320 * Set the anchor position on the stream to reside at 4321 * the top module (in other words, the top module 4322 * cannot be popped). Anchors with a FIFO make no 4323 * obvious sense, so they're not allowed. 4324 */ 4325 mutex_enter(&stp->sd_lock); 4326 4327 if (stp->sd_vnode->v_type == VFIFO) { 4328 mutex_exit(&stp->sd_lock); 4329 return (EINVAL); 4330 } 4331 /* Only allow the same zoneid to update the anchor */ 4332 if (stp->sd_anchor != 0 && 4333 stp->sd_anchorzone != crgetzoneid(crp)) { 4334 mutex_exit(&stp->sd_lock); 4335 return (EINVAL); 4336 } 4337 stp->sd_anchor = stp->sd_pushcnt; 4338 stp->sd_anchorzone = crgetzoneid(crp); 4339 mutex_exit(&stp->sd_lock); 4340 return (0); 4341 4342 case I_LOOK: 4343 /* 4344 * Get name of first module downstream. 4345 * If no module, return an error. 4346 */ 4347 claimstr(wrq); 4348 if (_SAMESTR(wrq) && wrq->q_next->q_next != NULL) { 4349 char *name = Q2NAME(wrq->q_next); 4350 4351 error = strcopyout(name, (void *)arg, strlen(name) + 1, 4352 copyflag); 4353 releasestr(wrq); 4354 return (error); 4355 } 4356 releasestr(wrq); 4357 return (EINVAL); 4358 4359 case I_LINK: 4360 case I_PLINK: 4361 /* 4362 * Link a multiplexor. 4363 */ 4364 return (mlink(vp, cmd, (int)arg, crp, rvalp, 0)); 4365 4366 case _I_PLINK_LH: 4367 /* 4368 * Link a multiplexor: Call must originate from kernel. 
4369 */ 4370 if (kioctl) 4371 return (ldi_mlink_lh(vp, cmd, arg, crp, rvalp)); 4372 4373 return (EINVAL); 4374 case I_UNLINK: 4375 case I_PUNLINK: 4376 /* 4377 * Unlink a multiplexor. 4378 * If arg is -1, unlink all links for which this is the 4379 * controlling stream. Otherwise, arg is an index number 4380 * for a link to be removed. 4381 */ 4382 { 4383 struct linkinfo *linkp; 4384 int native_arg = (int)arg; 4385 int type; 4386 netstack_t *ns; 4387 str_stack_t *ss; 4388 4389 TRACE_1(TR_FAC_STREAMS_FR, 4390 TR_I_UNLINK, "I_UNLINK/I_PUNLINK:%p", stp); 4391 if (vp->v_type == VFIFO) { 4392 return (EINVAL); 4393 } 4394 if (cmd == I_UNLINK) 4395 type = LINKNORMAL; 4396 else /* I_PUNLINK */ 4397 type = LINKPERSIST; 4398 if (native_arg == 0) { 4399 return (EINVAL); 4400 } 4401 ns = netstack_find_by_cred(crp); 4402 ASSERT(ns != NULL); 4403 ss = ns->netstack_str; 4404 ASSERT(ss != NULL); 4405 4406 if (native_arg == MUXID_ALL) 4407 error = munlinkall(stp, type, crp, rvalp, ss); 4408 else { 4409 mutex_enter(&muxifier); 4410 if (!(linkp = findlinks(stp, (int)arg, type, ss))) { 4411 /* invalid user supplied index number */ 4412 mutex_exit(&muxifier); 4413 netstack_rele(ss->ss_netstack); 4414 return (EINVAL); 4415 } 4416 /* munlink drops the muxifier lock */ 4417 error = munlink(stp, linkp, type, crp, rvalp, ss); 4418 } 4419 netstack_rele(ss->ss_netstack); 4420 return (error); 4421 } 4422 4423 case I_FLUSH: 4424 /* 4425 * send a flush message downstream 4426 * flush message can indicate 4427 * FLUSHR - flush read queue 4428 * FLUSHW - flush write queue 4429 * FLUSHRW - flush read/write queue 4430 */ 4431 if (stp->sd_flag & STRHUP) 4432 return (ENXIO); 4433 if (arg & ~FLUSHRW) 4434 return (EINVAL); 4435 4436 for (;;) { 4437 if (putnextctl1(stp->sd_wrq, M_FLUSH, (int)arg)) { 4438 break; 4439 } 4440 if (error = strwaitbuf(1, BPRI_HI)) { 4441 return (error); 4442 } 4443 } 4444 4445 /* 4446 * Send down an unsupported ioctl and wait for the nack 4447 * in order to allow the M_FLUSH 
to propagate back 4448 * up to the stream head. 4449 * Replaces if (qready()) runqueues(); 4450 */ 4451 strioc.ic_cmd = -1; /* The unsupported ioctl */ 4452 strioc.ic_timout = 0; 4453 strioc.ic_len = 0; 4454 strioc.ic_dp = NULL; 4455 (void) strdoioctl(stp, &strioc, flag, K_TO_K, crp, rvalp); 4456 *rvalp = 0; 4457 return (0); 4458 4459 case I_FLUSHBAND: 4460 { 4461 struct bandinfo binfo; 4462 4463 error = strcopyin((void *)arg, &binfo, sizeof (binfo), 4464 copyflag); 4465 if (error) 4466 return (error); 4467 if (stp->sd_flag & STRHUP) 4468 return (ENXIO); 4469 if (binfo.bi_flag & ~FLUSHRW) 4470 return (EINVAL); 4471 while (!(mp = allocb(2, BPRI_HI))) { 4472 if (error = strwaitbuf(2, BPRI_HI)) 4473 return (error); 4474 } 4475 mp->b_datap->db_type = M_FLUSH; 4476 *mp->b_wptr++ = binfo.bi_flag | FLUSHBAND; 4477 *mp->b_wptr++ = binfo.bi_pri; 4478 putnext(stp->sd_wrq, mp); 4479 /* 4480 * Send down an unsupported ioctl and wait for the nack 4481 * in order to allow the M_FLUSH to propagate back 4482 * up to the stream head. 
4483 * Replaces if (qready()) runqueues(); 4484 */ 4485 strioc.ic_cmd = -1; /* The unsupported ioctl */ 4486 strioc.ic_timout = 0; 4487 strioc.ic_len = 0; 4488 strioc.ic_dp = NULL; 4489 (void) strdoioctl(stp, &strioc, flag, K_TO_K, crp, rvalp); 4490 *rvalp = 0; 4491 return (0); 4492 } 4493 4494 case I_SRDOPT: 4495 /* 4496 * Set read options 4497 * 4498 * RNORM - default stream mode 4499 * RMSGN - message no discard 4500 * RMSGD - message discard 4501 * RPROTNORM - fail read with EBADMSG for M_[PC]PROTOs 4502 * RPROTDAT - convert M_[PC]PROTOs to M_DATAs 4503 * RPROTDIS - discard M_[PC]PROTOs and retain M_DATAs 4504 */ 4505 if (arg & ~(RMODEMASK | RPROTMASK)) 4506 return (EINVAL); 4507 4508 if ((arg & (RMSGD|RMSGN)) == (RMSGD|RMSGN)) 4509 return (EINVAL); 4510 4511 mutex_enter(&stp->sd_lock); 4512 switch (arg & RMODEMASK) { 4513 case RNORM: 4514 stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS); 4515 break; 4516 case RMSGD: 4517 stp->sd_read_opt = (stp->sd_read_opt & ~RD_MSGNODIS) | 4518 RD_MSGDIS; 4519 break; 4520 case RMSGN: 4521 stp->sd_read_opt = (stp->sd_read_opt & ~RD_MSGDIS) | 4522 RD_MSGNODIS; 4523 break; 4524 } 4525 4526 switch (arg & RPROTMASK) { 4527 case RPROTNORM: 4528 stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS); 4529 break; 4530 4531 case RPROTDAT: 4532 stp->sd_read_opt = ((stp->sd_read_opt & ~RD_PROTDIS) | 4533 RD_PROTDAT); 4534 break; 4535 4536 case RPROTDIS: 4537 stp->sd_read_opt = ((stp->sd_read_opt & ~RD_PROTDAT) | 4538 RD_PROTDIS); 4539 break; 4540 } 4541 mutex_exit(&stp->sd_lock); 4542 return (0); 4543 4544 case I_GRDOPT: 4545 /* 4546 * Get read option and return the value 4547 * to spot pointed to by arg 4548 */ 4549 { 4550 int rdopt; 4551 4552 rdopt = ((stp->sd_read_opt & RD_MSGDIS) ? RMSGD : 4553 ((stp->sd_read_opt & RD_MSGNODIS) ? RMSGN : RNORM)); 4554 rdopt |= ((stp->sd_read_opt & RD_PROTDAT) ? RPROTDAT : 4555 ((stp->sd_read_opt & RD_PROTDIS) ? 
RPROTDIS : RPROTNORM)); 4556 4557 return (strcopyout(&rdopt, (void *)arg, sizeof (int), 4558 copyflag)); 4559 } 4560 4561 case I_SERROPT: 4562 /* 4563 * Set error options 4564 * 4565 * RERRNORM - persistent read errors 4566 * RERRNONPERSIST - non-persistent read errors 4567 * WERRNORM - persistent write errors 4568 * WERRNONPERSIST - non-persistent write errors 4569 */ 4570 if (arg & ~(RERRMASK | WERRMASK)) 4571 return (EINVAL); 4572 4573 mutex_enter(&stp->sd_lock); 4574 switch (arg & RERRMASK) { 4575 case RERRNORM: 4576 stp->sd_flag &= ~STRDERRNONPERSIST; 4577 break; 4578 case RERRNONPERSIST: 4579 stp->sd_flag |= STRDERRNONPERSIST; 4580 break; 4581 } 4582 switch (arg & WERRMASK) { 4583 case WERRNORM: 4584 stp->sd_flag &= ~STWRERRNONPERSIST; 4585 break; 4586 case WERRNONPERSIST: 4587 stp->sd_flag |= STWRERRNONPERSIST; 4588 break; 4589 } 4590 mutex_exit(&stp->sd_lock); 4591 return (0); 4592 4593 case I_GERROPT: 4594 /* 4595 * Get error option and return the value 4596 * to spot pointed to by arg 4597 */ 4598 { 4599 int erropt = 0; 4600 4601 erropt |= (stp->sd_flag & STRDERRNONPERSIST) ? RERRNONPERSIST : 4602 RERRNORM; 4603 erropt |= (stp->sd_flag & STWRERRNONPERSIST) ? WERRNONPERSIST : 4604 WERRNORM; 4605 return (strcopyout(&erropt, (void *)arg, sizeof (int), 4606 copyflag)); 4607 } 4608 4609 case I_SETSIG: 4610 /* 4611 * Register the calling proc to receive the SIGPOLL 4612 * signal based on the events given in arg. If 4613 * arg is zero, remove the proc from register list. 4614 */ 4615 { 4616 strsig_t *ssp, *pssp; 4617 struct pid *pidp; 4618 4619 pssp = NULL; 4620 pidp = curproc->p_pidp; 4621 /* 4622 * Hold sd_lock to prevent traversal of sd_siglist while 4623 * it is modified. 
4624 */ 4625 mutex_enter(&stp->sd_lock); 4626 for (ssp = stp->sd_siglist; ssp && (ssp->ss_pidp != pidp); 4627 pssp = ssp, ssp = ssp->ss_next) 4628 ; 4629 4630 if (arg) { 4631 if (arg & ~(S_INPUT|S_HIPRI|S_MSG|S_HANGUP|S_ERROR| 4632 S_RDNORM|S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)) { 4633 mutex_exit(&stp->sd_lock); 4634 return (EINVAL); 4635 } 4636 if ((arg & S_BANDURG) && !(arg & S_RDBAND)) { 4637 mutex_exit(&stp->sd_lock); 4638 return (EINVAL); 4639 } 4640 4641 /* 4642 * If proc not already registered, add it 4643 * to list. 4644 */ 4645 if (!ssp) { 4646 ssp = kmem_alloc(sizeof (strsig_t), KM_SLEEP); 4647 ssp->ss_pidp = pidp; 4648 ssp->ss_pid = pidp->pid_id; 4649 ssp->ss_next = NULL; 4650 if (pssp) 4651 pssp->ss_next = ssp; 4652 else 4653 stp->sd_siglist = ssp; 4654 mutex_enter(&pidlock); 4655 PID_HOLD(pidp); 4656 mutex_exit(&pidlock); 4657 } 4658 4659 /* 4660 * Set events. 4661 */ 4662 ssp->ss_events = (int)arg; 4663 } else { 4664 /* 4665 * Remove proc from register list. 4666 */ 4667 if (ssp) { 4668 mutex_enter(&pidlock); 4669 PID_RELE(pidp); 4670 mutex_exit(&pidlock); 4671 if (pssp) 4672 pssp->ss_next = ssp->ss_next; 4673 else 4674 stp->sd_siglist = ssp->ss_next; 4675 kmem_free(ssp, sizeof (strsig_t)); 4676 } else { 4677 mutex_exit(&stp->sd_lock); 4678 return (EINVAL); 4679 } 4680 } 4681 4682 /* 4683 * Recalculate OR of sig events. 4684 */ 4685 stp->sd_sigflags = 0; 4686 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 4687 stp->sd_sigflags |= ssp->ss_events; 4688 mutex_exit(&stp->sd_lock); 4689 return (0); 4690 } 4691 4692 case I_GETSIG: 4693 /* 4694 * Return (in arg) the current registration of events 4695 * for which the calling proc is to be signaled. 
4696 */ 4697 { 4698 struct strsig *ssp; 4699 struct pid *pidp; 4700 4701 pidp = curproc->p_pidp; 4702 mutex_enter(&stp->sd_lock); 4703 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 4704 if (ssp->ss_pidp == pidp) { 4705 error = strcopyout(&ssp->ss_events, (void *)arg, 4706 sizeof (int), copyflag); 4707 mutex_exit(&stp->sd_lock); 4708 return (error); 4709 } 4710 mutex_exit(&stp->sd_lock); 4711 return (EINVAL); 4712 } 4713 4714 case I_ESETSIG: 4715 /* 4716 * Register the ss_pid to receive the SIGPOLL 4717 * signal based on the events is ss_events arg. If 4718 * ss_events is zero, remove the proc from register list. 4719 */ 4720 { 4721 struct strsig *ssp, *pssp; 4722 struct proc *proc; 4723 struct pid *pidp; 4724 pid_t pid; 4725 struct strsigset ss; 4726 4727 error = strcopyin((void *)arg, &ss, sizeof (ss), copyflag); 4728 if (error) 4729 return (error); 4730 4731 pid = ss.ss_pid; 4732 4733 if (ss.ss_events != 0) { 4734 /* 4735 * Permissions check by sending signal 0. 4736 * Note that when kill fails it does a set_errno 4737 * causing the system call to fail. 4738 */ 4739 error = kill(pid, 0); 4740 if (error) { 4741 return (error); 4742 } 4743 } 4744 mutex_enter(&pidlock); 4745 if (pid == 0) 4746 proc = curproc; 4747 else if (pid < 0) 4748 proc = pgfind(-pid); 4749 else 4750 proc = prfind(pid); 4751 if (proc == NULL) { 4752 mutex_exit(&pidlock); 4753 return (ESRCH); 4754 } 4755 if (pid < 0) 4756 pidp = proc->p_pgidp; 4757 else 4758 pidp = proc->p_pidp; 4759 ASSERT(pidp); 4760 /* 4761 * Get a hold on the pid structure while referencing it. 4762 * There is a separate PID_HOLD should it be inserted 4763 * in the list below. 4764 */ 4765 PID_HOLD(pidp); 4766 mutex_exit(&pidlock); 4767 4768 pssp = NULL; 4769 /* 4770 * Hold sd_lock to prevent traversal of sd_siglist while 4771 * it is modified. 
4772 */ 4773 mutex_enter(&stp->sd_lock); 4774 for (ssp = stp->sd_siglist; ssp && (ssp->ss_pid != pid); 4775 pssp = ssp, ssp = ssp->ss_next) 4776 ; 4777 4778 if (ss.ss_events) { 4779 if (ss.ss_events & 4780 ~(S_INPUT|S_HIPRI|S_MSG|S_HANGUP|S_ERROR| 4781 S_RDNORM|S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)) { 4782 mutex_exit(&stp->sd_lock); 4783 mutex_enter(&pidlock); 4784 PID_RELE(pidp); 4785 mutex_exit(&pidlock); 4786 return (EINVAL); 4787 } 4788 if ((ss.ss_events & S_BANDURG) && 4789 !(ss.ss_events & S_RDBAND)) { 4790 mutex_exit(&stp->sd_lock); 4791 mutex_enter(&pidlock); 4792 PID_RELE(pidp); 4793 mutex_exit(&pidlock); 4794 return (EINVAL); 4795 } 4796 4797 /* 4798 * If proc not already registered, add it 4799 * to list. 4800 */ 4801 if (!ssp) { 4802 ssp = kmem_alloc(sizeof (strsig_t), KM_SLEEP); 4803 ssp->ss_pidp = pidp; 4804 ssp->ss_pid = pid; 4805 ssp->ss_next = NULL; 4806 if (pssp) 4807 pssp->ss_next = ssp; 4808 else 4809 stp->sd_siglist = ssp; 4810 mutex_enter(&pidlock); 4811 PID_HOLD(pidp); 4812 mutex_exit(&pidlock); 4813 } 4814 4815 /* 4816 * Set events. 4817 */ 4818 ssp->ss_events = ss.ss_events; 4819 } else { 4820 /* 4821 * Remove proc from register list. 4822 */ 4823 if (ssp) { 4824 mutex_enter(&pidlock); 4825 PID_RELE(pidp); 4826 mutex_exit(&pidlock); 4827 if (pssp) 4828 pssp->ss_next = ssp->ss_next; 4829 else 4830 stp->sd_siglist = ssp->ss_next; 4831 kmem_free(ssp, sizeof (strsig_t)); 4832 } else { 4833 mutex_exit(&stp->sd_lock); 4834 mutex_enter(&pidlock); 4835 PID_RELE(pidp); 4836 mutex_exit(&pidlock); 4837 return (EINVAL); 4838 } 4839 } 4840 4841 /* 4842 * Recalculate OR of sig events. 
4843 */ 4844 stp->sd_sigflags = 0; 4845 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 4846 stp->sd_sigflags |= ssp->ss_events; 4847 mutex_exit(&stp->sd_lock); 4848 mutex_enter(&pidlock); 4849 PID_RELE(pidp); 4850 mutex_exit(&pidlock); 4851 return (0); 4852 } 4853 4854 case I_EGETSIG: 4855 /* 4856 * Return (in arg) the current registration of events 4857 * for which the calling proc is to be signaled. 4858 */ 4859 { 4860 struct strsig *ssp; 4861 struct proc *proc; 4862 pid_t pid; 4863 struct pid *pidp; 4864 struct strsigset ss; 4865 4866 error = strcopyin((void *)arg, &ss, sizeof (ss), copyflag); 4867 if (error) 4868 return (error); 4869 4870 pid = ss.ss_pid; 4871 mutex_enter(&pidlock); 4872 if (pid == 0) 4873 proc = curproc; 4874 else if (pid < 0) 4875 proc = pgfind(-pid); 4876 else 4877 proc = prfind(pid); 4878 if (proc == NULL) { 4879 mutex_exit(&pidlock); 4880 return (ESRCH); 4881 } 4882 if (pid < 0) 4883 pidp = proc->p_pgidp; 4884 else 4885 pidp = proc->p_pidp; 4886 4887 /* Prevent the pidp from being reassigned */ 4888 PID_HOLD(pidp); 4889 mutex_exit(&pidlock); 4890 4891 mutex_enter(&stp->sd_lock); 4892 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 4893 if (ssp->ss_pid == pid) { 4894 ss.ss_pid = ssp->ss_pid; 4895 ss.ss_events = ssp->ss_events; 4896 error = strcopyout(&ss, (void *)arg, 4897 sizeof (struct strsigset), copyflag); 4898 mutex_exit(&stp->sd_lock); 4899 mutex_enter(&pidlock); 4900 PID_RELE(pidp); 4901 mutex_exit(&pidlock); 4902 return (error); 4903 } 4904 mutex_exit(&stp->sd_lock); 4905 mutex_enter(&pidlock); 4906 PID_RELE(pidp); 4907 mutex_exit(&pidlock); 4908 return (EINVAL); 4909 } 4910 4911 case I_PEEK: 4912 { 4913 STRUCT_DECL(strpeek, strpeek); 4914 size_t n; 4915 mblk_t *fmp, *tmp_mp = NULL; 4916 4917 STRUCT_INIT(strpeek, flag); 4918 4919 error = strcopyin((void *)arg, STRUCT_BUF(strpeek), 4920 STRUCT_SIZE(strpeek), copyflag); 4921 if (error) 4922 return (error); 4923 4924 mutex_enter(QLOCK(rdq)); 4925 /* 4926 * Skip the invalid 
messages 4927 */ 4928 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next) 4929 if (mp->b_datap->db_type != M_SIG) 4930 break; 4931 4932 /* 4933 * If user has requested to peek at a high priority message 4934 * and first message is not, return 0 4935 */ 4936 if (mp != NULL) { 4937 if ((STRUCT_FGET(strpeek, flags) & RS_HIPRI) && 4938 queclass(mp) == QNORM) { 4939 *rvalp = 0; 4940 mutex_exit(QLOCK(rdq)); 4941 return (0); 4942 } 4943 } else if (stp->sd_struiordq == NULL || 4944 (STRUCT_FGET(strpeek, flags) & RS_HIPRI)) { 4945 /* 4946 * No mblks to look at at the streamhead and 4947 * 1). This isn't a synch stream or 4948 * 2). This is a synch stream but caller wants high 4949 * priority messages which is not supported by 4950 * the synch stream. (it only supports QNORM) 4951 */ 4952 *rvalp = 0; 4953 mutex_exit(QLOCK(rdq)); 4954 return (0); 4955 } 4956 4957 fmp = mp; 4958 4959 if (mp && mp->b_datap->db_type == M_PASSFP) { 4960 mutex_exit(QLOCK(rdq)); 4961 return (EBADMSG); 4962 } 4963 4964 ASSERT(mp == NULL || mp->b_datap->db_type == M_PCPROTO || 4965 mp->b_datap->db_type == M_PROTO || 4966 mp->b_datap->db_type == M_DATA); 4967 4968 if (mp && mp->b_datap->db_type == M_PCPROTO) { 4969 STRUCT_FSET(strpeek, flags, RS_HIPRI); 4970 } else { 4971 STRUCT_FSET(strpeek, flags, 0); 4972 } 4973 4974 4975 if (mp && ((tmp_mp = dupmsg(mp)) == NULL)) { 4976 mutex_exit(QLOCK(rdq)); 4977 return (ENOSR); 4978 } 4979 mutex_exit(QLOCK(rdq)); 4980 4981 /* 4982 * set mp = tmp_mp, so that I_PEEK processing can continue. 4983 * tmp_mp is used to free the dup'd message. 4984 */ 4985 mp = tmp_mp; 4986 4987 uio.uio_fmode = 0; 4988 uio.uio_extflg = UIO_COPY_CACHED; 4989 uio.uio_segflg = (copyflag == U_TO_K) ? UIO_USERSPACE : 4990 UIO_SYSSPACE; 4991 uio.uio_limit = 0; 4992 /* 4993 * First process PROTO blocks, if any. 4994 * If user doesn't want to get ctl info by setting maxlen <= 0, 4995 * then set len to -1/0 and skip control blocks part. 
4996 */ 4997 if (STRUCT_FGET(strpeek, ctlbuf.maxlen) < 0) 4998 STRUCT_FSET(strpeek, ctlbuf.len, -1); 4999 else if (STRUCT_FGET(strpeek, ctlbuf.maxlen) == 0) 5000 STRUCT_FSET(strpeek, ctlbuf.len, 0); 5001 else { 5002 int ctl_part = 0; 5003 5004 iov.iov_base = STRUCT_FGETP(strpeek, ctlbuf.buf); 5005 iov.iov_len = STRUCT_FGET(strpeek, ctlbuf.maxlen); 5006 uio.uio_iov = &iov; 5007 uio.uio_resid = iov.iov_len; 5008 uio.uio_loffset = 0; 5009 uio.uio_iovcnt = 1; 5010 while (mp && mp->b_datap->db_type != M_DATA && 5011 uio.uio_resid >= 0) { 5012 ASSERT(STRUCT_FGET(strpeek, flags) == 0 ? 5013 mp->b_datap->db_type == M_PROTO : 5014 mp->b_datap->db_type == M_PCPROTO); 5015 5016 if ((n = MIN(uio.uio_resid, 5017 mp->b_wptr - mp->b_rptr)) != 0 && 5018 (error = uiomove((char *)mp->b_rptr, n, 5019 UIO_READ, &uio)) != 0) { 5020 freemsg(tmp_mp); 5021 return (error); 5022 } 5023 ctl_part = 1; 5024 mp = mp->b_cont; 5025 } 5026 /* No ctl message */ 5027 if (ctl_part == 0) 5028 STRUCT_FSET(strpeek, ctlbuf.len, -1); 5029 else 5030 STRUCT_FSET(strpeek, ctlbuf.len, 5031 STRUCT_FGET(strpeek, ctlbuf.maxlen) - 5032 uio.uio_resid); 5033 } 5034 5035 /* 5036 * Now process DATA blocks, if any. 5037 * If user doesn't want to get data info by setting maxlen <= 0, 5038 * then set len to -1/0 and skip data blocks part. 
5039 */ 5040 if (STRUCT_FGET(strpeek, databuf.maxlen) < 0) 5041 STRUCT_FSET(strpeek, databuf.len, -1); 5042 else if (STRUCT_FGET(strpeek, databuf.maxlen) == 0) 5043 STRUCT_FSET(strpeek, databuf.len, 0); 5044 else { 5045 int data_part = 0; 5046 5047 iov.iov_base = STRUCT_FGETP(strpeek, databuf.buf); 5048 iov.iov_len = STRUCT_FGET(strpeek, databuf.maxlen); 5049 uio.uio_iov = &iov; 5050 uio.uio_resid = iov.iov_len; 5051 uio.uio_loffset = 0; 5052 uio.uio_iovcnt = 1; 5053 while (mp && uio.uio_resid) { 5054 if (mp->b_datap->db_type == M_DATA) { 5055 if ((n = MIN(uio.uio_resid, 5056 mp->b_wptr - mp->b_rptr)) != 0 && 5057 (error = uiomove((char *)mp->b_rptr, 5058 n, UIO_READ, &uio)) != 0) { 5059 freemsg(tmp_mp); 5060 return (error); 5061 } 5062 data_part = 1; 5063 } 5064 ASSERT(data_part == 0 || 5065 mp->b_datap->db_type == M_DATA); 5066 mp = mp->b_cont; 5067 } 5068 /* No data message */ 5069 if (data_part == 0) 5070 STRUCT_FSET(strpeek, databuf.len, -1); 5071 else 5072 STRUCT_FSET(strpeek, databuf.len, 5073 STRUCT_FGET(strpeek, databuf.maxlen) - 5074 uio.uio_resid); 5075 } 5076 freemsg(tmp_mp); 5077 5078 /* 5079 * It is a synch stream and user wants to get 5080 * data (maxlen > 0). 5081 * uio setup is done by the codes that process DATA 5082 * blocks above. 5083 */ 5084 if ((fmp == NULL) && STRUCT_FGET(strpeek, databuf.maxlen) > 0) { 5085 infod_t infod; 5086 5087 infod.d_cmd = INFOD_COPYOUT; 5088 infod.d_res = 0; 5089 infod.d_uiop = &uio; 5090 error = infonext(rdq, &infod); 5091 if (error == EINVAL || error == EBUSY) 5092 error = 0; 5093 if (error) 5094 return (error); 5095 STRUCT_FSET(strpeek, databuf.len, STRUCT_FGET(strpeek, 5096 databuf.maxlen) - uio.uio_resid); 5097 if (STRUCT_FGET(strpeek, databuf.len) == 0) { 5098 /* 5099 * No data found by the infonext(). 
5100 */ 5101 STRUCT_FSET(strpeek, databuf.len, -1); 5102 } 5103 } 5104 error = strcopyout(STRUCT_BUF(strpeek), (void *)arg, 5105 STRUCT_SIZE(strpeek), copyflag); 5106 if (error) { 5107 return (error); 5108 } 5109 /* 5110 * If there is no message retrieved, set return code to 0 5111 * otherwise, set it to 1. 5112 */ 5113 if (STRUCT_FGET(strpeek, ctlbuf.len) == -1 && 5114 STRUCT_FGET(strpeek, databuf.len) == -1) 5115 *rvalp = 0; 5116 else 5117 *rvalp = 1; 5118 return (0); 5119 } 5120 5121 case I_FDINSERT: 5122 { 5123 STRUCT_DECL(strfdinsert, strfdinsert); 5124 struct file *resftp; 5125 struct stdata *resstp; 5126 t_uscalar_t ival; 5127 ssize_t msgsize; 5128 struct strbuf mctl; 5129 5130 STRUCT_INIT(strfdinsert, flag); 5131 if (stp->sd_flag & STRHUP) 5132 return (ENXIO); 5133 /* 5134 * STRDERR, STWRERR and STPLEX tested above. 5135 */ 5136 error = strcopyin((void *)arg, STRUCT_BUF(strfdinsert), 5137 STRUCT_SIZE(strfdinsert), copyflag); 5138 if (error) 5139 return (error); 5140 5141 if (STRUCT_FGET(strfdinsert, offset) < 0 || 5142 (STRUCT_FGET(strfdinsert, offset) % 5143 sizeof (t_uscalar_t)) != 0) 5144 return (EINVAL); 5145 if ((resftp = getf(STRUCT_FGET(strfdinsert, fildes))) != NULL) { 5146 if ((resstp = resftp->f_vnode->v_stream) == NULL) { 5147 releasef(STRUCT_FGET(strfdinsert, fildes)); 5148 return (EINVAL); 5149 } 5150 } else 5151 return (EINVAL); 5152 5153 mutex_enter(&resstp->sd_lock); 5154 if (resstp->sd_flag & (STRDERR|STWRERR|STRHUP|STPLEX)) { 5155 error = strgeterr(resstp, 5156 STRDERR|STWRERR|STRHUP|STPLEX, 0); 5157 if (error != 0) { 5158 mutex_exit(&resstp->sd_lock); 5159 releasef(STRUCT_FGET(strfdinsert, fildes)); 5160 return (error); 5161 } 5162 } 5163 mutex_exit(&resstp->sd_lock); 5164 5165 #ifdef _ILP32 5166 { 5167 queue_t *q; 5168 queue_t *mate = NULL; 5169 5170 /* get read queue of stream terminus */ 5171 claimstr(resstp->sd_wrq); 5172 for (q = resstp->sd_wrq->q_next; q->q_next != NULL; 5173 q = q->q_next) 5174 if (!STRMATED(resstp) && STREAM(q) != 
resstp && 5175 mate == NULL) { 5176 ASSERT(q->q_qinfo->qi_srvp); 5177 ASSERT(_OTHERQ(q)->q_qinfo->qi_srvp); 5178 claimstr(q); 5179 mate = q; 5180 } 5181 q = _RD(q); 5182 if (mate) 5183 releasestr(mate); 5184 releasestr(resstp->sd_wrq); 5185 ival = (t_uscalar_t)q; 5186 } 5187 #else 5188 ival = (t_uscalar_t)getminor(resftp->f_vnode->v_rdev); 5189 #endif /* _ILP32 */ 5190 5191 if (STRUCT_FGET(strfdinsert, ctlbuf.len) < 5192 STRUCT_FGET(strfdinsert, offset) + sizeof (t_uscalar_t)) { 5193 releasef(STRUCT_FGET(strfdinsert, fildes)); 5194 return (EINVAL); 5195 } 5196 5197 /* 5198 * Check for legal flag value. 5199 */ 5200 if (STRUCT_FGET(strfdinsert, flags) & ~RS_HIPRI) { 5201 releasef(STRUCT_FGET(strfdinsert, fildes)); 5202 return (EINVAL); 5203 } 5204 5205 /* get these values from those cached in the stream head */ 5206 mutex_enter(QLOCK(stp->sd_wrq)); 5207 rmin = stp->sd_qn_minpsz; 5208 rmax = stp->sd_qn_maxpsz; 5209 mutex_exit(QLOCK(stp->sd_wrq)); 5210 5211 /* 5212 * Make sure ctl and data sizes together fall within 5213 * the limits of the max and min receive packet sizes 5214 * and do not exceed system limit. A negative data 5215 * length means that no data part is to be sent. 
5216 */ 5217 ASSERT((rmax >= 0) || (rmax == INFPSZ)); 5218 if (rmax == 0) { 5219 releasef(STRUCT_FGET(strfdinsert, fildes)); 5220 return (ERANGE); 5221 } 5222 if ((msgsize = STRUCT_FGET(strfdinsert, databuf.len)) < 0) 5223 msgsize = 0; 5224 if ((msgsize < rmin) || 5225 ((msgsize > rmax) && (rmax != INFPSZ)) || 5226 (STRUCT_FGET(strfdinsert, ctlbuf.len) > strctlsz)) { 5227 releasef(STRUCT_FGET(strfdinsert, fildes)); 5228 return (ERANGE); 5229 } 5230 5231 mutex_enter(&stp->sd_lock); 5232 while (!(STRUCT_FGET(strfdinsert, flags) & RS_HIPRI) && 5233 !canputnext(stp->sd_wrq)) { 5234 if ((error = strwaitq(stp, WRITEWAIT, (ssize_t)0, 5235 flag, -1, &done)) != 0 || done) { 5236 mutex_exit(&stp->sd_lock); 5237 releasef(STRUCT_FGET(strfdinsert, fildes)); 5238 return (error); 5239 } 5240 if ((error = i_straccess(stp, access)) != 0) { 5241 mutex_exit(&stp->sd_lock); 5242 releasef( 5243 STRUCT_FGET(strfdinsert, fildes)); 5244 return (error); 5245 } 5246 } 5247 mutex_exit(&stp->sd_lock); 5248 5249 /* 5250 * Copy strfdinsert.ctlbuf into native form of 5251 * ctlbuf to pass down into strmakemsg(). 5252 */ 5253 mctl.maxlen = STRUCT_FGET(strfdinsert, ctlbuf.maxlen); 5254 mctl.len = STRUCT_FGET(strfdinsert, ctlbuf.len); 5255 mctl.buf = STRUCT_FGETP(strfdinsert, ctlbuf.buf); 5256 5257 iov.iov_base = STRUCT_FGETP(strfdinsert, databuf.buf); 5258 iov.iov_len = STRUCT_FGET(strfdinsert, databuf.len); 5259 uio.uio_iov = &iov; 5260 uio.uio_iovcnt = 1; 5261 uio.uio_loffset = 0; 5262 uio.uio_segflg = (copyflag == U_TO_K) ? 
UIO_USERSPACE : 5263 UIO_SYSSPACE; 5264 uio.uio_fmode = 0; 5265 uio.uio_extflg = UIO_COPY_CACHED; 5266 uio.uio_resid = iov.iov_len; 5267 if ((error = strmakemsg(&mctl, 5268 &msgsize, &uio, stp, 5269 STRUCT_FGET(strfdinsert, flags), &mp)) != 0 || !mp) { 5270 STRUCT_FSET(strfdinsert, databuf.len, msgsize); 5271 releasef(STRUCT_FGET(strfdinsert, fildes)); 5272 return (error); 5273 } 5274 5275 STRUCT_FSET(strfdinsert, databuf.len, msgsize); 5276 5277 /* 5278 * Place the possibly reencoded queue pointer 'offset' bytes 5279 * from the start of the control portion of the message. 5280 */ 5281 *((t_uscalar_t *)(mp->b_rptr + 5282 STRUCT_FGET(strfdinsert, offset))) = ival; 5283 5284 /* 5285 * Put message downstream. 5286 */ 5287 stream_willservice(stp); 5288 putnext(stp->sd_wrq, mp); 5289 stream_runservice(stp); 5290 releasef(STRUCT_FGET(strfdinsert, fildes)); 5291 return (error); 5292 } 5293 5294 case I_SENDFD: 5295 { 5296 struct file *fp; 5297 5298 if ((fp = getf((int)arg)) == NULL) 5299 return (EBADF); 5300 error = do_sendfp(stp, fp, crp); 5301 if (auditing) { 5302 audit_fdsend((int)arg, fp, error); 5303 } 5304 releasef((int)arg); 5305 return (error); 5306 } 5307 5308 case I_RECVFD: 5309 case I_E_RECVFD: 5310 { 5311 struct k_strrecvfd *srf; 5312 int i, fd; 5313 5314 mutex_enter(&stp->sd_lock); 5315 while (!(mp = getq(rdq))) { 5316 if (stp->sd_flag & (STRHUP|STREOF)) { 5317 mutex_exit(&stp->sd_lock); 5318 return (ENXIO); 5319 } 5320 if ((error = strwaitq(stp, GETWAIT, (ssize_t)0, 5321 flag, -1, &done)) != 0 || done) { 5322 mutex_exit(&stp->sd_lock); 5323 return (error); 5324 } 5325 if ((error = i_straccess(stp, access)) != 0) { 5326 mutex_exit(&stp->sd_lock); 5327 return (error); 5328 } 5329 } 5330 if (mp->b_datap->db_type != M_PASSFP) { 5331 putback(stp, rdq, mp, mp->b_band); 5332 mutex_exit(&stp->sd_lock); 5333 return (EBADMSG); 5334 } 5335 mutex_exit(&stp->sd_lock); 5336 5337 srf = (struct k_strrecvfd *)mp->b_rptr; 5338 if ((fd = ufalloc(0)) == -1) { 5339 
mutex_enter(&stp->sd_lock); 5340 putback(stp, rdq, mp, mp->b_band); 5341 mutex_exit(&stp->sd_lock); 5342 return (EMFILE); 5343 } 5344 if (cmd == I_RECVFD) { 5345 struct o_strrecvfd ostrfd; 5346 5347 /* check to see if uid/gid values are too large. */ 5348 5349 if (srf->uid > (o_uid_t)USHRT_MAX || 5350 srf->gid > (o_gid_t)USHRT_MAX) { 5351 mutex_enter(&stp->sd_lock); 5352 putback(stp, rdq, mp, mp->b_band); 5353 mutex_exit(&stp->sd_lock); 5354 setf(fd, NULL); /* release fd entry */ 5355 return (EOVERFLOW); 5356 } 5357 5358 ostrfd.fd = fd; 5359 ostrfd.uid = (o_uid_t)srf->uid; 5360 ostrfd.gid = (o_gid_t)srf->gid; 5361 5362 /* Null the filler bits */ 5363 for (i = 0; i < 8; i++) 5364 ostrfd.fill[i] = 0; 5365 5366 error = strcopyout(&ostrfd, (void *)arg, 5367 sizeof (struct o_strrecvfd), copyflag); 5368 } else { /* I_E_RECVFD */ 5369 struct strrecvfd strfd; 5370 5371 strfd.fd = fd; 5372 strfd.uid = srf->uid; 5373 strfd.gid = srf->gid; 5374 5375 /* null the filler bits */ 5376 for (i = 0; i < 8; i++) 5377 strfd.fill[i] = 0; 5378 5379 error = strcopyout(&strfd, (void *)arg, 5380 sizeof (struct strrecvfd), copyflag); 5381 } 5382 5383 if (error) { 5384 setf(fd, NULL); /* release fd entry */ 5385 mutex_enter(&stp->sd_lock); 5386 putback(stp, rdq, mp, mp->b_band); 5387 mutex_exit(&stp->sd_lock); 5388 return (error); 5389 } 5390 if (auditing) { 5391 audit_fdrecv(fd, srf->fp); 5392 } 5393 5394 /* 5395 * Always increment f_count since the freemsg() below will 5396 * always call free_passfp() which performs a closef(). 5397 */ 5398 mutex_enter(&srf->fp->f_tlock); 5399 srf->fp->f_count++; 5400 mutex_exit(&srf->fp->f_tlock); 5401 setf(fd, srf->fp); 5402 freemsg(mp); 5403 return (0); 5404 } 5405 5406 case I_SWROPT: 5407 /* 5408 * Set/clear the write options. arg is a bit 5409 * mask with any of the following bits set... 5410 * SNDZERO - send zero length message 5411 * SNDPIPE - send sigpipe to process if 5412 * sd_werror is set and process is 5413 * doing a write or putmsg. 
5414 * The new stream head write options should reflect 5415 * what is in arg. 5416 */ 5417 if (arg & ~(SNDZERO|SNDPIPE)) 5418 return (EINVAL); 5419 5420 mutex_enter(&stp->sd_lock); 5421 stp->sd_wput_opt &= ~(SW_SIGPIPE|SW_SNDZERO); 5422 if (arg & SNDZERO) 5423 stp->sd_wput_opt |= SW_SNDZERO; 5424 if (arg & SNDPIPE) 5425 stp->sd_wput_opt |= SW_SIGPIPE; 5426 mutex_exit(&stp->sd_lock); 5427 return (0); 5428 5429 case I_GWROPT: 5430 { 5431 int wropt = 0; 5432 5433 if (stp->sd_wput_opt & SW_SNDZERO) 5434 wropt |= SNDZERO; 5435 if (stp->sd_wput_opt & SW_SIGPIPE) 5436 wropt |= SNDPIPE; 5437 return (strcopyout(&wropt, (void *)arg, sizeof (wropt), 5438 copyflag)); 5439 } 5440 5441 case I_LIST: 5442 /* 5443 * Returns all the modules found on this stream, 5444 * upto the driver. If argument is NULL, return the 5445 * number of modules (including driver). If argument 5446 * is not NULL, copy the names into the structure 5447 * provided. 5448 */ 5449 5450 { 5451 queue_t *q; 5452 char *qname; 5453 int i, nmods; 5454 struct str_mlist *mlist; 5455 STRUCT_DECL(str_list, strlist); 5456 5457 if (arg == NULL) { /* Return number of modules plus driver */ 5458 if (stp->sd_vnode->v_type == VFIFO) 5459 *rvalp = stp->sd_pushcnt; 5460 else 5461 *rvalp = stp->sd_pushcnt + 1; 5462 return (0); 5463 } 5464 5465 STRUCT_INIT(strlist, flag); 5466 5467 error = strcopyin((void *)arg, STRUCT_BUF(strlist), 5468 STRUCT_SIZE(strlist), copyflag); 5469 if (error != 0) 5470 return (error); 5471 5472 mlist = STRUCT_FGETP(strlist, sl_modlist); 5473 nmods = STRUCT_FGET(strlist, sl_nmods); 5474 if (nmods <= 0) 5475 return (EINVAL); 5476 5477 claimstr(stp->sd_wrq); 5478 q = stp->sd_wrq; 5479 for (i = 0; i < nmods && _SAMESTR(q); i++, q = q->q_next) { 5480 qname = Q2NAME(q->q_next); 5481 error = strcopyout(qname, &mlist[i], strlen(qname) + 1, 5482 copyflag); 5483 if (error != 0) { 5484 releasestr(stp->sd_wrq); 5485 return (error); 5486 } 5487 } 5488 releasestr(stp->sd_wrq); 5489 return (strcopyout(&i, (void 
*)arg, sizeof (int), copyflag)); 5490 } 5491 5492 case I_CKBAND: 5493 { 5494 queue_t *q; 5495 qband_t *qbp; 5496 5497 if ((arg < 0) || (arg >= NBAND)) 5498 return (EINVAL); 5499 q = _RD(stp->sd_wrq); 5500 mutex_enter(QLOCK(q)); 5501 if (arg > (int)q->q_nband) { 5502 *rvalp = 0; 5503 } else { 5504 if (arg == 0) { 5505 if (q->q_first) 5506 *rvalp = 1; 5507 else 5508 *rvalp = 0; 5509 } else { 5510 qbp = q->q_bandp; 5511 while (--arg > 0) 5512 qbp = qbp->qb_next; 5513 if (qbp->qb_first) 5514 *rvalp = 1; 5515 else 5516 *rvalp = 0; 5517 } 5518 } 5519 mutex_exit(QLOCK(q)); 5520 return (0); 5521 } 5522 5523 case I_GETBAND: 5524 { 5525 int intpri; 5526 queue_t *q; 5527 5528 q = _RD(stp->sd_wrq); 5529 mutex_enter(QLOCK(q)); 5530 mp = q->q_first; 5531 if (!mp) { 5532 mutex_exit(QLOCK(q)); 5533 return (ENODATA); 5534 } 5535 intpri = (int)mp->b_band; 5536 error = strcopyout(&intpri, (void *)arg, sizeof (int), 5537 copyflag); 5538 mutex_exit(QLOCK(q)); 5539 return (error); 5540 } 5541 5542 case I_ATMARK: 5543 { 5544 queue_t *q; 5545 5546 if (arg & ~(ANYMARK|LASTMARK)) 5547 return (EINVAL); 5548 q = _RD(stp->sd_wrq); 5549 mutex_enter(&stp->sd_lock); 5550 if ((stp->sd_flag & STRATMARK) && (arg == ANYMARK)) { 5551 *rvalp = 1; 5552 } else { 5553 mutex_enter(QLOCK(q)); 5554 mp = q->q_first; 5555 5556 if (mp == NULL) 5557 *rvalp = 0; 5558 else if ((arg == ANYMARK) && (mp->b_flag & MSGMARK)) 5559 *rvalp = 1; 5560 else if ((arg == LASTMARK) && (mp == stp->sd_mark)) 5561 *rvalp = 1; 5562 else 5563 *rvalp = 0; 5564 mutex_exit(QLOCK(q)); 5565 } 5566 mutex_exit(&stp->sd_lock); 5567 return (0); 5568 } 5569 5570 case I_CANPUT: 5571 { 5572 char band; 5573 5574 if ((arg < 0) || (arg >= NBAND)) 5575 return (EINVAL); 5576 band = (char)arg; 5577 *rvalp = bcanputnext(stp->sd_wrq, band); 5578 return (0); 5579 } 5580 5581 case I_SETCLTIME: 5582 { 5583 int closetime; 5584 5585 error = strcopyin((void *)arg, &closetime, sizeof (int), 5586 copyflag); 5587 if (error) 5588 return (error); 5589 if 
(closetime < 0) 5590 return (EINVAL); 5591 5592 stp->sd_closetime = closetime; 5593 return (0); 5594 } 5595 5596 case I_GETCLTIME: 5597 { 5598 int closetime; 5599 5600 closetime = stp->sd_closetime; 5601 return (strcopyout(&closetime, (void *)arg, sizeof (int), 5602 copyflag)); 5603 } 5604 5605 case TIOCGSID: 5606 { 5607 pid_t sid; 5608 5609 mutex_enter(&stp->sd_lock); 5610 if (stp->sd_sidp == NULL) { 5611 mutex_exit(&stp->sd_lock); 5612 return (ENOTTY); 5613 } 5614 sid = stp->sd_sidp->pid_id; 5615 mutex_exit(&stp->sd_lock); 5616 return (strcopyout(&sid, (void *)arg, sizeof (pid_t), 5617 copyflag)); 5618 } 5619 5620 case TIOCSPGRP: 5621 { 5622 pid_t pgrp; 5623 proc_t *q; 5624 pid_t sid, fg_pgid, bg_pgid; 5625 5626 if (error = strcopyin((void *)arg, &pgrp, sizeof (pid_t), 5627 copyflag)) 5628 return (error); 5629 mutex_enter(&stp->sd_lock); 5630 mutex_enter(&pidlock); 5631 if (stp->sd_sidp != ttoproc(curthread)->p_sessp->s_sidp) { 5632 mutex_exit(&pidlock); 5633 mutex_exit(&stp->sd_lock); 5634 return (ENOTTY); 5635 } 5636 if (pgrp == stp->sd_pgidp->pid_id) { 5637 mutex_exit(&pidlock); 5638 mutex_exit(&stp->sd_lock); 5639 return (0); 5640 } 5641 if (pgrp <= 0 || pgrp >= maxpid) { 5642 mutex_exit(&pidlock); 5643 mutex_exit(&stp->sd_lock); 5644 return (EINVAL); 5645 } 5646 if ((q = pgfind(pgrp)) == NULL || 5647 q->p_sessp != ttoproc(curthread)->p_sessp) { 5648 mutex_exit(&pidlock); 5649 mutex_exit(&stp->sd_lock); 5650 return (EPERM); 5651 } 5652 sid = stp->sd_sidp->pid_id; 5653 fg_pgid = q->p_pgrp; 5654 bg_pgid = stp->sd_pgidp->pid_id; 5655 CL_SET_PROCESS_GROUP(curthread, sid, bg_pgid, fg_pgid); 5656 PID_RELE(stp->sd_pgidp); 5657 ctty_clear_sighuped(); 5658 stp->sd_pgidp = q->p_pgidp; 5659 PID_HOLD(stp->sd_pgidp); 5660 mutex_exit(&pidlock); 5661 mutex_exit(&stp->sd_lock); 5662 return (0); 5663 } 5664 5665 case TIOCGPGRP: 5666 { 5667 pid_t pgrp; 5668 5669 mutex_enter(&stp->sd_lock); 5670 if (stp->sd_sidp == NULL) { 5671 mutex_exit(&stp->sd_lock); 5672 return (ENOTTY); 
5673 } 5674 pgrp = stp->sd_pgidp->pid_id; 5675 mutex_exit(&stp->sd_lock); 5676 return (strcopyout(&pgrp, (void *)arg, sizeof (pid_t), 5677 copyflag)); 5678 } 5679 5680 case TIOCSCTTY: 5681 { 5682 return (strctty(stp)); 5683 } 5684 5685 case TIOCNOTTY: 5686 { 5687 /* freectty() always assumes curproc. */ 5688 if (freectty(B_FALSE) != 0) 5689 return (0); 5690 return (ENOTTY); 5691 } 5692 5693 case FIONBIO: 5694 case FIOASYNC: 5695 return (0); /* handled by the upper layer */ 5696 } 5697 }

/*
 * Custom free routine used for M_PASSFP messages.
 *
 * do_sendfp() installs this routine as the esballoc() free function, with
 * the k_strrecvfd buffer itself as the argument.  It drops the file
 * reference that do_sendfp() took on behalf of the message (closef()) and
 * then frees the buffer, which was allocated as a k_strrecvfd immediately
 * followed by the frtn_t describing this routine.
 */
static void
free_passfp(struct k_strrecvfd *srf)
{
	(void) closef(srf->fp);
	kmem_free(srf, sizeof (struct k_strrecvfd) + sizeof (frtn_t));
}

/*
 * Pass the open file `fp' across the stream `stp' as an M_PASSFP message.
 *
 * The message is put on the read queue found at the far end of the stream:
 * the mate's stream head read queue when the stream is already mated,
 * otherwise whatever queue the walk below ends on.  That queue must belong
 * to a stream head (strdata or fifo_strdata) or the request is rejected.
 *
 * The message carries fp together with the uid/gid taken from
 * curthread->t_cred; the `cr' argument is not used.  An extra hold is
 * placed on fp (f_count++) for the message; free_passfp() releases it when
 * the message is freed.
 *
 * Returns:
 *	0	the message was handed to put()
 *	ENXIO	the stream has been hung up (STRHUP)
 *	EINVAL	the far end could not be located, changed while we were
 *		looking at it, or is not a stream head
 *	EAGAIN	the destination queue is full (QFULL), or memory for the
 *		message could not be allocated without sleeping
 */
/* ARGSUSED */
int
do_sendfp(struct stdata *stp, struct file *fp, struct cred *cr)
{
	queue_t *qp, *nextqp;
	struct k_strrecvfd *srf;
	mblk_t *mp;
	frtn_t *frtnp;
	size_t bufsize;
	queue_t	*mate = NULL;
	syncq_t	*sq = NULL;
	int retval = 0;

	if (stp->sd_flag & STRHUP)
		return (ENXIO);

	claimstr(stp->sd_wrq);

	/* Fastpath, we have a pipe, and we are already mated, use it. */
	if (STRMATED(stp)) {
		qp = _RD(stp->sd_mate->sd_wrq);
		claimstr(qp);
		mate = qp;
	} else { /* Not already mated. */

		/*
		 * Walk the stream to the end of this one.
		 * assumes that the claimstr() will prevent
		 * plumbing between the stream head and the
		 * driver from changing
		 */
		qp = stp->sd_wrq;

		/*
		 * Loop until we reach the end of this stream.
		 * On completion, qp points to the write queue
		 * at the end of the stream, or the read queue
		 * at the stream head if this is a fifo.
		 */
		while (((qp = qp->q_next) != NULL) && _SAMESTR(qp))
			;

		/*
		 * Just in case we get a q_next which is NULL, but
		 * not at the end of the stream.  This is actually
		 * broken, so we set an assert to catch it in
		 * debug, and set an error and return if not debug.
		 */
		ASSERT(qp);
		if (qp == NULL) {
			releasestr(stp->sd_wrq);
			return (EINVAL);
		}

		/*
		 * Enter the syncq for the driver, so (hopefully)
		 * the queue values will not change on us.
		 * XXXX - This will only prevent the race IFF only
		 * the write side modifies the q_next member, and
		 * the put procedure is protected by at least
		 * MT_PERQ.
		 */
		if ((sq = qp->q_syncq) != NULL)
			entersq(sq, SQ_PUT);

		/* Now get the q_next value from this qp. */
		nextqp = qp->q_next;

		/*
		 * If nextqp exists and the other stream is different
		 * from this one claim the stream, set the mate, and
		 * get the read queue at the stream head of the other
		 * stream.  Assumes that nextqp was at least valid when
		 * we got it.  Hopefully the entersq of the driver
		 * will prevent it from changing on us.
		 */
		if ((nextqp != NULL) && (STREAM(nextqp) != stp)) {
			ASSERT(qp->q_qinfo->qi_srvp);
			ASSERT(_OTHERQ(qp)->q_qinfo->qi_srvp);
			ASSERT(_OTHERQ(qp->q_next)->q_qinfo->qi_srvp);
			claimstr(nextqp);

			/* Make sure we still have a q_next */
			if (nextqp != qp->q_next) {
				/*
				 * We are still inside the syncq entered
				 * above; leave it before bailing out so
				 * the claim taken by entersq() is not
				 * left behind on this error path.
				 */
				if (sq != NULL)
					leavesq(sq, SQ_PUT);
				releasestr(stp->sd_wrq);
				releasestr(nextqp);
				return (EINVAL);
			}

			qp = _RD(STREAM(nextqp)->sd_wrq);
			mate = qp;
		}
		/* If we entered the syncq above, leave it. */
		if (sq != NULL)
			leavesq(sq, SQ_PUT);
	} /* STRMATED(STP) */

	/* XXX prevents substitution of the ops vector */
	if (qp->q_qinfo != &strdata && qp->q_qinfo != &fifo_strdata) {
		retval = EINVAL;
		goto out;
	}

	if (qp->q_flag & QFULL) {
		retval = EAGAIN;
		goto out;
	}

	/*
	 * Since M_PASSFP messages include a file descriptor, we use
	 * esballoc() and specify a custom free routine (free_passfp()) that
	 * will close the descriptor as part of freeing the message.  For
	 * convenience, we stash the frtn_t right after the data block.
	 */
	bufsize = sizeof (struct k_strrecvfd) + sizeof (frtn_t);
	srf = kmem_alloc(bufsize, KM_NOSLEEP);
	if (srf == NULL) {
		retval = EAGAIN;
		goto out;
	}

	frtnp = (frtn_t *)(srf + 1);
	frtnp->free_arg = (caddr_t)srf;
	frtnp->free_func = free_passfp;

	mp = esballoc((uchar_t *)srf, bufsize, BPRI_MED, frtnp);
	if (mp == NULL) {
		/*
		 * No message was created, so free_passfp() will never run
		 * for this buffer; free it directly.  fp has not been held
		 * yet, so there is nothing to close.
		 */
		kmem_free(srf, bufsize);
		retval = EAGAIN;
		goto out;
	}
	mp->b_wptr += sizeof (struct k_strrecvfd);
	mp->b_datap->db_type = M_PASSFP;

	srf->fp = fp;
	srf->uid = crgetuid(curthread->t_cred);
	srf->gid = crgetgid(curthread->t_cred);

	/* Hold fp for the message; free_passfp() does the matching closef(). */
	mutex_enter(&fp->f_tlock);
	fp->f_count++;
	mutex_exit(&fp->f_tlock);

	put(qp, mp);
out:
	releasestr(stp->sd_wrq);
	if (mate)
		releasestr(mate);
	return (retval);
}

5858 /* 5859 * Send an ioctl message downstream and wait for acknowledgement. 5860 * flags may be set to either U_TO_K or K_TO_K and a combination 5861 * of STR_NOERROR or STR_NOSIG 5862 * STR_NOSIG: Signals are essentially ignored or held and have 5863 * no effect for the duration of the call. 5864 * STR_NOERROR: Ignores stream head read, write and hup errors.
5865 * Additionally, if an existing ioctl times out, it is assumed 5866 * lost and and this ioctl will continue as if the previous ioctl had 5867 * finished. ETIME may be returned if this ioctl times out (i.e. 5868 * ic_timout is not INFTIM). Non-stream head errors may be returned if 5869 * the ioc_error indicates that the driver/module had problems, 5870 * an EFAULT was found when accessing user data, a lack of 5871 * resources, etc. 5872 */ 5873 int 5874 strdoioctl( 5875 struct stdata *stp, 5876 struct strioctl *strioc, 5877 int fflags, /* file flags with model info */ 5878 int flag, 5879 cred_t *crp, 5880 int *rvalp) 5881 { 5882 mblk_t *bp; 5883 struct iocblk *iocbp; 5884 struct copyreq *reqp; 5885 struct copyresp *resp; 5886 int id; 5887 int transparent = 0; 5888 int error = 0; 5889 int len = 0; 5890 caddr_t taddr; 5891 int copyflag = (flag & (U_TO_K | K_TO_K)); 5892 int sigflag = (flag & STR_NOSIG); 5893 int errs; 5894 uint_t waitflags; 5895 5896 ASSERT(copyflag == U_TO_K || copyflag == K_TO_K); 5897 ASSERT((fflags & FMODELS) != 0); 5898 5899 TRACE_2(TR_FAC_STREAMS_FR, 5900 TR_STRDOIOCTL, 5901 "strdoioctl:stp %p strioc %p", stp, strioc); 5902 if (strioc->ic_len == TRANSPARENT) { /* send arg in M_DATA block */ 5903 transparent = 1; 5904 strioc->ic_len = sizeof (intptr_t); 5905 } 5906 5907 if (strioc->ic_len < 0 || (strmsgsz > 0 && strioc->ic_len > strmsgsz)) 5908 return (EINVAL); 5909 5910 if ((bp = allocb_cred_wait(sizeof (union ioctypes), sigflag, &error, 5911 crp, curproc->p_pid)) == NULL) 5912 return (error); 5913 5914 bzero(bp->b_wptr, sizeof (union ioctypes)); 5915 5916 iocbp = (struct iocblk *)bp->b_wptr; 5917 iocbp->ioc_count = strioc->ic_len; 5918 iocbp->ioc_cmd = strioc->ic_cmd; 5919 iocbp->ioc_flag = (fflags & FMODELS); 5920 5921 crhold(crp); 5922 iocbp->ioc_cr = crp; 5923 DB_TYPE(bp) = M_IOCTL; 5924 bp->b_wptr += sizeof (struct iocblk); 5925 5926 if (flag & STR_NOERROR) 5927 errs = STPLEX; 5928 else 5929 errs = STRHUP|STRDERR|STWRERR|STPLEX; 5930 
5931 /* 5932 * If there is data to copy into ioctl block, do so. 5933 */ 5934 if (iocbp->ioc_count > 0) { 5935 if (transparent) 5936 /* 5937 * Note: STR_NOERROR does not have an effect 5938 * in putiocd() 5939 */ 5940 id = K_TO_K | sigflag; 5941 else 5942 id = flag; 5943 if ((error = putiocd(bp, strioc->ic_dp, id, crp)) != 0) { 5944 freemsg(bp); 5945 crfree(crp); 5946 return (error); 5947 } 5948 5949 /* 5950 * We could have slept copying in user pages. 5951 * Recheck the stream head state (the other end 5952 * of a pipe could have gone away). 5953 */ 5954 if (stp->sd_flag & errs) { 5955 mutex_enter(&stp->sd_lock); 5956 error = strgeterr(stp, errs, 0); 5957 mutex_exit(&stp->sd_lock); 5958 if (error != 0) { 5959 freemsg(bp); 5960 crfree(crp); 5961 return (error); 5962 } 5963 } 5964 } 5965 if (transparent) 5966 iocbp->ioc_count = TRANSPARENT; 5967 5968 /* 5969 * Block for up to STRTIMOUT milliseconds if there is an outstanding 5970 * ioctl for this stream already running. All processes 5971 * sleeping here will be awakened as a result of an ACK 5972 * or NAK being received for the outstanding ioctl, or 5973 * as a result of the timer expiring on the outstanding 5974 * ioctl (a failure), or as a result of any waiting 5975 * process's timer expiring (also a failure). 5976 */ 5977 5978 error = 0; 5979 mutex_enter(&stp->sd_lock); 5980 while (stp->sd_flag & (IOCWAIT | IOCWAITNE)) { 5981 clock_t cv_rval; 5982 5983 TRACE_0(TR_FAC_STREAMS_FR, 5984 TR_STRDOIOCTL_WAIT, 5985 "strdoioctl sleeps - IOCWAIT"); 5986 cv_rval = str_cv_wait(&stp->sd_iocmonitor, &stp->sd_lock, 5987 STRTIMOUT, sigflag); 5988 if (cv_rval <= 0) { 5989 if (cv_rval == 0) { 5990 error = EINTR; 5991 } else { 5992 if (flag & STR_NOERROR) { 5993 /* 5994 * Terminating current ioctl in 5995 * progress -- assume it got lost and 5996 * wake up the other thread so that the 5997 * operation completes. 
5998 */ 5999 if (!(stp->sd_flag & IOCWAITNE)) { 6000 stp->sd_flag |= IOCWAITNE; 6001 cv_broadcast(&stp->sd_monitor); 6002 } 6003 /* 6004 * Otherwise, there's a running 6005 * STR_NOERROR -- we have no choice 6006 * here but to wait forever (or until 6007 * interrupted). 6008 */ 6009 } else { 6010 /* 6011 * pending ioctl has caused 6012 * us to time out 6013 */ 6014 error = ETIME; 6015 } 6016 } 6017 } else if ((stp->sd_flag & errs)) { 6018 error = strgeterr(stp, errs, 0); 6019 } 6020 if (error) { 6021 mutex_exit(&stp->sd_lock); 6022 freemsg(bp); 6023 crfree(crp); 6024 return (error); 6025 } 6026 } 6027 6028 /* 6029 * Have control of ioctl mechanism. 6030 * Send down ioctl packet and wait for response. 6031 */ 6032 if (stp->sd_iocblk != (mblk_t *)-1) { 6033 freemsg(stp->sd_iocblk); 6034 } 6035 stp->sd_iocblk = NULL; 6036 6037 /* 6038 * If this is marked with 'noerror' (internal; mostly 6039 * I_{P,}{UN,}LINK), then make sure nobody else is able to get 6040 * in here by setting IOCWAITNE. 6041 */ 6042 waitflags = IOCWAIT; 6043 if (flag & STR_NOERROR) 6044 waitflags |= IOCWAITNE; 6045 6046 stp->sd_flag |= waitflags; 6047 6048 /* 6049 * Assign sequence number. 6050 */ 6051 iocbp->ioc_id = stp->sd_iocid = getiocseqno(); 6052 6053 mutex_exit(&stp->sd_lock); 6054 6055 TRACE_1(TR_FAC_STREAMS_FR, 6056 TR_STRDOIOCTL_PUT, "strdoioctl put: stp %p", stp); 6057 stream_willservice(stp); 6058 putnext(stp->sd_wrq, bp); 6059 stream_runservice(stp); 6060 6061 /* 6062 * Timed wait for acknowledgment. The wait time is limited by the 6063 * timeout value, which must be a positive integer (number of 6064 * milliseconds) to wait, or 0 (use default value of STRTIMOUT 6065 * milliseconds), or -1 (wait forever). This will be awakened 6066 * either by an ACK/NAK message arriving, the timer expiring, or 6067 * the timer expiring on another ioctl waiting for control of the 6068 * mechanism. 
6069 */ 6070 waitioc: 6071 mutex_enter(&stp->sd_lock); 6072 6073 6074 /* 6075 * If the reply has already arrived, don't sleep. If awakened from 6076 * the sleep, fail only if the reply has not arrived by then. 6077 * Otherwise, process the reply. 6078 */ 6079 while (!stp->sd_iocblk) { 6080 clock_t cv_rval; 6081 6082 if (stp->sd_flag & errs) { 6083 error = strgeterr(stp, errs, 0); 6084 if (error != 0) { 6085 stp->sd_flag &= ~waitflags; 6086 cv_broadcast(&stp->sd_iocmonitor); 6087 mutex_exit(&stp->sd_lock); 6088 crfree(crp); 6089 return (error); 6090 } 6091 } 6092 6093 TRACE_0(TR_FAC_STREAMS_FR, 6094 TR_STRDOIOCTL_WAIT2, 6095 "strdoioctl sleeps awaiting reply"); 6096 ASSERT(error == 0); 6097 6098 cv_rval = str_cv_wait(&stp->sd_monitor, &stp->sd_lock, 6099 (strioc->ic_timout ? 6100 strioc->ic_timout * 1000 : STRTIMOUT), sigflag); 6101 6102 /* 6103 * There are four possible cases here: interrupt, timeout, 6104 * wakeup by IOCWAITNE (above), or wakeup by strrput_nondata (a 6105 * valid M_IOCTL reply). 6106 * 6107 * If we've been awakened by a STR_NOERROR ioctl on some other 6108 * thread, then sd_iocblk will still be NULL, and IOCWAITNE 6109 * will be set. Pretend as if we just timed out. Note that 6110 * this other thread waited at least STRTIMOUT before trying to 6111 * awaken our thread, so this is indistinguishable (even for 6112 * INFTIM) from the case where we failed with ETIME waiting on 6113 * IOCWAIT in the prior loop. 6114 */ 6115 if (cv_rval > 0 && !(flag & STR_NOERROR) && 6116 stp->sd_iocblk == NULL && (stp->sd_flag & IOCWAITNE)) { 6117 cv_rval = -1; 6118 } 6119 6120 /* 6121 * note: STR_NOERROR does not protect 6122 * us here.. use ic_timout < 0 6123 */ 6124 if (cv_rval <= 0) { 6125 if (cv_rval == 0) { 6126 error = EINTR; 6127 } else { 6128 error = ETIME; 6129 } 6130 /* 6131 * A message could have come in after we were scheduled 6132 * but before we were actually run. 
6133 */ 6134 bp = stp->sd_iocblk; 6135 stp->sd_iocblk = NULL; 6136 if (bp != NULL) { 6137 if ((bp->b_datap->db_type == M_COPYIN) || 6138 (bp->b_datap->db_type == M_COPYOUT)) { 6139 mutex_exit(&stp->sd_lock); 6140 if (bp->b_cont) { 6141 freemsg(bp->b_cont); 6142 bp->b_cont = NULL; 6143 } 6144 bp->b_datap->db_type = M_IOCDATA; 6145 bp->b_wptr = bp->b_rptr + 6146 sizeof (struct copyresp); 6147 resp = (struct copyresp *)bp->b_rptr; 6148 resp->cp_rval = 6149 (caddr_t)1; /* failure */ 6150 stream_willservice(stp); 6151 putnext(stp->sd_wrq, bp); 6152 stream_runservice(stp); 6153 mutex_enter(&stp->sd_lock); 6154 } else { 6155 freemsg(bp); 6156 } 6157 } 6158 stp->sd_flag &= ~waitflags; 6159 cv_broadcast(&stp->sd_iocmonitor); 6160 mutex_exit(&stp->sd_lock); 6161 crfree(crp); 6162 return (error); 6163 } 6164 } 6165 bp = stp->sd_iocblk; 6166 /* 6167 * Note: it is strictly impossible to get here with sd_iocblk set to 6168 * -1. This is because the initial loop above doesn't allow any new 6169 * ioctls into the fray until all others have passed this point. 6170 */ 6171 ASSERT(bp != NULL && bp != (mblk_t *)-1); 6172 TRACE_1(TR_FAC_STREAMS_FR, 6173 TR_STRDOIOCTL_ACK, "strdoioctl got reply: bp %p", bp); 6174 if ((bp->b_datap->db_type == M_IOCACK) || 6175 (bp->b_datap->db_type == M_IOCNAK)) { 6176 /* for detection of duplicate ioctl replies */ 6177 stp->sd_iocblk = (mblk_t *)-1; 6178 stp->sd_flag &= ~waitflags; 6179 cv_broadcast(&stp->sd_iocmonitor); 6180 mutex_exit(&stp->sd_lock); 6181 } else { 6182 /* 6183 * flags not cleared here because we're still doing 6184 * copy in/out for ioctl. 6185 */ 6186 stp->sd_iocblk = NULL; 6187 mutex_exit(&stp->sd_lock); 6188 } 6189 6190 6191 /* 6192 * Have received acknowledgment. 6193 */ 6194 6195 switch (bp->b_datap->db_type) { 6196 case M_IOCACK: 6197 /* 6198 * Positive ack. 6199 */ 6200 iocbp = (struct iocblk *)bp->b_rptr; 6201 6202 /* 6203 * Set error if indicated. 
6204 */ 6205 if (iocbp->ioc_error) { 6206 error = iocbp->ioc_error; 6207 break; 6208 } 6209 6210 /* 6211 * Set return value. 6212 */ 6213 *rvalp = iocbp->ioc_rval; 6214 6215 /* 6216 * Data may have been returned in ACK message (ioc_count > 0). 6217 * If so, copy it out to the user's buffer. 6218 */ 6219 if (iocbp->ioc_count && !transparent) { 6220 if (error = getiocd(bp, strioc->ic_dp, copyflag)) 6221 break; 6222 } 6223 if (!transparent) { 6224 if (len) /* an M_COPYOUT was used with I_STR */ 6225 strioc->ic_len = len; 6226 else 6227 strioc->ic_len = (int)iocbp->ioc_count; 6228 } 6229 break; 6230 6231 case M_IOCNAK: 6232 /* 6233 * Negative ack. 6234 * 6235 * The only thing to do is set error as specified 6236 * in neg ack packet. 6237 */ 6238 iocbp = (struct iocblk *)bp->b_rptr; 6239 6240 error = (iocbp->ioc_error ? iocbp->ioc_error : EINVAL); 6241 break; 6242 6243 case M_COPYIN: 6244 /* 6245 * Driver or module has requested user ioctl data. 6246 */ 6247 reqp = (struct copyreq *)bp->b_rptr; 6248 6249 /* 6250 * M_COPYIN should *never* have a message attached, though 6251 * it's harmless if it does -- thus, panic on a DEBUG 6252 * kernel and just free it on a non-DEBUG build. 
6253 */ 6254 ASSERT(bp->b_cont == NULL); 6255 if (bp->b_cont != NULL) { 6256 freemsg(bp->b_cont); 6257 bp->b_cont = NULL; 6258 } 6259 6260 error = putiocd(bp, reqp->cq_addr, flag, crp); 6261 if (error && bp->b_cont) { 6262 freemsg(bp->b_cont); 6263 bp->b_cont = NULL; 6264 } 6265 6266 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp); 6267 bp->b_datap->db_type = M_IOCDATA; 6268 6269 mblk_setcred(bp, crp, curproc->p_pid); 6270 resp = (struct copyresp *)bp->b_rptr; 6271 resp->cp_rval = (caddr_t)(uintptr_t)error; 6272 resp->cp_flag = (fflags & FMODELS); 6273 6274 stream_willservice(stp); 6275 putnext(stp->sd_wrq, bp); 6276 stream_runservice(stp); 6277 6278 if (error) { 6279 mutex_enter(&stp->sd_lock); 6280 stp->sd_flag &= ~waitflags; 6281 cv_broadcast(&stp->sd_iocmonitor); 6282 mutex_exit(&stp->sd_lock); 6283 crfree(crp); 6284 return (error); 6285 } 6286 6287 goto waitioc; 6288 6289 case M_COPYOUT: 6290 /* 6291 * Driver or module has ioctl data for a user. 6292 */ 6293 reqp = (struct copyreq *)bp->b_rptr; 6294 ASSERT(bp->b_cont != NULL); 6295 6296 /* 6297 * Always (transparent or non-transparent ) 6298 * use the address specified in the request 6299 */ 6300 taddr = reqp->cq_addr; 6301 if (!transparent) 6302 len = (int)reqp->cq_size; 6303 6304 /* copyout data to the provided address */ 6305 error = getiocd(bp, taddr, copyflag); 6306 6307 freemsg(bp->b_cont); 6308 bp->b_cont = NULL; 6309 6310 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp); 6311 bp->b_datap->db_type = M_IOCDATA; 6312 6313 mblk_setcred(bp, crp, curproc->p_pid); 6314 resp = (struct copyresp *)bp->b_rptr; 6315 resp->cp_rval = (caddr_t)(uintptr_t)error; 6316 resp->cp_flag = (fflags & FMODELS); 6317 6318 stream_willservice(stp); 6319 putnext(stp->sd_wrq, bp); 6320 stream_runservice(stp); 6321 6322 if (error) { 6323 mutex_enter(&stp->sd_lock); 6324 stp->sd_flag &= ~waitflags; 6325 cv_broadcast(&stp->sd_iocmonitor); 6326 mutex_exit(&stp->sd_lock); 6327 crfree(crp); 6328 return (error); 6329 } 6330 goto 
waitioc; 6331 6332 default: 6333 ASSERT(0); 6334 mutex_enter(&stp->sd_lock); 6335 stp->sd_flag &= ~waitflags; 6336 cv_broadcast(&stp->sd_iocmonitor); 6337 mutex_exit(&stp->sd_lock); 6338 break; 6339 } 6340 6341 freemsg(bp); 6342 crfree(crp); 6343 return (error); 6344 } 6345 6346 /* 6347 * Send an M_CMD message downstream and wait for a reply. This is a ptools 6348 * special used to retrieve information from modules/drivers a stream without 6349 * being subjected to flow control or interfering with pending messages on the 6350 * stream (e.g. an ioctl in flight). 6351 */ 6352 int 6353 strdocmd(struct stdata *stp, struct strcmd *scp, cred_t *crp) 6354 { 6355 mblk_t *mp; 6356 struct cmdblk *cmdp; 6357 int error = 0; 6358 int errs = STRHUP|STRDERR|STWRERR|STPLEX; 6359 clock_t rval, timeout = STRTIMOUT; 6360 6361 if (scp->sc_len < 0 || scp->sc_len > sizeof (scp->sc_buf) || 6362 scp->sc_timeout < -1) 6363 return (EINVAL); 6364 6365 if (scp->sc_timeout > 0) 6366 timeout = scp->sc_timeout * MILLISEC; 6367 6368 if ((mp = allocb_cred(sizeof (struct cmdblk), crp, 6369 curproc->p_pid)) == NULL) 6370 return (ENOMEM); 6371 6372 crhold(crp); 6373 6374 cmdp = (struct cmdblk *)mp->b_wptr; 6375 cmdp->cb_cr = crp; 6376 cmdp->cb_cmd = scp->sc_cmd; 6377 cmdp->cb_len = scp->sc_len; 6378 cmdp->cb_error = 0; 6379 mp->b_wptr += sizeof (struct cmdblk); 6380 6381 DB_TYPE(mp) = M_CMD; 6382 DB_CPID(mp) = curproc->p_pid; 6383 6384 /* 6385 * Copy in the payload. 
6386 */ 6387 if (cmdp->cb_len > 0) { 6388 mp->b_cont = allocb_cred(sizeof (scp->sc_buf), crp, 6389 curproc->p_pid); 6390 if (mp->b_cont == NULL) { 6391 error = ENOMEM; 6392 goto out; 6393 } 6394 6395 /* cb_len comes from sc_len, which has already been checked */ 6396 ASSERT(cmdp->cb_len <= sizeof (scp->sc_buf)); 6397 (void) bcopy(scp->sc_buf, mp->b_cont->b_wptr, cmdp->cb_len); 6398 mp->b_cont->b_wptr += cmdp->cb_len; 6399 DB_CPID(mp->b_cont) = curproc->p_pid; 6400 } 6401 6402 /* 6403 * Since this mechanism is strictly for ptools, and since only one 6404 * process can be grabbed at a time, we simply fail if there's 6405 * currently an operation pending. 6406 */ 6407 mutex_enter(&stp->sd_lock); 6408 if (stp->sd_flag & STRCMDWAIT) { 6409 mutex_exit(&stp->sd_lock); 6410 error = EBUSY; 6411 goto out; 6412 } 6413 stp->sd_flag |= STRCMDWAIT; 6414 ASSERT(stp->sd_cmdblk == NULL); 6415 mutex_exit(&stp->sd_lock); 6416 6417 putnext(stp->sd_wrq, mp); 6418 mp = NULL; 6419 6420 /* 6421 * Timed wait for acknowledgment. If the reply has already arrived, 6422 * don't sleep. If awakened from the sleep, fail only if the reply 6423 * has not arrived by then. Otherwise, process the reply. 6424 */ 6425 mutex_enter(&stp->sd_lock); 6426 while (stp->sd_cmdblk == NULL) { 6427 if (stp->sd_flag & errs) { 6428 if ((error = strgeterr(stp, errs, 0)) != 0) 6429 goto waitout; 6430 } 6431 6432 rval = str_cv_wait(&stp->sd_monitor, &stp->sd_lock, timeout, 0); 6433 if (stp->sd_cmdblk != NULL) 6434 break; 6435 6436 if (rval <= 0) { 6437 error = (rval == 0) ? EINTR : ETIME; 6438 goto waitout; 6439 } 6440 } 6441 6442 /* 6443 * We received a reply. 
6444 */ 6445 mp = stp->sd_cmdblk; 6446 stp->sd_cmdblk = NULL; 6447 ASSERT(mp != NULL && DB_TYPE(mp) == M_CMD); 6448 ASSERT(stp->sd_flag & STRCMDWAIT); 6449 stp->sd_flag &= ~STRCMDWAIT; 6450 mutex_exit(&stp->sd_lock); 6451 6452 cmdp = (struct cmdblk *)mp->b_rptr; 6453 if ((error = cmdp->cb_error) != 0) 6454 goto out; 6455 6456 /* 6457 * Data may have been returned in the reply (cb_len > 0). 6458 * If so, copy it out to the user's buffer. 6459 */ 6460 if (cmdp->cb_len > 0) { 6461 if (mp->b_cont == NULL || MBLKL(mp->b_cont) < cmdp->cb_len) { 6462 error = EPROTO; 6463 goto out; 6464 } 6465 6466 cmdp->cb_len = MIN(cmdp->cb_len, sizeof (scp->sc_buf)); 6467 (void) bcopy(mp->b_cont->b_rptr, scp->sc_buf, cmdp->cb_len); 6468 } 6469 scp->sc_len = cmdp->cb_len; 6470 out: 6471 freemsg(mp); 6472 crfree(crp); 6473 return (error); 6474 waitout: 6475 ASSERT(stp->sd_cmdblk == NULL); 6476 stp->sd_flag &= ~STRCMDWAIT; 6477 mutex_exit(&stp->sd_lock); 6478 crfree(crp); 6479 return (error); 6480 } 6481 6482 /* 6483 * For the SunOS keyboard driver. 6484 * Return the next available "ioctl" sequence number. 6485 * Exported, so that streams modules can send "ioctl" messages 6486 * downstream from their open routine. 6487 */ 6488 int 6489 getiocseqno(void) 6490 { 6491 int i; 6492 6493 mutex_enter(&strresources); 6494 i = ++ioc_id; 6495 mutex_exit(&strresources); 6496 return (i); 6497 } 6498 6499 /* 6500 * Get the next message from the read queue. If the message is 6501 * priority, STRPRI will have been set by strrput(). This flag 6502 * should be reset only when the entire message at the front of the 6503 * queue as been consumed. 6504 * 6505 * NOTE: strgetmsg and kstrgetmsg have much of the logic in common. 
6506 */ 6507 int 6508 strgetmsg( 6509 struct vnode *vp, 6510 struct strbuf *mctl, 6511 struct strbuf *mdata, 6512 unsigned char *prip, 6513 int *flagsp, 6514 int fmode, 6515 rval_t *rvp) 6516 { 6517 struct stdata *stp; 6518 mblk_t *bp, *nbp; 6519 mblk_t *savemp = NULL; 6520 mblk_t *savemptail = NULL; 6521 uint_t old_sd_flag; 6522 int flg; 6523 int more = 0; 6524 int error = 0; 6525 char first = 1; 6526 uint_t mark; /* Contains MSG*MARK and _LASTMARK */ 6527 #define _LASTMARK 0x8000 /* Distinct from MSG*MARK */ 6528 unsigned char pri = 0; 6529 queue_t *q; 6530 int pr = 0; /* Partial read successful */ 6531 struct uio uios; 6532 struct uio *uiop = &uios; 6533 struct iovec iovs; 6534 unsigned char type; 6535 6536 TRACE_1(TR_FAC_STREAMS_FR, TR_STRGETMSG_ENTER, 6537 "strgetmsg:%p", vp); 6538 6539 ASSERT(vp->v_stream); 6540 stp = vp->v_stream; 6541 rvp->r_val1 = 0; 6542 6543 mutex_enter(&stp->sd_lock); 6544 6545 if ((error = i_straccess(stp, JCREAD)) != 0) { 6546 mutex_exit(&stp->sd_lock); 6547 return (error); 6548 } 6549 6550 if (stp->sd_flag & (STRDERR|STPLEX)) { 6551 error = strgeterr(stp, STRDERR|STPLEX, 0); 6552 if (error != 0) { 6553 mutex_exit(&stp->sd_lock); 6554 return (error); 6555 } 6556 } 6557 mutex_exit(&stp->sd_lock); 6558 6559 switch (*flagsp) { 6560 case MSG_HIPRI: 6561 if (*prip != 0) 6562 return (EINVAL); 6563 break; 6564 6565 case MSG_ANY: 6566 case MSG_BAND: 6567 break; 6568 6569 default: 6570 return (EINVAL); 6571 } 6572 /* 6573 * Setup uio and iov for data part 6574 */ 6575 iovs.iov_base = mdata->buf; 6576 iovs.iov_len = mdata->maxlen; 6577 uios.uio_iov = &iovs; 6578 uios.uio_iovcnt = 1; 6579 uios.uio_loffset = 0; 6580 uios.uio_segflg = UIO_USERSPACE; 6581 uios.uio_fmode = 0; 6582 uios.uio_extflg = UIO_COPY_CACHED; 6583 uios.uio_resid = mdata->maxlen; 6584 uios.uio_offset = 0; 6585 6586 q = _RD(stp->sd_wrq); 6587 mutex_enter(&stp->sd_lock); 6588 old_sd_flag = stp->sd_flag; 6589 mark = 0; 6590 for (;;) { 6591 int done = 0; 6592 mblk_t *q_first = 
q->q_first; 6593 6594 /* 6595 * Get the next message of appropriate priority 6596 * from the stream head. If the caller is interested 6597 * in band or hipri messages, then they should already 6598 * be enqueued at the stream head. On the other hand 6599 * if the caller wants normal (band 0) messages, they 6600 * might be deferred in a synchronous stream and they 6601 * will need to be pulled up. 6602 * 6603 * After we have dequeued a message, we might find that 6604 * it was a deferred M_SIG that was enqueued at the 6605 * stream head. It must now be posted as part of the 6606 * read by calling strsignal_nolock(). 6607 * 6608 * Also note that strrput does not enqueue an M_PCSIG, 6609 * and there cannot be more than one hipri message, 6610 * so there was no need to have the M_PCSIG case. 6611 * 6612 * At some time it might be nice to try and wrap the 6613 * functionality of kstrgetmsg() and strgetmsg() into 6614 * a common routine so to reduce the amount of replicated 6615 * code (since they are extremely similar). 6616 */ 6617 if (!(*flagsp & (MSG_HIPRI|MSG_BAND))) { 6618 /* Asking for normal, band0 data */ 6619 bp = strget(stp, q, uiop, first, &error); 6620 ASSERT(MUTEX_HELD(&stp->sd_lock)); 6621 if (bp != NULL) { 6622 if (DB_TYPE(bp) == M_SIG) { 6623 strsignal_nolock(stp, *bp->b_rptr, 6624 bp->b_band); 6625 freemsg(bp); 6626 continue; 6627 } else { 6628 break; 6629 } 6630 } 6631 if (error != 0) 6632 goto getmout; 6633 6634 /* 6635 * We can't depend on the value of STRPRI here because 6636 * the stream head may be in transit. 
Therefore, we 6637 * must look at the type of the first message to 6638 * determine if a high priority messages is waiting 6639 */ 6640 } else if ((*flagsp & MSG_HIPRI) && q_first != NULL && 6641 DB_TYPE(q_first) >= QPCTL && 6642 (bp = getq_noenab(q, 0)) != NULL) { 6643 /* Asked for HIPRI and got one */ 6644 ASSERT(DB_TYPE(bp) >= QPCTL); 6645 break; 6646 } else if ((*flagsp & MSG_BAND) && q_first != NULL && 6647 ((q_first->b_band >= *prip) || DB_TYPE(q_first) >= QPCTL) && 6648 (bp = getq_noenab(q, 0)) != NULL) { 6649 /* 6650 * Asked for at least band "prip" and got either at 6651 * least that band or a hipri message. 6652 */ 6653 ASSERT(bp->b_band >= *prip || DB_TYPE(bp) >= QPCTL); 6654 if (DB_TYPE(bp) == M_SIG) { 6655 strsignal_nolock(stp, *bp->b_rptr, bp->b_band); 6656 freemsg(bp); 6657 continue; 6658 } else { 6659 break; 6660 } 6661 } 6662 6663 /* No data. Time to sleep? */ 6664 qbackenable(q, 0); 6665 6666 /* 6667 * If STRHUP or STREOF, return 0 length control and data. 6668 * If resid is 0, then a read(fd,buf,0) was done. Do not 6669 * sleep to satisfy this request because by default we have 6670 * zero bytes to return. 
6671 */ 6672 if ((stp->sd_flag & (STRHUP|STREOF)) || (mctl->maxlen == 0 && 6673 mdata->maxlen == 0)) { 6674 mctl->len = mdata->len = 0; 6675 *flagsp = 0; 6676 mutex_exit(&stp->sd_lock); 6677 return (0); 6678 } 6679 TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_WAIT, 6680 "strgetmsg calls strwaitq:%p, %p", 6681 vp, uiop); 6682 if (((error = strwaitq(stp, GETWAIT, (ssize_t)0, fmode, -1, 6683 &done)) != 0) || done) { 6684 TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_DONE, 6685 "strgetmsg error or done:%p, %p", 6686 vp, uiop); 6687 mutex_exit(&stp->sd_lock); 6688 return (error); 6689 } 6690 TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_AWAKE, 6691 "strgetmsg awakes:%p, %p", vp, uiop); 6692 if ((error = i_straccess(stp, JCREAD)) != 0) { 6693 mutex_exit(&stp->sd_lock); 6694 return (error); 6695 } 6696 first = 0; 6697 } 6698 ASSERT(bp != NULL); 6699 /* 6700 * Extract any mark information. If the message is not completely 6701 * consumed this information will be put in the mblk 6702 * that is putback. 6703 * If MSGMARKNEXT is set and the message is completely consumed 6704 * the STRATMARK flag will be set below. Likewise, if 6705 * MSGNOTMARKNEXT is set and the message is 6706 * completely consumed STRNOTATMARK will be set. 6707 */ 6708 mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT); 6709 ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) != 6710 (MSGMARKNEXT|MSGNOTMARKNEXT)); 6711 if (mark != 0 && bp == stp->sd_mark) { 6712 mark |= _LASTMARK; 6713 stp->sd_mark = NULL; 6714 } 6715 /* 6716 * keep track of the original message type and priority 6717 */ 6718 pri = bp->b_band; 6719 type = bp->b_datap->db_type; 6720 if (type == M_PASSFP) { 6721 if ((mark & _LASTMARK) && (stp->sd_mark == NULL)) 6722 stp->sd_mark = bp; 6723 bp->b_flag |= mark & ~_LASTMARK; 6724 putback(stp, q, bp, pri); 6725 qbackenable(q, pri); 6726 mutex_exit(&stp->sd_lock); 6727 return (EBADMSG); 6728 } 6729 ASSERT(type != M_SIG); 6730 6731 /* 6732 * Set this flag so strrput will not generate signals. 
Need to 6733 * make sure this flag is cleared before leaving this routine 6734 * else signals will stop being sent. 6735 */ 6736 stp->sd_flag |= STRGETINPROG; 6737 mutex_exit(&stp->sd_lock); 6738 6739 if (STREAM_NEEDSERVICE(stp)) 6740 stream_runservice(stp); 6741 6742 /* 6743 * Set HIPRI flag if message is priority. 6744 */ 6745 if (type >= QPCTL) 6746 flg = MSG_HIPRI; 6747 else 6748 flg = MSG_BAND; 6749 6750 /* 6751 * First process PROTO or PCPROTO blocks, if any. 6752 */ 6753 if (mctl->maxlen >= 0 && type != M_DATA) { 6754 size_t n, bcnt; 6755 char *ubuf; 6756 6757 bcnt = mctl->maxlen; 6758 ubuf = mctl->buf; 6759 while (bp != NULL && bp->b_datap->db_type != M_DATA) { 6760 if ((n = MIN(bcnt, bp->b_wptr - bp->b_rptr)) != 0 && 6761 copyout(bp->b_rptr, ubuf, n)) { 6762 error = EFAULT; 6763 mutex_enter(&stp->sd_lock); 6764 /* 6765 * clear stream head pri flag based on 6766 * first message type 6767 */ 6768 if (type >= QPCTL) { 6769 ASSERT(type == M_PCPROTO); 6770 stp->sd_flag &= ~STRPRI; 6771 } 6772 more = 0; 6773 freemsg(bp); 6774 goto getmout; 6775 } 6776 ubuf += n; 6777 bp->b_rptr += n; 6778 if (bp->b_rptr >= bp->b_wptr) { 6779 nbp = bp; 6780 bp = bp->b_cont; 6781 freeb(nbp); 6782 } 6783 ASSERT(n <= bcnt); 6784 bcnt -= n; 6785 if (bcnt == 0) 6786 break; 6787 } 6788 mctl->len = mctl->maxlen - bcnt; 6789 } else 6790 mctl->len = -1; 6791 6792 if (bp && bp->b_datap->db_type != M_DATA) { 6793 /* 6794 * More PROTO blocks in msg. 6795 */ 6796 more |= MORECTL; 6797 savemp = bp; 6798 while (bp && bp->b_datap->db_type != M_DATA) { 6799 savemptail = bp; 6800 bp = bp->b_cont; 6801 } 6802 savemptail->b_cont = NULL; 6803 } 6804 6805 /* 6806 * Now process DATA blocks, if any. 6807 */ 6808 if (mdata->maxlen >= 0 && bp) { 6809 /* 6810 * struiocopyout will consume a potential zero-length 6811 * M_DATA even if uio_resid is zero. 
6812 */ 6813 size_t oldresid = uiop->uio_resid; 6814 6815 bp = struiocopyout(bp, uiop, &error); 6816 if (error != 0) { 6817 mutex_enter(&stp->sd_lock); 6818 /* 6819 * clear stream head hi pri flag based on 6820 * first message 6821 */ 6822 if (type >= QPCTL) { 6823 ASSERT(type == M_PCPROTO); 6824 stp->sd_flag &= ~STRPRI; 6825 } 6826 more = 0; 6827 freemsg(savemp); 6828 goto getmout; 6829 } 6830 /* 6831 * (pr == 1) indicates a partial read. 6832 */ 6833 if (oldresid > uiop->uio_resid) 6834 pr = 1; 6835 mdata->len = mdata->maxlen - uiop->uio_resid; 6836 } else 6837 mdata->len = -1; 6838 6839 if (bp) { /* more data blocks in msg */ 6840 more |= MOREDATA; 6841 if (savemp) 6842 savemptail->b_cont = bp; 6843 else 6844 savemp = bp; 6845 } 6846 6847 mutex_enter(&stp->sd_lock); 6848 if (savemp) { 6849 if (pr && (savemp->b_datap->db_type == M_DATA) && 6850 msgnodata(savemp)) { 6851 /* 6852 * Avoid queuing a zero-length tail part of 6853 * a message. pr=1 indicates that we read some of 6854 * the message. 6855 */ 6856 freemsg(savemp); 6857 more &= ~MOREDATA; 6858 /* 6859 * clear stream head hi pri flag based on 6860 * first message 6861 */ 6862 if (type >= QPCTL) { 6863 ASSERT(type == M_PCPROTO); 6864 stp->sd_flag &= ~STRPRI; 6865 } 6866 } else { 6867 savemp->b_band = pri; 6868 /* 6869 * If the first message was HIPRI and the one we're 6870 * putting back isn't, then clear STRPRI, otherwise 6871 * set STRPRI again. Note that we must set STRPRI 6872 * again since the flush logic in strrput_nondata() 6873 * may have cleared it while we had sd_lock dropped. 6874 */ 6875 if (type >= QPCTL) { 6876 ASSERT(type == M_PCPROTO); 6877 if (queclass(savemp) < QPCTL) 6878 stp->sd_flag &= ~STRPRI; 6879 else 6880 stp->sd_flag |= STRPRI; 6881 } else if (queclass(savemp) >= QPCTL) { 6882 /* 6883 * The first message was not a HIPRI message, 6884 * but the one we are about to putback is. 
6885 * For simplicitly, we do not allow for HIPRI 6886 * messages to be embedded in the message 6887 * body, so just force it to same type as 6888 * first message. 6889 */ 6890 ASSERT(type == M_DATA || type == M_PROTO); 6891 ASSERT(savemp->b_datap->db_type == M_PCPROTO); 6892 savemp->b_datap->db_type = type; 6893 } 6894 if (mark != 0) { 6895 savemp->b_flag |= mark & ~_LASTMARK; 6896 if ((mark & _LASTMARK) && 6897 (stp->sd_mark == NULL)) { 6898 /* 6899 * If another marked message arrived 6900 * while sd_lock was not held sd_mark 6901 * would be non-NULL. 6902 */ 6903 stp->sd_mark = savemp; 6904 } 6905 } 6906 putback(stp, q, savemp, pri); 6907 } 6908 } else { 6909 /* 6910 * The complete message was consumed. 6911 * 6912 * If another M_PCPROTO arrived while sd_lock was not held 6913 * it would have been discarded since STRPRI was still set. 6914 * 6915 * Move the MSG*MARKNEXT information 6916 * to the stream head just in case 6917 * the read queue becomes empty. 6918 * clear stream head hi pri flag based on 6919 * first message 6920 * 6921 * If the stream head was at the mark 6922 * (STRATMARK) before we dropped sd_lock above 6923 * and some data was consumed then we have 6924 * moved past the mark thus STRATMARK is 6925 * cleared. However, if a message arrived in 6926 * strrput during the copyout above causing 6927 * STRATMARK to be set we can not clear that 6928 * flag. 
6929 */ 6930 if (type >= QPCTL) { 6931 ASSERT(type == M_PCPROTO); 6932 stp->sd_flag &= ~STRPRI; 6933 } 6934 if (mark & (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) { 6935 if (mark & MSGMARKNEXT) { 6936 stp->sd_flag &= ~STRNOTATMARK; 6937 stp->sd_flag |= STRATMARK; 6938 } else if (mark & MSGNOTMARKNEXT) { 6939 stp->sd_flag &= ~STRATMARK; 6940 stp->sd_flag |= STRNOTATMARK; 6941 } else { 6942 stp->sd_flag &= ~(STRATMARK|STRNOTATMARK); 6943 } 6944 } else if (pr && (old_sd_flag & STRATMARK)) { 6945 stp->sd_flag &= ~STRATMARK; 6946 } 6947 } 6948 6949 *flagsp = flg; 6950 *prip = pri; 6951 6952 /* 6953 * Getmsg cleanup processing - if the state of the queue has changed 6954 * some signals may need to be sent and/or poll awakened. 6955 */ 6956 getmout: 6957 qbackenable(q, pri); 6958 6959 /* 6960 * We dropped the stream head lock above. Send all M_SIG messages 6961 * before processing stream head for SIGPOLL messages. 6962 */ 6963 ASSERT(MUTEX_HELD(&stp->sd_lock)); 6964 while ((bp = q->q_first) != NULL && 6965 (bp->b_datap->db_type == M_SIG)) { 6966 /* 6967 * sd_lock is held so the content of the read queue can not 6968 * change. 6969 */ 6970 bp = getq(q); 6971 ASSERT(bp != NULL && bp->b_datap->db_type == M_SIG); 6972 6973 strsignal_nolock(stp, *bp->b_rptr, bp->b_band); 6974 mutex_exit(&stp->sd_lock); 6975 freemsg(bp); 6976 if (STREAM_NEEDSERVICE(stp)) 6977 stream_runservice(stp); 6978 mutex_enter(&stp->sd_lock); 6979 } 6980 6981 /* 6982 * stream head cannot change while we make the determination 6983 * whether or not to send a signal. Drop the flag to allow strrput 6984 * to send firstmsgsigs again. 6985 */ 6986 stp->sd_flag &= ~STRGETINPROG; 6987 6988 /* 6989 * If the type of message at the front of the queue changed 6990 * due to the receive the appropriate signals and pollwakeup events 6991 * are generated. The type of changes are: 6992 * Processed a hipri message, q_first is not hipri. 6993 * Processed a band X message, and q_first is band Y. 
6994 * The generated signals and pollwakeups are identical to what 6995 * strrput() generates should the message that is now on q_first 6996 * arrive to an empty read queue. 6997 * 6998 * Note: only strrput will send a signal for a hipri message. 6999 */ 7000 if ((bp = q->q_first) != NULL && !(stp->sd_flag & STRPRI)) { 7001 strsigset_t signals = 0; 7002 strpollset_t pollwakeups = 0; 7003 7004 if (flg & MSG_HIPRI) { 7005 /* 7006 * Removed a hipri message. Regular data at 7007 * the front of the queue. 7008 */ 7009 if (bp->b_band == 0) { 7010 signals = S_INPUT | S_RDNORM; 7011 pollwakeups = POLLIN | POLLRDNORM; 7012 } else { 7013 signals = S_INPUT | S_RDBAND; 7014 pollwakeups = POLLIN | POLLRDBAND; 7015 } 7016 } else if (pri != bp->b_band) { 7017 /* 7018 * The band is different for the new q_first. 7019 */ 7020 if (bp->b_band == 0) { 7021 signals = S_RDNORM; 7022 pollwakeups = POLLIN | POLLRDNORM; 7023 } else { 7024 signals = S_RDBAND; 7025 pollwakeups = POLLIN | POLLRDBAND; 7026 } 7027 } 7028 7029 if (pollwakeups != 0) { 7030 if (pollwakeups == (POLLIN | POLLRDNORM)) { 7031 if (!(stp->sd_rput_opt & SR_POLLIN)) 7032 goto no_pollwake; 7033 stp->sd_rput_opt &= ~SR_POLLIN; 7034 } 7035 mutex_exit(&stp->sd_lock); 7036 pollwakeup(&stp->sd_pollist, pollwakeups); 7037 mutex_enter(&stp->sd_lock); 7038 } 7039 no_pollwake: 7040 7041 if (stp->sd_sigflags & signals) 7042 strsendsig(stp->sd_siglist, signals, bp->b_band, 0); 7043 } 7044 mutex_exit(&stp->sd_lock); 7045 7046 rvp->r_val1 = more; 7047 return (error); 7048 #undef _LASTMARK 7049 } 7050 7051 /* 7052 * Get the next message from the read queue. If the message is 7053 * priority, STRPRI will have been set by strrput(). This flag 7054 * should be reset only when the entire message at the front of the 7055 * queue as been consumed. 7056 * 7057 * If uiop is NULL all data is returned in mctlp. 7058 * Note that a NULL uiop implies that FNDELAY and FNONBLOCK are assumed 7059 * not enabled. 
7060 * The timeout parameter is in milliseconds; -1 for infinity. 7061 * This routine handles the consolidation private flags: 7062 * MSG_IGNERROR Ignore any stream head error except STPLEX. 7063 * MSG_DELAYERROR Defer the error check until the queue is empty. 7064 * MSG_HOLDSIG Hold signals while waiting for data. 7065 * MSG_IPEEK Only peek at messages. 7066 * MSG_DISCARDTAIL Discard the tail M_DATA part of the message 7067 * that doesn't fit. 7068 * MSG_NOMARK If the message is marked leave it on the queue. 7069 * 7070 * NOTE: strgetmsg and kstrgetmsg have much of the logic in common. 7071 */ 7072 int 7073 kstrgetmsg( 7074 struct vnode *vp, 7075 mblk_t **mctlp, 7076 struct uio *uiop, 7077 unsigned char *prip, 7078 int *flagsp, 7079 clock_t timout, 7080 rval_t *rvp) 7081 { 7082 struct stdata *stp; 7083 mblk_t *bp, *nbp; 7084 mblk_t *savemp = NULL; 7085 mblk_t *savemptail = NULL; 7086 int flags; 7087 uint_t old_sd_flag; 7088 int flg; 7089 int more = 0; 7090 int error = 0; 7091 char first = 1; 7092 uint_t mark; /* Contains MSG*MARK and _LASTMARK */ 7093 #define _LASTMARK 0x8000 /* Distinct from MSG*MARK */ 7094 unsigned char pri = 0; 7095 queue_t *q; 7096 int pr = 0; /* Partial read successful */ 7097 unsigned char type; 7098 7099 TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_ENTER, 7100 "kstrgetmsg:%p", vp); 7101 7102 ASSERT(vp->v_stream); 7103 stp = vp->v_stream; 7104 rvp->r_val1 = 0; 7105 7106 mutex_enter(&stp->sd_lock); 7107 7108 if ((error = i_straccess(stp, JCREAD)) != 0) { 7109 mutex_exit(&stp->sd_lock); 7110 return (error); 7111 } 7112 7113 flags = *flagsp; 7114 if (stp->sd_flag & (STRDERR|STPLEX)) { 7115 if ((stp->sd_flag & STPLEX) || 7116 (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == 0) { 7117 error = strgeterr(stp, STRDERR|STPLEX, 7118 (flags & MSG_IPEEK)); 7119 if (error != 0) { 7120 mutex_exit(&stp->sd_lock); 7121 return (error); 7122 } 7123 } 7124 } 7125 mutex_exit(&stp->sd_lock); 7126 7127 switch (flags & (MSG_HIPRI|MSG_ANY|MSG_BAND)) { 7128 case MSG_HIPRI: 
7129 if (*prip != 0) 7130 return (EINVAL); 7131 break; 7132 7133 case MSG_ANY: 7134 case MSG_BAND: 7135 break; 7136 7137 default: 7138 return (EINVAL); 7139 } 7140 7141 retry: 7142 q = _RD(stp->sd_wrq); 7143 mutex_enter(&stp->sd_lock); 7144 old_sd_flag = stp->sd_flag; 7145 mark = 0; 7146 for (;;) { 7147 int done = 0; 7148 int waitflag; 7149 int fmode; 7150 mblk_t *q_first = q->q_first; 7151 7152 /* 7153 * This section of the code operates just like the code 7154 * in strgetmsg(). There is a comment there about what 7155 * is going on here. 7156 */ 7157 if (!(flags & (MSG_HIPRI|MSG_BAND))) { 7158 /* Asking for normal, band0 data */ 7159 bp = strget(stp, q, uiop, first, &error); 7160 ASSERT(MUTEX_HELD(&stp->sd_lock)); 7161 if (bp != NULL) { 7162 if (DB_TYPE(bp) == M_SIG) { 7163 strsignal_nolock(stp, *bp->b_rptr, 7164 bp->b_band); 7165 freemsg(bp); 7166 continue; 7167 } else { 7168 break; 7169 } 7170 } 7171 if (error != 0) { 7172 goto getmout; 7173 } 7174 /* 7175 * We can't depend on the value of STRPRI here because 7176 * the stream head may be in transit. Therefore, we 7177 * must look at the type of the first message to 7178 * determine if a high priority messages is waiting 7179 */ 7180 } else if ((flags & MSG_HIPRI) && q_first != NULL && 7181 DB_TYPE(q_first) >= QPCTL && 7182 (bp = getq_noenab(q, 0)) != NULL) { 7183 ASSERT(DB_TYPE(bp) >= QPCTL); 7184 break; 7185 } else if ((flags & MSG_BAND) && q_first != NULL && 7186 ((q_first->b_band >= *prip) || DB_TYPE(q_first) >= QPCTL) && 7187 (bp = getq_noenab(q, 0)) != NULL) { 7188 /* 7189 * Asked for at least band "prip" and got either at 7190 * least that band or a hipri message. 7191 */ 7192 ASSERT(bp->b_band >= *prip || DB_TYPE(bp) >= QPCTL); 7193 if (DB_TYPE(bp) == M_SIG) { 7194 strsignal_nolock(stp, *bp->b_rptr, bp->b_band); 7195 freemsg(bp); 7196 continue; 7197 } else { 7198 break; 7199 } 7200 } 7201 7202 /* No data. Time to sleep? */ 7203 qbackenable(q, 0); 7204 7205 /* 7206 * Delayed error notification? 
7207 */ 7208 if ((stp->sd_flag & (STRDERR|STPLEX)) && 7209 (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == MSG_DELAYERROR) { 7210 error = strgeterr(stp, STRDERR|STPLEX, 7211 (flags & MSG_IPEEK)); 7212 if (error != 0) { 7213 mutex_exit(&stp->sd_lock); 7214 return (error); 7215 } 7216 } 7217 7218 /* 7219 * If STRHUP or STREOF, return 0 length control and data. 7220 * If a read(fd,buf,0) has been done, do not sleep, just 7221 * return. 7222 * 7223 * If mctlp == NULL and uiop == NULL, then the code will 7224 * do the strwaitq. This is an understood way of saying 7225 * sleep "polling" until a message is received. 7226 */ 7227 if ((stp->sd_flag & (STRHUP|STREOF)) || 7228 (uiop != NULL && uiop->uio_resid == 0)) { 7229 if (mctlp != NULL) 7230 *mctlp = NULL; 7231 *flagsp = 0; 7232 mutex_exit(&stp->sd_lock); 7233 return (0); 7234 } 7235 7236 waitflag = GETWAIT; 7237 if (flags & 7238 (MSG_HOLDSIG|MSG_IGNERROR|MSG_IPEEK|MSG_DELAYERROR)) { 7239 if (flags & MSG_HOLDSIG) 7240 waitflag |= STR_NOSIG; 7241 if (flags & MSG_IGNERROR) 7242 waitflag |= STR_NOERROR; 7243 if (flags & MSG_IPEEK) 7244 waitflag |= STR_PEEK; 7245 if (flags & MSG_DELAYERROR) 7246 waitflag |= STR_DELAYERR; 7247 } 7248 if (uiop != NULL) 7249 fmode = uiop->uio_fmode; 7250 else 7251 fmode = 0; 7252 7253 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_WAIT, 7254 "kstrgetmsg calls strwaitq:%p, %p", 7255 vp, uiop); 7256 if (((error = strwaitq(stp, waitflag, (ssize_t)0, 7257 fmode, timout, &done))) != 0 || done) { 7258 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_DONE, 7259 "kstrgetmsg error or done:%p, %p", 7260 vp, uiop); 7261 mutex_exit(&stp->sd_lock); 7262 return (error); 7263 } 7264 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_AWAKE, 7265 "kstrgetmsg awakes:%p, %p", vp, uiop); 7266 if ((error = i_straccess(stp, JCREAD)) != 0) { 7267 mutex_exit(&stp->sd_lock); 7268 return (error); 7269 } 7270 first = 0; 7271 } 7272 ASSERT(bp != NULL); 7273 /* 7274 * Extract any mark information. 
If the message is not completely 7275 * consumed this information will be put in the mblk 7276 * that is putback. 7277 * If MSGMARKNEXT is set and the message is completely consumed 7278 * the STRATMARK flag will be set below. Likewise, if 7279 * MSGNOTMARKNEXT is set and the message is 7280 * completely consumed STRNOTATMARK will be set. 7281 */ 7282 mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT); 7283 ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) != 7284 (MSGMARKNEXT|MSGNOTMARKNEXT)); 7285 pri = bp->b_band; 7286 if (mark != 0) { 7287 /* 7288 * If the caller doesn't want the mark return. 7289 * Used to implement MSG_WAITALL in sockets. 7290 */ 7291 if (flags & MSG_NOMARK) { 7292 putback(stp, q, bp, pri); 7293 qbackenable(q, pri); 7294 mutex_exit(&stp->sd_lock); 7295 return (EWOULDBLOCK); 7296 } 7297 if (bp == stp->sd_mark) { 7298 mark |= _LASTMARK; 7299 stp->sd_mark = NULL; 7300 } 7301 } 7302 7303 /* 7304 * keep track of the first message type 7305 */ 7306 type = bp->b_datap->db_type; 7307 7308 if (bp->b_datap->db_type == M_PASSFP) { 7309 if ((mark & _LASTMARK) && (stp->sd_mark == NULL)) 7310 stp->sd_mark = bp; 7311 bp->b_flag |= mark & ~_LASTMARK; 7312 putback(stp, q, bp, pri); 7313 qbackenable(q, pri); 7314 mutex_exit(&stp->sd_lock); 7315 return (EBADMSG); 7316 } 7317 ASSERT(type != M_SIG); 7318 7319 if (flags & MSG_IPEEK) { 7320 /* 7321 * Clear any struioflag - we do the uiomove over again 7322 * when peeking since it simplifies the code. 7323 * 7324 * Dup the message and put the original back on the queue. 7325 * If dupmsg() fails, try again with copymsg() to see if 7326 * there is indeed a shortage of memory. dupmsg() may fail 7327 * if db_ref in any of the messages reaches its limit. 7328 */ 7329 7330 if ((nbp = dupmsg(bp)) == NULL && (nbp = copymsg(bp)) == NULL) { 7331 /* 7332 * Restore the state of the stream head since we 7333 * need to drop sd_lock (strwaitbuf is sleeping). 
7334 */ 7335 size_t size = msgdsize(bp); 7336 7337 if ((mark & _LASTMARK) && (stp->sd_mark == NULL)) 7338 stp->sd_mark = bp; 7339 bp->b_flag |= mark & ~_LASTMARK; 7340 putback(stp, q, bp, pri); 7341 mutex_exit(&stp->sd_lock); 7342 error = strwaitbuf(size, BPRI_HI); 7343 if (error) { 7344 /* 7345 * There is no net change to the queue thus 7346 * no need to qbackenable. 7347 */ 7348 return (error); 7349 } 7350 goto retry; 7351 } 7352 7353 if ((mark & _LASTMARK) && (stp->sd_mark == NULL)) 7354 stp->sd_mark = bp; 7355 bp->b_flag |= mark & ~_LASTMARK; 7356 putback(stp, q, bp, pri); 7357 bp = nbp; 7358 } 7359 7360 /* 7361 * Set this flag so strrput will not generate signals. Need to 7362 * make sure this flag is cleared before leaving this routine 7363 * else signals will stop being sent. 7364 */ 7365 stp->sd_flag |= STRGETINPROG; 7366 mutex_exit(&stp->sd_lock); 7367 7368 if ((stp->sd_rputdatafunc != NULL) && (DB_TYPE(bp) == M_DATA)) { 7369 mblk_t *tmp, *prevmp; 7370 7371 /* 7372 * Put first non-data mblk back to stream head and 7373 * cut the mblk chain so sd_rputdatafunc only sees 7374 * M_DATA mblks. We can skip the first mblk since it 7375 * is M_DATA according to the condition above. 7376 */ 7377 for (prevmp = bp, tmp = bp->b_cont; tmp != NULL; 7378 prevmp = tmp, tmp = tmp->b_cont) { 7379 if (DB_TYPE(tmp) != M_DATA) { 7380 prevmp->b_cont = NULL; 7381 mutex_enter(&stp->sd_lock); 7382 putback(stp, q, tmp, tmp->b_band); 7383 mutex_exit(&stp->sd_lock); 7384 break; 7385 } 7386 } 7387 7388 bp = (stp->sd_rputdatafunc)(stp->sd_vnode, bp, 7389 NULL, NULL, NULL, NULL); 7390 7391 if (bp == NULL) 7392 goto retry; 7393 } 7394 7395 if (STREAM_NEEDSERVICE(stp)) 7396 stream_runservice(stp); 7397 7398 /* 7399 * Set HIPRI flag if message is priority. 7400 */ 7401 if (type >= QPCTL) 7402 flg = MSG_HIPRI; 7403 else 7404 flg = MSG_BAND; 7405 7406 /* 7407 * First process PROTO or PCPROTO blocks, if any. 
7408 */ 7409 if (mctlp != NULL && type != M_DATA) { 7410 mblk_t *nbp; 7411 7412 *mctlp = bp; 7413 while (bp->b_cont && bp->b_cont->b_datap->db_type != M_DATA) 7414 bp = bp->b_cont; 7415 nbp = bp->b_cont; 7416 bp->b_cont = NULL; 7417 bp = nbp; 7418 } 7419 7420 if (bp && bp->b_datap->db_type != M_DATA) { 7421 /* 7422 * More PROTO blocks in msg. Will only happen if mctlp is NULL. 7423 */ 7424 more |= MORECTL; 7425 savemp = bp; 7426 while (bp && bp->b_datap->db_type != M_DATA) { 7427 savemptail = bp; 7428 bp = bp->b_cont; 7429 } 7430 savemptail->b_cont = NULL; 7431 } 7432 7433 /* 7434 * Now process DATA blocks, if any. 7435 */ 7436 if (uiop == NULL) { 7437 /* Append data to tail of mctlp */ 7438 7439 if (mctlp != NULL) { 7440 mblk_t **mpp = mctlp; 7441 7442 while (*mpp != NULL) 7443 mpp = &((*mpp)->b_cont); 7444 *mpp = bp; 7445 bp = NULL; 7446 } 7447 } else if (uiop->uio_resid >= 0 && bp) { 7448 size_t oldresid = uiop->uio_resid; 7449 7450 /* 7451 * If a streams message is likely to consist 7452 * of many small mblks, it is pulled up into 7453 * one continuous chunk of memory. 7454 * The size of the first mblk may be bogus because 7455 * successive read() calls on the socket reduce 7456 * the size of this mblk until it is exhausted 7457 * and then the code walks on to the next. Thus 7458 * the size of the mblk may not be the original size 7459 * that was passed up, it's simply a remainder 7460 * and hence can be very small without any 7461 * implication that the packet is badly fragmented. 7462 * So the size of the possible second mblk is 7463 * used to spot a badly fragmented packet. 7464 * see longer comment at top of page 7465 * by mblk_pull_len declaration. 
7466 */ 7467 7468 if (bp->b_cont != NULL && MBLKL(bp->b_cont) < mblk_pull_len) { 7469 (void) pullupmsg(bp, -1); 7470 } 7471 7472 bp = struiocopyout(bp, uiop, &error); 7473 if (error != 0) { 7474 if (mctlp != NULL) { 7475 freemsg(*mctlp); 7476 *mctlp = NULL; 7477 } else 7478 freemsg(savemp); 7479 mutex_enter(&stp->sd_lock); 7480 /* 7481 * clear stream head hi pri flag based on 7482 * first message 7483 */ 7484 if (!(flags & MSG_IPEEK) && (type >= QPCTL)) { 7485 ASSERT(type == M_PCPROTO); 7486 stp->sd_flag &= ~STRPRI; 7487 } 7488 more = 0; 7489 goto getmout; 7490 } 7491 /* 7492 * (pr == 1) indicates a partial read. 7493 */ 7494 if (oldresid > uiop->uio_resid) 7495 pr = 1; 7496 } 7497 7498 if (bp) { /* more data blocks in msg */ 7499 more |= MOREDATA; 7500 if (savemp) 7501 savemptail->b_cont = bp; 7502 else 7503 savemp = bp; 7504 } 7505 7506 mutex_enter(&stp->sd_lock); 7507 if (savemp) { 7508 if (flags & (MSG_IPEEK|MSG_DISCARDTAIL)) { 7509 /* 7510 * When MSG_DISCARDTAIL is set or 7511 * when peeking discard any tail. When peeking this 7512 * is the tail of the dup that was copied out - the 7513 * message has already been putback on the queue. 7514 * Return MOREDATA to the caller even though the data 7515 * is discarded. This is used by sockets (to 7516 * set MSG_TRUNC). 7517 */ 7518 freemsg(savemp); 7519 if (!(flags & MSG_IPEEK) && (type >= QPCTL)) { 7520 ASSERT(type == M_PCPROTO); 7521 stp->sd_flag &= ~STRPRI; 7522 } 7523 } else if (pr && (savemp->b_datap->db_type == M_DATA) && 7524 msgnodata(savemp)) { 7525 /* 7526 * Avoid queuing a zero-length tail part of 7527 * a message. pr=1 indicates that we read some of 7528 * the message. 7529 */ 7530 freemsg(savemp); 7531 more &= ~MOREDATA; 7532 if (type >= QPCTL) { 7533 ASSERT(type == M_PCPROTO); 7534 stp->sd_flag &= ~STRPRI; 7535 } 7536 } else { 7537 savemp->b_band = pri; 7538 /* 7539 * If the first message was HIPRI and the one we're 7540 * putting back isn't, then clear STRPRI, otherwise 7541 * set STRPRI again. 
Note that we must set STRPRI 7542 * again since the flush logic in strrput_nondata() 7543 * may have cleared it while we had sd_lock dropped. 7544 */ 7545 7546 if (type >= QPCTL) { 7547 ASSERT(type == M_PCPROTO); 7548 if (queclass(savemp) < QPCTL) 7549 stp->sd_flag &= ~STRPRI; 7550 else 7551 stp->sd_flag |= STRPRI; 7552 } else if (queclass(savemp) >= QPCTL) { 7553 /* 7554 * The first message was not a HIPRI message, 7555 * but the one we are about to putback is. 7556 * For simplicitly, we do not allow for HIPRI 7557 * messages to be embedded in the message 7558 * body, so just force it to same type as 7559 * first message. 7560 */ 7561 ASSERT(type == M_DATA || type == M_PROTO); 7562 ASSERT(savemp->b_datap->db_type == M_PCPROTO); 7563 savemp->b_datap->db_type = type; 7564 } 7565 if (mark != 0) { 7566 if ((mark & _LASTMARK) && 7567 (stp->sd_mark == NULL)) { 7568 /* 7569 * If another marked message arrived 7570 * while sd_lock was not held sd_mark 7571 * would be non-NULL. 7572 */ 7573 stp->sd_mark = savemp; 7574 } 7575 savemp->b_flag |= mark & ~_LASTMARK; 7576 } 7577 putback(stp, q, savemp, pri); 7578 } 7579 } else if (!(flags & MSG_IPEEK)) { 7580 /* 7581 * The complete message was consumed. 7582 * 7583 * If another M_PCPROTO arrived while sd_lock was not held 7584 * it would have been discarded since STRPRI was still set. 7585 * 7586 * Move the MSG*MARKNEXT information 7587 * to the stream head just in case 7588 * the read queue becomes empty. 7589 * clear stream head hi pri flag based on 7590 * first message 7591 * 7592 * If the stream head was at the mark 7593 * (STRATMARK) before we dropped sd_lock above 7594 * and some data was consumed then we have 7595 * moved past the mark thus STRATMARK is 7596 * cleared. However, if a message arrived in 7597 * strrput during the copyout above causing 7598 * STRATMARK to be set we can not clear that 7599 * flag. 7600 * XXX A "perimeter" would help by single-threading strrput, 7601 * strread, strgetmsg and kstrgetmsg. 
7602 */ 7603 if (type >= QPCTL) { 7604 ASSERT(type == M_PCPROTO); 7605 stp->sd_flag &= ~STRPRI; 7606 } 7607 if (mark & (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) { 7608 if (mark & MSGMARKNEXT) { 7609 stp->sd_flag &= ~STRNOTATMARK; 7610 stp->sd_flag |= STRATMARK; 7611 } else if (mark & MSGNOTMARKNEXT) { 7612 stp->sd_flag &= ~STRATMARK; 7613 stp->sd_flag |= STRNOTATMARK; 7614 } else { 7615 stp->sd_flag &= ~(STRATMARK|STRNOTATMARK); 7616 } 7617 } else if (pr && (old_sd_flag & STRATMARK)) { 7618 stp->sd_flag &= ~STRATMARK; 7619 } 7620 } 7621 7622 *flagsp = flg; 7623 *prip = pri; 7624 7625 /* 7626 * Getmsg cleanup processing - if the state of the queue has changed 7627 * some signals may need to be sent and/or poll awakened. 7628 */ 7629 getmout: 7630 qbackenable(q, pri); 7631 7632 /* 7633 * We dropped the stream head lock above. Send all M_SIG messages 7634 * before processing stream head for SIGPOLL messages. 7635 */ 7636 ASSERT(MUTEX_HELD(&stp->sd_lock)); 7637 while ((bp = q->q_first) != NULL && 7638 (bp->b_datap->db_type == M_SIG)) { 7639 /* 7640 * sd_lock is held so the content of the read queue can not 7641 * change. 7642 */ 7643 bp = getq(q); 7644 ASSERT(bp != NULL && bp->b_datap->db_type == M_SIG); 7645 7646 strsignal_nolock(stp, *bp->b_rptr, bp->b_band); 7647 mutex_exit(&stp->sd_lock); 7648 freemsg(bp); 7649 if (STREAM_NEEDSERVICE(stp)) 7650 stream_runservice(stp); 7651 mutex_enter(&stp->sd_lock); 7652 } 7653 7654 /* 7655 * stream head cannot change while we make the determination 7656 * whether or not to send a signal. Drop the flag to allow strrput 7657 * to send firstmsgsigs again. 7658 */ 7659 stp->sd_flag &= ~STRGETINPROG; 7660 7661 /* 7662 * If the type of message at the front of the queue changed 7663 * due to the receive the appropriate signals and pollwakeup events 7664 * are generated. The type of changes are: 7665 * Processed a hipri message, q_first is not hipri. 7666 * Processed a band X message, and q_first is band Y. 
7667 * The generated signals and pollwakeups are identical to what 7668 * strrput() generates should the message that is now on q_first 7669 * arrive to an empty read queue. 7670 * 7671 * Note: only strrput will send a signal for a hipri message. 7672 */ 7673 if ((bp = q->q_first) != NULL && !(stp->sd_flag & STRPRI)) { 7674 strsigset_t signals = 0; 7675 strpollset_t pollwakeups = 0; 7676 7677 if (flg & MSG_HIPRI) { 7678 /* 7679 * Removed a hipri message. Regular data at 7680 * the front of the queue. 7681 */ 7682 if (bp->b_band == 0) { 7683 signals = S_INPUT | S_RDNORM; 7684 pollwakeups = POLLIN | POLLRDNORM; 7685 } else { 7686 signals = S_INPUT | S_RDBAND; 7687 pollwakeups = POLLIN | POLLRDBAND; 7688 } 7689 } else if (pri != bp->b_band) { 7690 /* 7691 * The band is different for the new q_first. 7692 */ 7693 if (bp->b_band == 0) { 7694 signals = S_RDNORM; 7695 pollwakeups = POLLIN | POLLRDNORM; 7696 } else { 7697 signals = S_RDBAND; 7698 pollwakeups = POLLIN | POLLRDBAND; 7699 } 7700 } 7701 7702 if (pollwakeups != 0) { 7703 if (pollwakeups == (POLLIN | POLLRDNORM)) { 7704 if (!(stp->sd_rput_opt & SR_POLLIN)) 7705 goto no_pollwake; 7706 stp->sd_rput_opt &= ~SR_POLLIN; 7707 } 7708 mutex_exit(&stp->sd_lock); 7709 pollwakeup(&stp->sd_pollist, pollwakeups); 7710 mutex_enter(&stp->sd_lock); 7711 } 7712 no_pollwake: 7713 7714 if (stp->sd_sigflags & signals) 7715 strsendsig(stp->sd_siglist, signals, bp->b_band, 0); 7716 } 7717 mutex_exit(&stp->sd_lock); 7718 7719 rvp->r_val1 = more; 7720 return (error); 7721 #undef _LASTMARK 7722 } 7723 7724 /* 7725 * Put a message downstream. 7726 * 7727 * NOTE: strputmsg and kstrputmsg have much of the logic in common. 
7728 */ 7729 int 7730 strputmsg( 7731 struct vnode *vp, 7732 struct strbuf *mctl, 7733 struct strbuf *mdata, 7734 unsigned char pri, 7735 int flag, 7736 int fmode) 7737 { 7738 struct stdata *stp; 7739 queue_t *wqp; 7740 mblk_t *mp; 7741 ssize_t msgsize; 7742 ssize_t rmin, rmax; 7743 int error; 7744 struct uio uios; 7745 struct uio *uiop = &uios; 7746 struct iovec iovs; 7747 int xpg4 = 0; 7748 7749 ASSERT(vp->v_stream); 7750 stp = vp->v_stream; 7751 wqp = stp->sd_wrq; 7752 7753 /* 7754 * If it is an XPG4 application, we need to send 7755 * SIGPIPE below 7756 */ 7757 7758 xpg4 = (flag & MSG_XPG4) ? 1 : 0; 7759 flag &= ~MSG_XPG4; 7760 7761 if (AU_AUDITING()) 7762 audit_strputmsg(vp, mctl, mdata, pri, flag, fmode); 7763 7764 mutex_enter(&stp->sd_lock); 7765 7766 if ((error = i_straccess(stp, JCWRITE)) != 0) { 7767 mutex_exit(&stp->sd_lock); 7768 return (error); 7769 } 7770 7771 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { 7772 error = strwriteable(stp, B_FALSE, xpg4); 7773 if (error != 0) { 7774 mutex_exit(&stp->sd_lock); 7775 return (error); 7776 } 7777 } 7778 7779 mutex_exit(&stp->sd_lock); 7780 7781 /* 7782 * Check for legal flag value. 7783 */ 7784 switch (flag) { 7785 case MSG_HIPRI: 7786 if ((mctl->len < 0) || (pri != 0)) 7787 return (EINVAL); 7788 break; 7789 case MSG_BAND: 7790 break; 7791 7792 default: 7793 return (EINVAL); 7794 } 7795 7796 TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_IN, 7797 "strputmsg in:stp %p", stp); 7798 7799 /* get these values from those cached in the stream head */ 7800 rmin = stp->sd_qn_minpsz; 7801 rmax = stp->sd_qn_maxpsz; 7802 7803 /* 7804 * Make sure ctl and data sizes together fall within the 7805 * limits of the max and min receive packet sizes and do 7806 * not exceed system limit. 7807 */ 7808 ASSERT((rmax >= 0) || (rmax == INFPSZ)); 7809 if (rmax == 0) { 7810 return (ERANGE); 7811 } 7812 /* 7813 * Use the MAXIMUM of sd_maxblk and q_maxpsz. 7814 * Needed to prevent partial failures in the strmakedata loop. 
7815 */ 7816 if (stp->sd_maxblk != INFPSZ && rmax != INFPSZ && rmax < stp->sd_maxblk) 7817 rmax = stp->sd_maxblk; 7818 7819 if ((msgsize = mdata->len) < 0) { 7820 msgsize = 0; 7821 rmin = 0; /* no range check for NULL data part */ 7822 } 7823 if ((msgsize < rmin) || 7824 ((msgsize > rmax) && (rmax != INFPSZ)) || 7825 (mctl->len > strctlsz)) { 7826 return (ERANGE); 7827 } 7828 7829 /* 7830 * Setup uio and iov for data part 7831 */ 7832 iovs.iov_base = mdata->buf; 7833 iovs.iov_len = msgsize; 7834 uios.uio_iov = &iovs; 7835 uios.uio_iovcnt = 1; 7836 uios.uio_loffset = 0; 7837 uios.uio_segflg = UIO_USERSPACE; 7838 uios.uio_fmode = fmode; 7839 uios.uio_extflg = UIO_COPY_DEFAULT; 7840 uios.uio_resid = msgsize; 7841 uios.uio_offset = 0; 7842 7843 /* Ignore flow control in strput for HIPRI */ 7844 if (flag & MSG_HIPRI) 7845 flag |= MSG_IGNFLOW; 7846 7847 for (;;) { 7848 int done = 0; 7849 7850 /* 7851 * strput will always free the ctl mblk - even when strput 7852 * fails. 7853 */ 7854 if ((error = strmakectl(mctl, flag, fmode, &mp)) != 0) { 7855 TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT, 7856 "strputmsg out:stp %p out %d error %d", 7857 stp, 1, error); 7858 return (error); 7859 } 7860 /* 7861 * Verify that the whole message can be transferred by 7862 * strput. 7863 */ 7864 ASSERT(stp->sd_maxblk == INFPSZ || 7865 stp->sd_maxblk >= mdata->len); 7866 7867 msgsize = mdata->len; 7868 error = strput(stp, mp, uiop, &msgsize, 0, pri, flag); 7869 mdata->len = msgsize; 7870 7871 if (error == 0) 7872 break; 7873 7874 if (error != EWOULDBLOCK) 7875 goto out; 7876 7877 mutex_enter(&stp->sd_lock); 7878 /* 7879 * Check for a missed wakeup. 7880 * Needed since strput did not hold sd_lock across 7881 * the canputnext. 
7882 */ 7883 if (bcanputnext(wqp, pri)) { 7884 /* Try again */ 7885 mutex_exit(&stp->sd_lock); 7886 continue; 7887 } 7888 TRACE_2(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAIT, 7889 "strputmsg wait:stp %p waits pri %d", stp, pri); 7890 if (((error = strwaitq(stp, WRITEWAIT, (ssize_t)0, fmode, -1, 7891 &done)) != 0) || done) { 7892 mutex_exit(&stp->sd_lock); 7893 TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT, 7894 "strputmsg out:q %p out %d error %d", 7895 stp, 0, error); 7896 return (error); 7897 } 7898 TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAKE, 7899 "strputmsg wake:stp %p wakes", stp); 7900 if ((error = i_straccess(stp, JCWRITE)) != 0) { 7901 mutex_exit(&stp->sd_lock); 7902 return (error); 7903 } 7904 mutex_exit(&stp->sd_lock); 7905 } 7906 out: 7907 /* 7908 * For historic reasons, applications expect EAGAIN 7909 * when data mblk could not be allocated. so change 7910 * ENOMEM back to EAGAIN 7911 */ 7912 if (error == ENOMEM) 7913 error = EAGAIN; 7914 TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT, 7915 "strputmsg out:stp %p out %d error %d", stp, 2, error); 7916 return (error); 7917 } 7918 7919 /* 7920 * Put a message downstream. 7921 * Can send only an M_PROTO/M_PCPROTO by passing in a NULL uiop. 7922 * The fmode flag (NDELAY, NONBLOCK) is the or of the flags in the uio 7923 * and the fmode parameter. 7924 * 7925 * This routine handles the consolidation private flags: 7926 * MSG_IGNERROR Ignore any stream head error except STPLEX. 7927 * MSG_HOLDSIG Hold signals while waiting for data. 7928 * MSG_IGNFLOW Don't check streams flow control. 7929 * 7930 * NOTE: strputmsg and kstrputmsg have much of the logic in common. 
7931 */ 7932 int 7933 kstrputmsg( 7934 struct vnode *vp, 7935 mblk_t *mctl, 7936 struct uio *uiop, 7937 ssize_t msgsize, 7938 unsigned char pri, 7939 int flag, 7940 int fmode) 7941 { 7942 struct stdata *stp; 7943 queue_t *wqp; 7944 ssize_t rmin, rmax; 7945 int error; 7946 7947 ASSERT(vp->v_stream); 7948 stp = vp->v_stream; 7949 wqp = stp->sd_wrq; 7950 if (AU_AUDITING()) 7951 audit_strputmsg(vp, NULL, NULL, pri, flag, fmode); 7952 if (mctl == NULL) 7953 return (EINVAL); 7954 7955 mutex_enter(&stp->sd_lock); 7956 7957 if ((error = i_straccess(stp, JCWRITE)) != 0) { 7958 mutex_exit(&stp->sd_lock); 7959 freemsg(mctl); 7960 return (error); 7961 } 7962 7963 if ((stp->sd_flag & STPLEX) || !(flag & MSG_IGNERROR)) { 7964 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { 7965 error = strwriteable(stp, B_FALSE, B_TRUE); 7966 if (error != 0) { 7967 mutex_exit(&stp->sd_lock); 7968 freemsg(mctl); 7969 return (error); 7970 } 7971 } 7972 } 7973 7974 mutex_exit(&stp->sd_lock); 7975 7976 /* 7977 * Check for legal flag value. 7978 */ 7979 switch (flag & (MSG_HIPRI|MSG_BAND|MSG_ANY)) { 7980 case MSG_HIPRI: 7981 if (pri != 0) { 7982 freemsg(mctl); 7983 return (EINVAL); 7984 } 7985 break; 7986 case MSG_BAND: 7987 break; 7988 default: 7989 freemsg(mctl); 7990 return (EINVAL); 7991 } 7992 7993 TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_IN, 7994 "kstrputmsg in:stp %p", stp); 7995 7996 /* get these values from those cached in the stream head */ 7997 rmin = stp->sd_qn_minpsz; 7998 rmax = stp->sd_qn_maxpsz; 7999 8000 /* 8001 * Make sure ctl and data sizes together fall within the 8002 * limits of the max and min receive packet sizes and do 8003 * not exceed system limit. 8004 */ 8005 ASSERT((rmax >= 0) || (rmax == INFPSZ)); 8006 if (rmax == 0) { 8007 freemsg(mctl); 8008 return (ERANGE); 8009 } 8010 /* 8011 * Use the MAXIMUM of sd_maxblk and q_maxpsz. 8012 * Needed to prevent partial failures in the strmakedata loop. 
8013 */ 8014 if (stp->sd_maxblk != INFPSZ && rmax != INFPSZ && rmax < stp->sd_maxblk) 8015 rmax = stp->sd_maxblk; 8016 8017 if (uiop == NULL) { 8018 msgsize = -1; 8019 rmin = -1; /* no range check for NULL data part */ 8020 } else { 8021 /* Use uio flags as well as the fmode parameter flags */ 8022 fmode |= uiop->uio_fmode; 8023 8024 if ((msgsize < rmin) || 8025 ((msgsize > rmax) && (rmax != INFPSZ))) { 8026 freemsg(mctl); 8027 return (ERANGE); 8028 } 8029 } 8030 8031 /* Ignore flow control in strput for HIPRI */ 8032 if (flag & MSG_HIPRI) 8033 flag |= MSG_IGNFLOW; 8034 8035 for (;;) { 8036 int done = 0; 8037 int waitflag; 8038 mblk_t *mp; 8039 8040 /* 8041 * strput will always free the ctl mblk - even when strput 8042 * fails. If MSG_IGNFLOW is set then any error returned 8043 * will cause us to break the loop, so we don't need a copy 8044 * of the message. If MSG_IGNFLOW is not set, then we can 8045 * get hit by flow control and be forced to try again. In 8046 * this case we need to have a copy of the message. We 8047 * do this using copymsg since the message may get modified 8048 * by something below us. 8049 * 8050 * We've observed that many TPI providers do not check db_ref 8051 * on the control messages but blindly reuse them for the 8052 * T_OK_ACK/T_ERROR_ACK. Thus using copymsg is more 8053 * friendly to such providers than using dupmsg. Also, note 8054 * that sockfs uses MSG_IGNFLOW for all TPI control messages. 8055 * Only data messages are subject to flow control, hence 8056 * subject to this copymsg. 8057 */ 8058 if (flag & MSG_IGNFLOW) { 8059 mp = mctl; 8060 mctl = NULL; 8061 } else { 8062 do { 8063 /* 8064 * If a message has a free pointer, the message 8065 * must be dupmsg to maintain this pointer. 8066 * Code using this facility must be sure 8067 * that modules below will not change the 8068 * contents of the dblk without checking db_ref 8069 * first. If db_ref is > 1, then the module 8070 * needs to do a copymsg first. 
Otherwise, 8071 * the contents of the dblk may become 8072 * inconsistent because the freesmg/freeb below 8073 * may end up calling atomic_add_32_nv. 8074 * The atomic_add_32_nv in freeb (accessing 8075 * all of db_ref, db_type, db_flags, and 8076 * db_struioflag) does not prevent other threads 8077 * from concurrently trying to modify e.g. 8078 * db_type. 8079 */ 8080 if (mctl->b_datap->db_frtnp != NULL) 8081 mp = dupmsg(mctl); 8082 else 8083 mp = copymsg(mctl); 8084 8085 if (mp != NULL) 8086 break; 8087 8088 error = strwaitbuf(msgdsize(mctl), BPRI_MED); 8089 if (error) { 8090 freemsg(mctl); 8091 return (error); 8092 } 8093 } while (mp == NULL); 8094 } 8095 /* 8096 * Verify that all of msgsize can be transferred by 8097 * strput. 8098 */ 8099 ASSERT(stp->sd_maxblk == INFPSZ || stp->sd_maxblk >= msgsize); 8100 error = strput(stp, mp, uiop, &msgsize, 0, pri, flag); 8101 if (error == 0) 8102 break; 8103 8104 if (error != EWOULDBLOCK) 8105 goto out; 8106 8107 /* 8108 * IF MSG_IGNFLOW is set we should have broken out of loop 8109 * above. 8110 */ 8111 ASSERT(!(flag & MSG_IGNFLOW)); 8112 mutex_enter(&stp->sd_lock); 8113 /* 8114 * Check for a missed wakeup. 8115 * Needed since strput did not hold sd_lock across 8116 * the canputnext. 
8117 */ 8118 if (bcanputnext(wqp, pri)) { 8119 /* Try again */ 8120 mutex_exit(&stp->sd_lock); 8121 continue; 8122 } 8123 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAIT, 8124 "kstrputmsg wait:stp %p waits pri %d", stp, pri); 8125 8126 waitflag = WRITEWAIT; 8127 if (flag & (MSG_HOLDSIG|MSG_IGNERROR)) { 8128 if (flag & MSG_HOLDSIG) 8129 waitflag |= STR_NOSIG; 8130 if (flag & MSG_IGNERROR) 8131 waitflag |= STR_NOERROR; 8132 } 8133 if (((error = strwaitq(stp, waitflag, 8134 (ssize_t)0, fmode, -1, &done)) != 0) || done) { 8135 mutex_exit(&stp->sd_lock); 8136 TRACE_3(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_OUT, 8137 "kstrputmsg out:stp %p out %d error %d", 8138 stp, 0, error); 8139 freemsg(mctl); 8140 return (error); 8141 } 8142 TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAKE, 8143 "kstrputmsg wake:stp %p wakes", stp); 8144 if ((error = i_straccess(stp, JCWRITE)) != 0) { 8145 mutex_exit(&stp->sd_lock); 8146 freemsg(mctl); 8147 return (error); 8148 } 8149 mutex_exit(&stp->sd_lock); 8150 } 8151 out: 8152 freemsg(mctl); 8153 /* 8154 * For historic reasons, applications expect EAGAIN 8155 * when data mblk could not be allocated. so change 8156 * ENOMEM back to EAGAIN 8157 */ 8158 if (error == ENOMEM) 8159 error = EAGAIN; 8160 TRACE_3(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_OUT, 8161 "kstrputmsg out:stp %p out %d error %d", stp, 2, error); 8162 return (error); 8163 } 8164 8165 /* 8166 * Determines whether the necessary conditions are set on a stream 8167 * for it to be readable, writeable, or have exceptions. 8168 * 8169 * strpoll handles the consolidation private events: 8170 * POLLNOERR Do not return POLLERR even if there are stream 8171 * head errors. 8172 * Used by sockfs. 8173 * POLLRDDATA Do not return POLLIN unless at least one message on 8174 * the queue contains one or more M_DATA mblks. Thus 8175 * when this flag is set a queue with only 8176 * M_PROTO/M_PCPROTO mblks does not return POLLIN. 8177 * Used by sockfs to ignore T_EXDATA_IND messages. 
8178 * 8179 * Note: POLLRDDATA assumes that synch streams only return messages with 8180 * an M_DATA attached (i.e. not messages consisting of only 8181 * an M_PROTO/M_PCPROTO part). 8182 */ 8183 int 8184 strpoll( 8185 struct stdata *stp, 8186 short events_arg, 8187 int anyyet, 8188 short *reventsp, 8189 struct pollhead **phpp) 8190 { 8191 int events = (ushort_t)events_arg; 8192 int retevents = 0; 8193 mblk_t *mp; 8194 qband_t *qbp; 8195 long sd_flags = stp->sd_flag; 8196 int headlocked = 0; 8197 8198 /* 8199 * For performance, a single 'if' tests for most possible edge 8200 * conditions in one shot 8201 */ 8202 if (sd_flags & (STPLEX | STRDERR | STWRERR)) { 8203 if (sd_flags & STPLEX) { 8204 *reventsp = POLLNVAL; 8205 return (EINVAL); 8206 } 8207 if (((events & (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) && 8208 (sd_flags & STRDERR)) || 8209 ((events & (POLLOUT | POLLWRNORM | POLLWRBAND)) && 8210 (sd_flags & STWRERR))) { 8211 if (!(events & POLLNOERR)) { 8212 *reventsp = POLLERR; 8213 return (0); 8214 } 8215 } 8216 } 8217 if (sd_flags & STRHUP) { 8218 retevents |= POLLHUP; 8219 } else if (events & (POLLWRNORM | POLLWRBAND)) { 8220 queue_t *tq; 8221 queue_t *qp = stp->sd_wrq; 8222 8223 claimstr(qp); 8224 /* Find next module forward that has a service procedure */ 8225 tq = qp->q_next->q_nfsrv; 8226 ASSERT(tq != NULL); 8227 8228 polllock(&stp->sd_pollist, QLOCK(tq)); 8229 if (events & POLLWRNORM) { 8230 queue_t *sqp; 8231 8232 if (tq->q_flag & QFULL) 8233 /* ensure backq svc procedure runs */ 8234 tq->q_flag |= QWANTW; 8235 else if ((sqp = stp->sd_struiowrq) != NULL) { 8236 /* Check sync stream barrier write q */ 8237 mutex_exit(QLOCK(tq)); 8238 polllock(&stp->sd_pollist, QLOCK(sqp)); 8239 if (sqp->q_flag & QFULL) 8240 /* ensure pollwakeup() is done */ 8241 sqp->q_flag |= QWANTWSYNC; 8242 else 8243 retevents |= POLLOUT; 8244 /* More write events to process ??? */ 8245 if (! 
(events & POLLWRBAND)) { 8246 mutex_exit(QLOCK(sqp)); 8247 releasestr(qp); 8248 goto chkrd; 8249 } 8250 mutex_exit(QLOCK(sqp)); 8251 polllock(&stp->sd_pollist, QLOCK(tq)); 8252 } else 8253 retevents |= POLLOUT; 8254 } 8255 if (events & POLLWRBAND) { 8256 qbp = tq->q_bandp; 8257 if (qbp) { 8258 while (qbp) { 8259 if (qbp->qb_flag & QB_FULL) 8260 qbp->qb_flag |= QB_WANTW; 8261 else 8262 retevents |= POLLWRBAND; 8263 qbp = qbp->qb_next; 8264 } 8265 } else { 8266 retevents |= POLLWRBAND; 8267 } 8268 } 8269 mutex_exit(QLOCK(tq)); 8270 releasestr(qp); 8271 } 8272 chkrd: 8273 if (sd_flags & STRPRI) { 8274 retevents |= (events & POLLPRI); 8275 } else if (events & (POLLRDNORM | POLLRDBAND | POLLIN)) { 8276 queue_t *qp = _RD(stp->sd_wrq); 8277 int normevents = (events & (POLLIN | POLLRDNORM)); 8278 8279 /* 8280 * Note: Need to do polllock() here since ps_lock may be 8281 * held. See bug 4191544. 8282 */ 8283 polllock(&stp->sd_pollist, &stp->sd_lock); 8284 headlocked = 1; 8285 mp = qp->q_first; 8286 while (mp) { 8287 /* 8288 * For POLLRDDATA we scan b_cont and b_next until we 8289 * find an M_DATA. 8290 */ 8291 if ((events & POLLRDDATA) && 8292 mp->b_datap->db_type != M_DATA) { 8293 mblk_t *nmp = mp->b_cont; 8294 8295 while (nmp != NULL && 8296 nmp->b_datap->db_type != M_DATA) 8297 nmp = nmp->b_cont; 8298 if (nmp == NULL) { 8299 mp = mp->b_next; 8300 continue; 8301 } 8302 } 8303 if (mp->b_band == 0) 8304 retevents |= normevents; 8305 else 8306 retevents |= (events & (POLLIN | POLLRDBAND)); 8307 break; 8308 } 8309 if (! (retevents & normevents) && 8310 (stp->sd_wakeq & RSLEEP)) { 8311 /* 8312 * Sync stream barrier read queue has data. 
8313 */ 8314 retevents |= normevents; 8315 } 8316 /* Treat eof as normal data */ 8317 if (sd_flags & STREOF) 8318 retevents |= normevents; 8319 } 8320 8321 *reventsp = (short)retevents; 8322 if (retevents) { 8323 if (headlocked) 8324 mutex_exit(&stp->sd_lock); 8325 return (0); 8326 } 8327 8328 /* 8329 * If poll() has not found any events yet, set up event cell 8330 * to wake up the poll if a requested event occurs on this 8331 * stream. Check for collisions with outstanding poll requests. 8332 */ 8333 if (!anyyet) { 8334 *phpp = &stp->sd_pollist; 8335 if (headlocked == 0) { 8336 polllock(&stp->sd_pollist, &stp->sd_lock); 8337 headlocked = 1; 8338 } 8339 stp->sd_rput_opt |= SR_POLLIN; 8340 } 8341 if (headlocked) 8342 mutex_exit(&stp->sd_lock); 8343 return (0); 8344 } 8345 8346 /* 8347 * The purpose of putback() is to assure sleeping polls/reads 8348 * are awakened when there are no new messages arriving at the, 8349 * stream head, and a message is placed back on the read queue. 8350 * 8351 * sd_lock must be held when messages are placed back on stream 8352 * head. (getq() holds sd_lock when it removes messages from 8353 * the queue) 8354 */ 8355 8356 static void 8357 putback(struct stdata *stp, queue_t *q, mblk_t *bp, int band) 8358 { 8359 mblk_t *qfirst; 8360 ASSERT(MUTEX_HELD(&stp->sd_lock)); 8361 8362 /* 8363 * As a result of lock-step ordering around q_lock and sd_lock, 8364 * it's possible for function calls like putnext() and 8365 * canputnext() to get an inaccurate picture of how much 8366 * data is really being processed at the stream head. 8367 * We only consolidate with existing messages on the queue 8368 * if the length of the message we want to put back is smaller 8369 * than the queue hiwater mark. 
8370 */ 8371 if ((stp->sd_rput_opt & SR_CONSOL_DATA) && 8372 (DB_TYPE(bp) == M_DATA) && ((qfirst = q->q_first) != NULL) && 8373 (DB_TYPE(qfirst) == M_DATA) && 8374 ((qfirst->b_flag & (MSGMARK|MSGDELIM)) == 0) && 8375 ((bp->b_flag & (MSGMARK|MSGDELIM|MSGMARKNEXT)) == 0) && 8376 (mp_cont_len(bp, NULL) < q->q_hiwat)) { 8377 /* 8378 * We use the same logic as defined in strrput() 8379 * but in reverse as we are putting back onto the 8380 * queue and want to retain byte ordering. 8381 * Consolidate M_DATA messages with M_DATA ONLY. 8382 * strrput() allows the consolidation of M_DATA onto 8383 * M_PROTO | M_PCPROTO but not the other way round. 8384 * 8385 * The consolidation does not take place if the message 8386 * we are returning to the queue is marked with either 8387 * of the marks or the delim flag or if q_first 8388 * is marked with MSGMARK. The MSGMARK check is needed to 8389 * handle the odd semantics of MSGMARK where essentially 8390 * the whole message is to be treated as marked. 8391 * Carry any MSGMARKNEXT and MSGNOTMARKNEXT from q_first 8392 * to the front of the b_cont chain. 8393 */ 8394 rmvq_noenab(q, qfirst); 8395 8396 /* 8397 * The first message in the b_cont list 8398 * tracks MSGMARKNEXT and MSGNOTMARKNEXT. 8399 * We need to handle the case where we 8400 * are appending: 8401 * 8402 * 1) a MSGMARKNEXT to a MSGNOTMARKNEXT. 8403 * 2) a MSGMARKNEXT to a plain message. 8404 * 3) a MSGNOTMARKNEXT to a plain message 8405 * 4) a MSGNOTMARKNEXT to a MSGNOTMARKNEXT 8406 * message. 8407 * 8408 * Thus we never append a MSGMARKNEXT or 8409 * MSGNOTMARKNEXT to a MSGMARKNEXT message. 
8410 */ 8411 if (qfirst->b_flag & MSGMARKNEXT) { 8412 bp->b_flag |= MSGMARKNEXT; 8413 bp->b_flag &= ~MSGNOTMARKNEXT; 8414 qfirst->b_flag &= ~MSGMARKNEXT; 8415 } else if (qfirst->b_flag & MSGNOTMARKNEXT) { 8416 bp->b_flag |= MSGNOTMARKNEXT; 8417 qfirst->b_flag &= ~MSGNOTMARKNEXT; 8418 } 8419 8420 linkb(bp, qfirst); 8421 } 8422 (void) putbq(q, bp); 8423 8424 /* 8425 * A message may have come in when the sd_lock was dropped in the 8426 * calling routine. If this is the case and STR*ATMARK info was 8427 * received, need to move that from the stream head to the q_last 8428 * so that SIOCATMARK can return the proper value. 8429 */ 8430 if (stp->sd_flag & (STRATMARK | STRNOTATMARK)) { 8431 unsigned short *flagp = &q->q_last->b_flag; 8432 uint_t b_flag = (uint_t)*flagp; 8433 8434 if (stp->sd_flag & STRATMARK) { 8435 b_flag &= ~MSGNOTMARKNEXT; 8436 b_flag |= MSGMARKNEXT; 8437 stp->sd_flag &= ~STRATMARK; 8438 } else { 8439 b_flag &= ~MSGMARKNEXT; 8440 b_flag |= MSGNOTMARKNEXT; 8441 stp->sd_flag &= ~STRNOTATMARK; 8442 } 8443 *flagp = (unsigned short) b_flag; 8444 } 8445 8446 #ifdef DEBUG 8447 /* 8448 * Make sure that the flags are not messed up. 
8449 */ 8450 { 8451 mblk_t *mp; 8452 mp = q->q_last; 8453 while (mp != NULL) { 8454 ASSERT((mp->b_flag & (MSGMARKNEXT|MSGNOTMARKNEXT)) != 8455 (MSGMARKNEXT|MSGNOTMARKNEXT)); 8456 mp = mp->b_cont; 8457 } 8458 } 8459 #endif 8460 if (q->q_first == bp) { 8461 short pollevents; 8462 8463 if (stp->sd_flag & RSLEEP) { 8464 stp->sd_flag &= ~RSLEEP; 8465 cv_broadcast(&q->q_wait); 8466 } 8467 if (stp->sd_flag & STRPRI) { 8468 pollevents = POLLPRI; 8469 } else { 8470 if (band == 0) { 8471 if (!(stp->sd_rput_opt & SR_POLLIN)) 8472 return; 8473 stp->sd_rput_opt &= ~SR_POLLIN; 8474 pollevents = POLLIN | POLLRDNORM; 8475 } else { 8476 pollevents = POLLIN | POLLRDBAND; 8477 } 8478 } 8479 mutex_exit(&stp->sd_lock); 8480 pollwakeup(&stp->sd_pollist, pollevents); 8481 mutex_enter(&stp->sd_lock); 8482 } 8483 } 8484 8485 /* 8486 * Return the held vnode attached to the stream head of a 8487 * given queue 8488 * It is the responsibility of the calling routine to ensure 8489 * that the queue does not go away (e.g. pop). 8490 */ 8491 vnode_t * 8492 strq2vp(queue_t *qp) 8493 { 8494 vnode_t *vp; 8495 vp = STREAM(qp)->sd_vnode; 8496 ASSERT(vp != NULL); 8497 VN_HOLD(vp); 8498 return (vp); 8499 } 8500 8501 /* 8502 * return the stream head write queue for the given vp 8503 * It is the responsibility of the calling routine to ensure 8504 * that the stream or vnode do not close. 8505 */ 8506 queue_t * 8507 strvp2wq(vnode_t *vp) 8508 { 8509 ASSERT(vp->v_stream != NULL); 8510 return (vp->v_stream->sd_wrq); 8511 } 8512 8513 /* 8514 * pollwakeup stream head 8515 * It is the responsibility of the calling routine to ensure 8516 * that the stream or vnode do not close. 8517 */ 8518 void 8519 strpollwakeup(vnode_t *vp, short event) 8520 { 8521 ASSERT(vp->v_stream); 8522 pollwakeup(&vp->v_stream->sd_pollist, event); 8523 } 8524 8525 /* 8526 * Mate the stream heads of two vnodes together. 
If the two vnodes are the 8527 * same, we just make the write-side point at the read-side -- otherwise, 8528 * we do a full mate. Only works on vnodes associated with streams that are 8529 * still being built and thus have only a stream head. 8530 */ 8531 void 8532 strmate(vnode_t *vp1, vnode_t *vp2) 8533 { 8534 queue_t *wrq1 = strvp2wq(vp1); 8535 queue_t *wrq2 = strvp2wq(vp2); 8536 8537 /* 8538 * Verify that there are no modules on the stream yet. We also 8539 * rely on the stream head always having a service procedure to 8540 * avoid tweaking q_nfsrv. 8541 */ 8542 ASSERT(wrq1->q_next == NULL && wrq2->q_next == NULL); 8543 ASSERT(wrq1->q_qinfo->qi_srvp != NULL); 8544 ASSERT(wrq2->q_qinfo->qi_srvp != NULL); 8545 8546 /* 8547 * If the queues are the same, just twist; otherwise do a full mate. 8548 */ 8549 if (wrq1 == wrq2) { 8550 wrq1->q_next = _RD(wrq1); 8551 } else { 8552 wrq1->q_next = _RD(wrq2); 8553 wrq2->q_next = _RD(wrq1); 8554 STREAM(wrq1)->sd_mate = STREAM(wrq2); 8555 STREAM(wrq1)->sd_flag |= STRMATE; 8556 STREAM(wrq2)->sd_mate = STREAM(wrq1); 8557 STREAM(wrq2)->sd_flag |= STRMATE; 8558 } 8559 } 8560 8561 /* 8562 * XXX will go away when console is correctly fixed. 8563 * Clean up the console PIDS, from previous I_SETSIG, 8564 * called only for cnopen which never calls strclean(). 8565 */ 8566 void 8567 str_cn_clean(struct vnode *vp) 8568 { 8569 strsig_t *ssp, *pssp, *tssp; 8570 struct stdata *stp; 8571 struct pid *pidp; 8572 int update = 0; 8573 8574 ASSERT(vp->v_stream); 8575 stp = vp->v_stream; 8576 pssp = NULL; 8577 mutex_enter(&stp->sd_lock); 8578 ssp = stp->sd_siglist; 8579 while (ssp) { 8580 mutex_enter(&pidlock); 8581 pidp = ssp->ss_pidp; 8582 /* 8583 * Get rid of PID if the proc is gone. 
8584 */ 8585 if (pidp->pid_prinactive) { 8586 tssp = ssp->ss_next; 8587 if (pssp) 8588 pssp->ss_next = tssp; 8589 else 8590 stp->sd_siglist = tssp; 8591 ASSERT(pidp->pid_ref <= 1); 8592 PID_RELE(ssp->ss_pidp); 8593 mutex_exit(&pidlock); 8594 kmem_free(ssp, sizeof (strsig_t)); 8595 update = 1; 8596 ssp = tssp; 8597 continue; 8598 } else 8599 mutex_exit(&pidlock); 8600 pssp = ssp; 8601 ssp = ssp->ss_next; 8602 } 8603 if (update) { 8604 stp->sd_sigflags = 0; 8605 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 8606 stp->sd_sigflags |= ssp->ss_events; 8607 } 8608 mutex_exit(&stp->sd_lock); 8609 } 8610 8611 /* 8612 * Return B_TRUE if there is data in the message, B_FALSE otherwise. 8613 */ 8614 static boolean_t 8615 msghasdata(mblk_t *bp) 8616 { 8617 for (; bp; bp = bp->b_cont) 8618 if (bp->b_datap->db_type == M_DATA) { 8619 ASSERT(bp->b_wptr >= bp->b_rptr); 8620 if (bp->b_wptr > bp->b_rptr) 8621 return (B_TRUE); 8622 } 8623 return (B_FALSE); 8624 } 8625