1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 22 /* All Rights Reserved */ 23 24 25 /* 26 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 27 * Use is subject to license terms. 
 */

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/signal.h>
#include <sys/stat.h>
#include <sys/proc.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/stropts.h>
#include <sys/tihdr.h>
#include <sys/var.h>
#include <sys/poll.h>
#include <sys/termio.h>
#include <sys/ttold.h>
#include <sys/systm.h>
#include <sys/uio.h>
#include <sys/cmn_err.h>
#include <sys/sad.h>
#include <sys/netstack.h>
#include <sys/priocntl.h>
#include <sys/jioctl.h>
#include <sys/procset.h>
#include <sys/session.h>
#include <sys/kmem.h>
#include <sys/filio.h>
#include <sys/vtrace.h>
#include <sys/debug.h>
#include <sys/strredir.h>
#include <sys/fs/fifonode.h>
#include <sys/fs/snode.h>
#include <sys/strlog.h>
#include <sys/strsun.h>
#include <sys/project.h>
#include <sys/kbio.h>
#include <sys/msio.h>
#include <sys/tty.h>
#include <sys/ptyvar.h>
#include <sys/vuid_event.h>
#include <sys/modctl.h>
#include <sys/sunddi.h>
#include <sys/sunldi_impl.h>
#include <sys/autoconf.h>
#include <sys/policy.h>
#include <sys/dld.h>
#include <sys/zone.h>
#include <sys/sodirect.h>

/*
 * This define helps improve the readability of streams code while
 * still maintaining a very old streams performance enhancement.  The
 * performance enhancement basically involved having all callers
 * of straccess() perform the first check that straccess() will do
 * locally before actually calling straccess().  (Thereby reducing
 * the number of unnecessary calls to straccess().)
 *
 * Note that the macro is not self-contained: it reads `stp' from the
 * scope in which it is expanded rather than taking it as an argument.
 * It evaluates to 0 without calling straccess() when the stream has no
 * sd_sidp or when the stream's vnode is a VFIFO.
 */
#define	i_straccess(x, y)	((stp->sd_sidp == NULL) ? 0 : \
	(stp->sd_vnode->v_type == VFIFO) ? 0 : \
	straccess((x), (y)))

/*
 * what is mblk_pull_len?
 *
 * If a streams message consists of many short mblks,
 * a performance degradation occurs from copyout overhead.
 * To decrease the per mblk overhead, messages that are
 * likely to consist of many small mblks are pulled up into
 * one continuous chunk of memory.
 *
 * To avoid the processing overhead of examining every
 * mblk, a quick heuristic is used. If the first mblk in
 * the message is shorter than mblk_pull_len, it is likely
 * that the rest of the mblks in the message will be short too.
 *
 * This heuristic was decided upon after performance tests
 * indicated that anything more complex slowed down the main
 * code path.
 */
#define	MBLK_PULL_LEN 64
/* Global (non-static) threshold; MBLK_PULL_LEN is only its initial value. */
uint32_t mblk_pull_len = MBLK_PULL_LEN;

/*
 * The sgttyb_handling flag controls the handling of the old BSD
 * TIOCGETP, TIOCSETP, and TIOCSETN ioctls as follows:
 *
 * 0 - Emit no warnings at all and retain old, broken behavior.
 * 1 - Emit no warnings and silently handle new semantics.
 * 2 - Send cmn_err(CE_NOTE) when either TIOCSETP or TIOCSETN is used
 *     (once per system invocation).  Handle with new semantics.
 * 3 - Send SIGSYS when any TIOCGETP, TIOCSETP, or TIOCSETN call is
 *     made (so that offenders drop core and are easy to debug).
 *
 * The "new semantics" are that TIOCGETP returns B38400 for
 * sg_[io]speed if the corresponding value is over B38400, and that
 * TIOCSET[PN] accept B38400 in these cases to mean "retain current
 * bit rate."
 */
int sgttyb_handling = 1;	/* default: mode 1 from the table above */
static boolean_t sgttyb_complaint;

/* don't push drcompat module by default on Style-2 streams */
static int push_drcompat = 0;

/*
 * id value used to distinguish between different ioctl messages
 */
static uint32_t ioc_id;

/* Forward declarations for file-local routines. */
static void putback(struct stdata *, queue_t *, mblk_t *, int);
static void strcleanall(struct vnode *);
static int strwsrv(queue_t *);
static int strdocmd(struct stdata *, struct strcmd *, cred_t *);
static void struioainit(queue_t *, sodirect_t *, uio_t *);

/*
 * qinit and module_info structures for stream head read and write queues.
 * The fifo_* / fifo*_info variants are the ones stropen() installs when
 * the vnode is a VFIFO; they differ from the regular ones only in the
 * module names and in the read-side limits (PIPE_BUF, FIFOHIWAT, FIFOLOWAT).
 */
struct module_info strm_info = { 0, "strrhead", 0, INFPSZ, STRHIGH, STRLOW };
struct module_info stwm_info = { 0, "strwhead", 0, 0, 0, 0 };
struct qinit strdata = { strrput, NULL, NULL, NULL, NULL, &strm_info };
struct qinit stwdata = { NULL, strwsrv, NULL, NULL, NULL, &stwm_info };
struct module_info fiform_info = { 0, "fifostrrhead", 0, PIPE_BUF, FIFOHIWAT,
    FIFOLOWAT };
struct module_info fifowm_info = { 0, "fifostrwhead", 0, 0, 0, 0 };
struct qinit fifo_strdata = { strrput, NULL, NULL, NULL, NULL, &fiform_info };
struct qinit fifo_stwdata = { NULL, strwsrv, NULL, NULL, NULL, &fifowm_info };

extern kmutex_t	strresources;	/* protects global resources */
extern kmutex_t muxifier;	/* single-threads multiplexor creation */

static boolean_t msghasdata(mblk_t *bp);
/* Convenience negation of msghasdata(). */
#define	msgnodata(bp) (!msghasdata(bp))

/*
 * Stream head locking notes:
 *	There are four monitors associated with the stream head:
 *	1. v_stream monitor: in stropen() and strclose() v_lock
 *		is held while the association of vnode and stream
 *		head is established or tested for.
 *	2. open/close/push/pop monitor: sd_lock is held while each
 *		thread bids for exclusive access to this monitor
 *		for opening or closing a stream.
 *		In addition, this
 *		monitor is entered during pushes and pops.  This
 *		guarantees that during plumbing operations there
 *		is only one thread trying to change the plumbing.
 *		Any other threads present in the stream are only
 *		using the plumbing.
 *	3. read/write monitor: in the case of read, a thread holds
 *		sd_lock while trying to get data from the stream
 *		head queue.  if there is none to fulfill a read
 *		request, it sets RSLEEP and calls cv_wait_sig() down
 *		in strwaitq() to await the arrival of new data.
 *		when new data arrives in strrput(), sd_lock is acquired
 *		before testing for RSLEEP and calling cv_broadcast().
 *		the behavior of strwrite(), strwsrv(), and WSLEEP
 *		mirror this.
 *	4. ioctl monitor: sd_lock is gotten to ensure that only one
 *		thread is doing an ioctl at a time.
 *
 * Note, for sodirect case 3. is extended to (*sodirect_t.sod_enqueue)()
 * call-back from below, further the sodirect support is for code paths
 * called via kstrgetmsg(), all other code paths ASSERT() that sodirect
 * uioa generated mblk_t's (i.e. DBLK_UIOA) aren't processed.
 */

/*
 * Push the module called `name' onto the stream whose head is `stp' and
 * whose stream head read queue is `qp', opening it through qattach().
 *
 * If `anchor' equals the stream's push count once the module is attached,
 * the anchor is recorded at that position together with `anchor_zoneid'.
 *
 * Returns 0 on success, otherwise:
 *	ENXIO	the stream is hung up (STRHUP);
 *	EIO	the stream has a read or write error (STRDERR/STWRERR);
 *	EINVAL	sd_pushcnt has reached nstrpush, or fmodsw_find() could not
 *		find the module (STREOPENFAIL is also set in that case);
 *	or whatever error qattach() returned.
 */
static int
push_mod(queue_t *qp, dev_t *devp, struct stdata *stp, const char *name,
    int anchor, cred_t *crp, uint_t anchor_zoneid)
{
	int error;
	fmodsw_impl_t *fp;

	if (stp->sd_flag & (STRHUP|STRDERR|STWRERR)) {
		/* A hangup takes precedence over a read/write error. */
		error = (stp->sd_flag & STRHUP) ? ENXIO : EIO;
		return (error);
	}
	if (stp->sd_pushcnt >= nstrpush) {
		return (EINVAL);
	}

	if ((fp = fmodsw_find(name, FMODSW_HOLD | FMODSW_LOAD)) == NULL) {
		stp->sd_flag |= STREOPENFAIL;
		return (EINVAL);
	}

	/*
	 * push new module and call its open routine via qattach
	 */
	if ((error = qattach(qp, devp, 0, crp, fp, B_FALSE)) != 0)
		return (error);

	/*
	 * Check to see if caller wants a STREAMS anchor
	 * put at this place in the stream, and add if so.
	 */
	mutex_enter(&stp->sd_lock);
	if (anchor == stp->sd_pushcnt) {
		stp->sd_anchor = stp->sd_pushcnt;
		stp->sd_anchorzone = anchor_zoneid;
	}
	mutex_exit(&stp->sd_lock);

	return (0);
}

/*
 * Open a stream device.
 *
 * If `vp' already has a stream, this waits for any open, close or plumbing
 * operation in progress (or fails with EAGAIN when FNDELAY/FNONBLOCK is
 * set, EINTR when the wait is interrupted by a signal, EIO when the stream
 * is in a read/write error state) and then re-opens every queue below the
 * stream head with qreopen().
 *
 * Otherwise a new stream head is allocated and attached to `vp'.  For a
 * VFIFO the fifo qinit structures are installed and nothing is opened
 * below; for everything else the driver is opened with qattach() and any
 * autopush modules configured for the device are pushed with push_mod().
 *
 * `*devp' is passed on to qattach()/qreopen(), so it may be changed by
 * them.  Returns 0 on success or an errno value.
 */
int
stropen(vnode_t *vp, dev_t *devp, int flag, cred_t *crp)
{
	struct stdata *stp;
	queue_t *qp;
	int s;
	dev_t dummydev, savedev;
	struct autopush *ap;
	struct dlautopush dlap;
	int error = 0;
	ssize_t rmin, rmax;
	int cloneopen;
	queue_t *brq;
	major_t major;
	str_stack_t *ss;
	zoneid_t zoneid;
	uint_t anchor;

	if (audit_active)
		audit_stropen(vp, devp, flag, crp);

	/*
	 * If the stream already exists, wait for any open in progress
	 * to complete, then call the open function of each module and
	 * driver in the stream.  Otherwise create the stream.
	 */
	TRACE_1(TR_FAC_STREAMS_FR, TR_STROPEN, "stropen:%p", vp);
retry:
	mutex_enter(&vp->v_lock);
	if ((stp = vp->v_stream) != NULL) {

		/*
		 * Waiting for stream to be created to device
		 * due to another open.
		 */
		mutex_exit(&vp->v_lock);

		/*
		 * Every `goto ckreturn' below must arrive with exactly
		 * stp->sd_lock held, because ckreturn broadcasts on
		 * sd_monitor and then drops that lock.  That is why the
		 * mated paths release the mate's lock (and, after waiting
		 * on the mate's monitor, re-take stp->sd_lock) first.
		 */
		if (STRMATED(stp)) {
			struct stdata *strmatep = stp->sd_mate;

			/* NOTE(review): presumably takes both sd_locks */
			STRLOCKMATES(stp);
			if (strmatep->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
				if (flag & (FNDELAY|FNONBLOCK)) {
					error = EAGAIN;
					mutex_exit(&strmatep->sd_lock);
					goto ckreturn;
				}
				mutex_exit(&stp->sd_lock);
				if (!cv_wait_sig(&strmatep->sd_monitor,
				    &strmatep->sd_lock)) {
					error = EINTR;
					mutex_exit(&strmatep->sd_lock);
					mutex_enter(&stp->sd_lock);
					goto ckreturn;
				}
				mutex_exit(&strmatep->sd_lock);
				goto retry;
			}
			if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
				if (flag & (FNDELAY|FNONBLOCK)) {
					error = EAGAIN;
					mutex_exit(&strmatep->sd_lock);
					goto ckreturn;
				}
				mutex_exit(&strmatep->sd_lock);
				if (!cv_wait_sig(&stp->sd_monitor,
				    &stp->sd_lock)) {
					error = EINTR;
					goto ckreturn;
				}
				mutex_exit(&stp->sd_lock);
				goto retry;
			}

			if (stp->sd_flag & (STRDERR|STWRERR)) {
				error = EIO;
				mutex_exit(&strmatep->sd_lock);
				goto ckreturn;
			}

			stp->sd_flag |= STWOPEN;
			STRUNLOCKMATES(stp);
		} else {
			mutex_enter(&stp->sd_lock);
			if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
				if (flag & (FNDELAY|FNONBLOCK)) {
					error = EAGAIN;
					goto ckreturn;
				}
				if (!cv_wait_sig(&stp->sd_monitor,
				    &stp->sd_lock)) {
					error = EINTR;
					goto ckreturn;
				}
				mutex_exit(&stp->sd_lock);
				goto retry;  /* could be clone! */
			}

			if (stp->sd_flag & (STRDERR|STWRERR)) {
				error = EIO;
				goto ckreturn;
			}

			stp->sd_flag |= STWOPEN;
			mutex_exit(&stp->sd_lock);
		}

		/*
		 * Open all modules and devices down stream to notify
		 * that another user is streaming.  For modules, set the
		 * last argument to MODOPEN and do not pass any open flags.
		 * Ignore dummydev since this is not the first open.
		 */
		claimstr(stp->sd_wrq);
		qp = stp->sd_wrq;
		while (_SAMESTR(qp)) {
			qp = qp->q_next;
			/* Stop at the first queue whose re-open fails. */
			if ((error = qreopen(_RD(qp), devp, flag, crp)) != 0)
				break;
		}
		releasestr(stp->sd_wrq);
		mutex_enter(&stp->sd_lock);
		/*
		 * The hangup/error state is cleared here whether or not
		 * one of the qreopen() calls above failed.
		 */
		stp->sd_flag &= ~(STRHUP|STWOPEN|STRDERR|STWRERR);
		stp->sd_rerror = 0;
		stp->sd_werror = 0;
ckreturn:
		cv_broadcast(&stp->sd_monitor);
		mutex_exit(&stp->sd_lock);
		return (error);
	}

	/*
	 * This vnode isn't streaming.  SPECFS already
	 * checked for multiple vnodes pointing to the
	 * same stream, so create a stream to the driver.
	 */
	qp = allocq();
	stp = shalloc(qp);

	/*
	 * Initialize stream head.  shalloc() has given us
	 * exclusive access, and we have the vnode locked;
	 * we can do whatever we want with stp.
	 */
	stp->sd_flag = STWOPEN;
	stp->sd_siglist = NULL;
	stp->sd_pollist.ph_list = NULL;
	stp->sd_sigflags = 0;
	stp->sd_mark = NULL;
	stp->sd_closetime = STRTIMOUT;
	stp->sd_sidp = NULL;
	stp->sd_pgidp = NULL;
	stp->sd_vnode = vp;
	stp->sd_rerror = 0;
	stp->sd_werror = 0;
	stp->sd_wroff = 0;
	stp->sd_tail = 0;
	stp->sd_iocblk = NULL;
	stp->sd_cmdblk = NULL;
	stp->sd_pushcnt = 0;
	stp->sd_qn_minpsz = 0;
	stp->sd_qn_maxpsz = INFPSZ - 1;	/* used to check for initialization */
	stp->sd_maxblk = INFPSZ;
	stp->sd_sodirect = NULL;
	qp->q_ptr = _WR(qp)->q_ptr = stp;
	STREAM(qp) = STREAM(_WR(qp)) = stp;
	vp->v_stream = stp;
	mutex_exit(&vp->v_lock);
	if (vp->v_type == VFIFO) {
		stp->sd_flag |= OLDNDELAY;
		/*
		 * This means, both for pipes and fifos
		 * strwrite will send SIGPIPE if the other
		 * end is closed. For putmsg it depends
		 * on whether it is a XPG4_2 application
		 * or not
		 */
		stp->sd_wput_opt = SW_SIGPIPE;

		/* setq might sleep in kmem_alloc - avoid holding locks. */
		setq(qp, &fifo_strdata, &fifo_stwdata, NULL, QMTSAFE,
		    SQ_CI|SQ_CO, B_FALSE);

		set_qend(qp);
		stp->sd_strtab = fifo_getinfo();
		_WR(qp)->q_nfsrv = _WR(qp);
		qp->q_nfsrv = qp;
		/*
		 * Wake up others that are waiting for stream to be created.
		 */
		mutex_enter(&stp->sd_lock);
		/*
		 * nothing has been pushed on the stream yet, so the
		 * optimized stream head packet sizes are just those
		 * of the read queue
		 */
		stp->sd_qn_minpsz = qp->q_minpsz;
		stp->sd_qn_maxpsz = qp->q_maxpsz;
		stp->sd_flag &= ~STWOPEN;
		/* sd_lock is still held; fifo_opendone drops it. */
		goto fifo_opendone;
	}
	/* setq might sleep in kmem_alloc - avoid holding locks. */
	setq(qp, &strdata, &stwdata, NULL, QMTSAFE, SQ_CI|SQ_CO, B_FALSE);

	set_qend(qp);

	/*
	 * Open driver and create stream to it (via qattach).
	 */
	savedev = *devp;
	cloneopen = (getmajor(*devp) == clone_major);
	if ((error = qattach(qp, devp, flag, crp, NULL, B_FALSE)) != 0) {
		/*
		 * The driver open failed: detach the stream head from
		 * the vnode, wake any waiters, and free what was
		 * allocated above.
		 */
		mutex_enter(&vp->v_lock);
		vp->v_stream = NULL;
		mutex_exit(&vp->v_lock);
		mutex_enter(&stp->sd_lock);
		cv_broadcast(&stp->sd_monitor);
		mutex_exit(&stp->sd_lock);
		freeq(_RD(qp));
		shfree(stp);
		return (error);
	}
	/*
	 * Set sd_strtab after open in order to handle clonable drivers
	 */
	stp->sd_strtab = STREAMSTAB(getmajor(*devp));

	/*
	 * Historical note: dummydev used to be set prior to the initial
	 * open (via qattach above), which made the value seen
	 * inconsistent between an I_PUSH and an autopush of a module.
	 */
	dummydev = *devp;

	/*
	 * For clone open of old style (Q not associated) network driver,
	 * push DRMODNAME module to handle DL_ATTACH/DL_DETACH
	 */
	brq = _RD(_WR(qp)->q_next);
	major = getmajor(*devp);
	if (push_drcompat && cloneopen && NETWORK_DRV(major) &&
	    ((brq->q_flag & _QASSOCIATED) == 0)) {
		/* A failure here is only warned about; the open goes on. */
		if (push_mod(qp, &dummydev, stp, DRMODNAME, 0, crp, 0) != 0)
			cmn_err(CE_WARN, "cannot push " DRMODNAME
			    " streams module");
	}

	if (!NETWORK_DRV(major)) {
		savedev = *devp;
	} else {
		/*
		 * For network devices, process differently based on the
		 * return value from dld_autopush():
		 *
		 *   0: the passed-in device points to a GLDv3 datalink with
		 *   per-link autopush configuration; use that configuration
		 *   and ignore any per-driver autopush configuration.
		 *
		 *   1: the passed-in device points to a physical GLDv3
		 *   datalink without per-link autopush configuration.  The
		 *   passed in device was changed to refer to the actual
		 *   physical device (if it's not already); we use that new
		 *   device to look up any per-driver autopush configuration.
		 *
		 *   -1: neither of the above cases applied; use the initial
		 *   device to look up any per-driver autopush configuration.
		 */
		switch (dld_autopush(&savedev, &dlap)) {
		case 0:
			zoneid = crgetzoneid(crp);
			for (s = 0; s < dlap.dap_npush; s++) {
				error = push_mod(qp, &dummydev, stp,
				    dlap.dap_aplist[s], dlap.dap_anchor, crp,
				    zoneid);
				if (error != 0)
					break;
			}
			goto opendone;
		case 1:
			break;
		case -1:
			savedev = *devp;
			break;
		}
	}
	/*
	 * Find the autopush configuration based on "savedev". Start with the
	 * global zone. If not found check in the local zone.
	 */
	zoneid = GLOBAL_ZONEID;
retryap:
	ss = netstack_find_by_stackid(zoneid_to_netstackid(zoneid))->
	    netstack_str;
	if ((ap = sad_ap_find_by_dev(savedev, ss)) == NULL) {
		netstack_rele(ss->ss_netstack);
		if (zoneid == GLOBAL_ZONEID) {
			/*
			 * None found. Also look in the zone's autopush table.
			 */
			zoneid = crgetzoneid(crp);
			if (zoneid != GLOBAL_ZONEID)
				goto retryap;
		}
		goto opendone;
	}
	anchor = ap->ap_anchor;
	zoneid = crgetzoneid(crp);
	/* Push the configured modules in order; stop at the first failure. */
	for (s = 0; s < ap->ap_npush; s++) {
		error = push_mod(qp, &dummydev, stp, ap->ap_list[s],
		    anchor, crp, zoneid);
		if (error != 0)
			break;
	}
	sad_ap_rele(ap, ss);
	netstack_rele(ss->ss_netstack);

opendone:

	/*
	 * let specfs know that open failed part way through
	 */
	if (error) {
		mutex_enter(&stp->sd_lock);
		stp->sd_flag |= STREOPENFAIL;
		mutex_exit(&stp->sd_lock);
	}

	/*
	 * Wake up others that are waiting for stream to be created.
	 */
	mutex_enter(&stp->sd_lock);
	stp->sd_flag &= ~STWOPEN;

	/*
	 * As a performance concern we are caching the values of
	 * q_minpsz and q_maxpsz of the module below the stream
	 * head in the stream head.
	 */
	mutex_enter(QLOCK(stp->sd_wrq->q_next));
	rmin = stp->sd_wrq->q_next->q_minpsz;
	rmax = stp->sd_wrq->q_next->q_maxpsz;
	mutex_exit(QLOCK(stp->sd_wrq->q_next));

	/* do this processing here as a performance concern */
	if (strmsgsz != 0) {
		if (rmax == INFPSZ)
			rmax = strmsgsz;
		else
			rmax = MIN(strmsgsz, rmax);
	}

	mutex_enter(QLOCK(stp->sd_wrq));
	stp->sd_qn_minpsz = rmin;
	stp->sd_qn_maxpsz = rmax;
	mutex_exit(QLOCK(stp->sd_wrq));

fifo_opendone:
	/* Both paths arrive here with sd_lock held. */
	cv_broadcast(&stp->sd_monitor);
	mutex_exit(&stp->sd_lock);
	return (error);
}

/*
 * qinit structures that strclose() installs on the stream head queues of
 * a closed pipe end whose peer is still open.  The read side uses
 * strsink(); the write side has no procedures at all.
 */
static int strsink(queue_t *, mblk_t *);
static struct qinit deadrend = {
	strsink, NULL, NULL, NULL, NULL, &strm_info, NULL
};
static struct qinit deadwend = {
	NULL, NULL, NULL, NULL, NULL, &stwm_info, NULL
};

/*
 * Close a stream.
 * This is called from closef() on the last close of an open stream.
 * Strclean() will already have removed the siglist and pollist
 * information, so all that remains is to remove all multiplexor links
 * for the stream, pop all the modules (and the driver), and free the
 * stream structure.
 *
 * Always returns 0.
 */

int
strclose(struct vnode *vp, int flag, cred_t *crp)
{
	struct stdata *stp;
	queue_t *qp;
	int rval;
	int freestp = 1;	/* cleared when the head becomes a dead end */
	queue_t *rmq;

	if (audit_active)
		audit_strclose(vp, flag, crp);

	TRACE_1(TR_FAC_STREAMS_FR,
	    TR_STRCLOSE, "strclose:%p", vp);
	ASSERT(vp->v_stream);

	stp = vp->v_stream;
	ASSERT(!(stp->sd_flag & STPLEX));
	qp = stp->sd_wrq;

	/*
	 * Needed so that strpoll will return non-zero for this fd.
	 * Note that with POLLNOERR STRHUP does still cause POLLHUP.
	 */
	mutex_enter(&stp->sd_lock);
	stp->sd_flag |= STRHUP;
	mutex_exit(&stp->sd_lock);

	/*
	 * If the registered process or process group did not have an
	 * open instance of this stream then strclean would not be
	 * called. Thus at the time of closing all remaining siglist entries
	 * are removed.
	 */
	if (stp->sd_siglist != NULL)
		strcleanall(vp);

	ASSERT(stp->sd_siglist == NULL);
	ASSERT(stp->sd_sigflags == 0);

	if (STRMATED(stp)) {
		struct stdata *strmatep = stp->sd_mate;
		int waited = 1;

		/*
		 * Wait until neither this stream nor its mate is being
		 * opened, closed or plumbed.  Each wait drops one of the
		 * two locks, so after waking both are re-taken with
		 * STRLOCKMATES() and both conditions are checked again;
		 * the outer loop ends only after a pass with no waiting.
		 */
		STRLOCKMATES(stp);
		while (waited) {
			waited = 0;
			while (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
				mutex_exit(&strmatep->sd_lock);
				cv_wait(&stp->sd_monitor, &stp->sd_lock);
				mutex_exit(&stp->sd_lock);
				STRLOCKMATES(stp);
				waited = 1;
			}
			while (strmatep->sd_flag &
			    (STWOPEN|STRCLOSE|STRPLUMB)) {
				mutex_exit(&stp->sd_lock);
				cv_wait(&strmatep->sd_monitor,
				    &strmatep->sd_lock);
				mutex_exit(&strmatep->sd_lock);
				STRLOCKMATES(stp);
				waited = 1;
			}
		}
		stp->sd_flag |= STRCLOSE;
		STRUNLOCKMATES(stp);
	} else {
		mutex_enter(&stp->sd_lock);
		stp->sd_flag |= STRCLOSE;
		mutex_exit(&stp->sd_lock);
	}

	ASSERT(qp->q_first == NULL);	/* No more delayed write */

	/* Check if an I_LINK was ever done on this stream */
	if (stp->sd_flag & STRHASLINKS) {
		netstack_t *ns;
		str_stack_t *ss;

		ns = netstack_find_by_cred(crp);
		ASSERT(ns != NULL);
		ss = ns->netstack_str;
		ASSERT(ss != NULL);

		(void) munlinkall(stp, LINKCLOSE|LINKNORMAL, crp, &rval, ss);
		netstack_rele(ss->ss_netstack);
	}

	/* Pop everything below the stream head, one queue pair at a time. */
	while (_SAMESTR(qp)) {
		/*
		 * Holding sd_lock prevents q_next from changing in
		 * this stream.
		 */
		mutex_enter(&stp->sd_lock);
		if (!(flag & (FNDELAY|FNONBLOCK)) && (stp->sd_closetime > 0)) {

			/*
			 * sleep until awakened by strwsrv() or timeout
			 */
			for (;;) {
				mutex_enter(QLOCK(qp->q_next));
				if (!(qp->q_next->q_mblkcnt)) {
					/* Nothing left queued below. */
					mutex_exit(QLOCK(qp->q_next));
					break;
				}
				stp->sd_flag |= WSLEEP;

				/* ensure strwsrv gets enabled */
				qp->q_next->q_flag |= QWANTW;
				mutex_exit(QLOCK(qp->q_next));
				/* get out if we timed out or recv'd a signal */
				if (str_cv_wait(&qp->q_wait, &stp->sd_lock,
				    stp->sd_closetime, 0) <= 0) {
					break;
				}
			}
			stp->sd_flag &= ~WSLEEP;
		}
		mutex_exit(&stp->sd_lock);

		rmq = qp->q_next;
		if (rmq->q_flag & QISDRV) {
			ASSERT(!_SAMESTR(rmq));
			wait_sq_svc(_RD(qp)->q_syncq);
		}

		qdetach(_RD(rmq), 1, flag, crp, B_FALSE);
	}

	/*
	 * Since we call pollwakeup in close() now, the poll list should
	 * be empty in most cases. The only exception is the layered devices
	 * (e.g. the console drivers with redirection modules pushed on top
	 * of it).  We have to do this after calling qdetach() because
	 * the redirection module won't have torn down the console
	 * redirection until after qdetach() has been invoked.
	 */
	if (stp->sd_pollist.ph_list != NULL) {
		pollwakeup(&stp->sd_pollist, POLLERR);
		pollhead_clean(&stp->sd_pollist);
	}
	ASSERT(stp->sd_pollist.ph_list == NULL);
	ASSERT(stp->sd_sidp == NULL);
	ASSERT(stp->sd_pgidp == NULL);

	/* Prevent qenable from re-enabling the stream head queue */
	disable_svc(_RD(qp));

	/*
	 * Wait until service procedure of each queue is
	 * run, if QINSERVICE is set.
	 */
	wait_svc(_RD(qp));

	/*
	 * Now, flush both queues.
	 */
	flushq(_RD(qp), FLUSHALL);
	flushq(qp, FLUSHALL);

	/*
	 * If the write queue of the stream head is pointing to a
	 * read queue, we have a twisted stream.  If the read queue
	 * is alive, convert the stream head queues into a dead end.
	 * If the read queue is dead, free the dead pair.
	 */
	if (qp->q_next && !_SAMESTR(qp)) {
		if (qp->q_next->q_qinfo == &deadrend) {	/* half-closed pipe */
			flushq(qp->q_next, FLUSHALL); /* ensure no message */
			shfree(qp->q_next->q_stream);
			freeq(qp->q_next);
			freeq(_RD(qp));
		} else if (qp->q_next == _RD(qp)) {	/* fifo */
			freeq(_RD(qp));
		} else {				/* pipe */
			/*
			 * The other end is still open, so this stream
			 * head must survive: it is not freed below.
			 */
			freestp = 0;
			/*
			 * The q_info pointers are never accessed when
			 * SQLOCK is held.
			 */
			ASSERT(qp->q_syncq == _RD(qp)->q_syncq);
			mutex_enter(SQLOCK(qp->q_syncq));
			qp->q_qinfo = &deadwend;
			_RD(qp)->q_qinfo = &deadrend;
			mutex_exit(SQLOCK(qp->q_syncq));
		}
	} else {
		freeq(_RD(qp)); /* free stream head queue pair */
	}

	mutex_enter(&vp->v_lock);
	if (stp->sd_iocblk) {
		/* (mblk_t *)-1 is a marker value, not a real message. */
		if (stp->sd_iocblk != (mblk_t *)-1) {
			freemsg(stp->sd_iocblk);
		}
		stp->sd_iocblk = NULL;
	}
	/* Break the vnode <-> stream head association under v_lock. */
	stp->sd_vnode = NULL;
	vp->v_stream = NULL;
	mutex_exit(&vp->v_lock);
	mutex_enter(&stp->sd_lock);
	freemsg(stp->sd_cmdblk);
	stp->sd_cmdblk = NULL;
	stp->sd_flag &= ~STRCLOSE;
	cv_broadcast(&stp->sd_monitor);
	mutex_exit(&stp->sd_lock);

	if (freestp)
		shfree(stp);
	return (0);
}

/*
 * First procedure of the deadrend qinit: disposes of messages that reach
 * the read side of a dead-end stream head.
 *
 *	M_FLUSH		if FLUSHW is set and the message has not already
 *			been looped (MSGNOLOOP), FLUSHR is cleared,
 *			MSGNOLOOP is set and the message is sent back;
 *			otherwise it is freed.
 *	M_COPYIN,
 *	M_COPYOUT	turned into an M_IOCDATA failure response
 *			(cp_rval == 1) and sent back.
 *	M_IOCTL		turned into an M_IOCNAK and sent back.
 *	anything else	freed.
 *
 * "Sent back" means qreply(); when the other queue of the pair has no
 * q_next the message is freed instead.  Always returns 0.
 */
static int
strsink(queue_t *q, mblk_t *bp)
{
	struct copyresp *resp;

	switch (bp->b_datap->db_type) {
	case M_FLUSH:
		if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) {
			*bp->b_rptr &= ~FLUSHR;
			bp->b_flag |= MSGNOLOOP;
			/*
			 * Protect against the driver passing up
			 * messages after it has done a qprocsoff.
			 */
			if (_OTHERQ(q)->q_next == NULL)
				freemsg(bp);
			else
				qreply(q, bp);
		} else {
			freemsg(bp);
		}
		break;

	case M_COPYIN:
	case M_COPYOUT:
		/* Drop any attached data; only the response header is sent. */
		if (bp->b_cont) {
			freemsg(bp->b_cont);
			bp->b_cont = NULL;
		}
		bp->b_datap->db_type = M_IOCDATA;
		bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
		resp = (struct copyresp *)bp->b_rptr;
		resp->cp_rval = (caddr_t)1;	/* failure */
		/*
		 * Protect against the driver passing up
		 * messages after it has done a qprocsoff.
		 */
		if (_OTHERQ(q)->q_next == NULL)
			freemsg(bp);
		else
			qreply(q, bp);
		break;

	case M_IOCTL:
		if (bp->b_cont) {
			freemsg(bp->b_cont);
			bp->b_cont = NULL;
		}
		bp->b_datap->db_type = M_IOCNAK;
		/*
		 * Protect against the driver passing up
		 * messages after it has done a qprocsoff.
		 */
		if (_OTHERQ(q)->q_next == NULL)
			freemsg(bp);
		else
			qreply(q, bp);
		break;

	default:
		freemsg(bp);
		break;
	}

	return (0);
}

/*
 * Clean up after a process when it closes a stream.  This is called
 * from closef for all closes, whereas strclose is called only for the
 * last close on a stream.  The siglist is scanned for entries for the
 * current process, and these are removed.
905 */ 906 void 907 strclean(struct vnode *vp) 908 { 909 strsig_t *ssp, *pssp, *tssp; 910 stdata_t *stp; 911 int update = 0; 912 913 TRACE_1(TR_FAC_STREAMS_FR, 914 TR_STRCLEAN, "strclean:%p", vp); 915 stp = vp->v_stream; 916 pssp = NULL; 917 mutex_enter(&stp->sd_lock); 918 ssp = stp->sd_siglist; 919 while (ssp) { 920 if (ssp->ss_pidp == curproc->p_pidp) { 921 tssp = ssp->ss_next; 922 if (pssp) 923 pssp->ss_next = tssp; 924 else 925 stp->sd_siglist = tssp; 926 mutex_enter(&pidlock); 927 PID_RELE(ssp->ss_pidp); 928 mutex_exit(&pidlock); 929 kmem_free(ssp, sizeof (strsig_t)); 930 update = 1; 931 ssp = tssp; 932 } else { 933 pssp = ssp; 934 ssp = ssp->ss_next; 935 } 936 } 937 if (update) { 938 stp->sd_sigflags = 0; 939 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 940 stp->sd_sigflags |= ssp->ss_events; 941 } 942 mutex_exit(&stp->sd_lock); 943 } 944 945 /* 946 * Used on the last close to remove any remaining items on the siglist. 947 * These could be present on the siglist due to I_ESETSIG calls that 948 * use process groups or processed that do not have an open file descriptor 949 * for this stream (Such entries would not be removed by strclean). 950 */ 951 static void 952 strcleanall(struct vnode *vp) 953 { 954 strsig_t *ssp, *nssp; 955 stdata_t *stp; 956 957 stp = vp->v_stream; 958 mutex_enter(&stp->sd_lock); 959 ssp = stp->sd_siglist; 960 stp->sd_siglist = NULL; 961 while (ssp) { 962 nssp = ssp->ss_next; 963 mutex_enter(&pidlock); 964 PID_RELE(ssp->ss_pidp); 965 mutex_exit(&pidlock); 966 kmem_free(ssp, sizeof (strsig_t)); 967 ssp = nssp; 968 } 969 stp->sd_sigflags = 0; 970 mutex_exit(&stp->sd_lock); 971 } 972 973 /* 974 * Retrieve the next message from the logical stream head read queue 975 * using either rwnext (if sync stream) or getq_noenab. 976 * It is the callers responsibility to call qbackenable after 977 * it is finished with the message. The caller should not call 978 * qbackenable until after any putback calls to avoid spurious backenabling. 
 *
 * Also, handle uioa initialization and process any DBLK_UIOA flagged messages.
 *
 * Must be called with sd_lock held and returns with it held, although the
 * lock is dropped and re-taken internally around rwnext() and qreply().
 * `*errorp' is always set: to the rwnext() error when NULL is returned
 * because of one, and to 0 in every other case (including when NULL is
 * returned simply because no message was available).
 */
mblk_t *
strget(struct stdata *stp, queue_t *q, struct uio *uiop, int first,
    int *errorp)
{
	sodirect_t *sodp = stp->sd_sodirect;
	mblk_t *bp;
	int error;
	ssize_t rbytes = 0;

	/* Holding sd_lock prevents the read queue from changing */
	ASSERT(MUTEX_HELD(&stp->sd_lock));

	if (uiop != NULL && stp->sd_struiordq != NULL &&
	    q->q_first == NULL &&
	    (!first || (stp->sd_wakeq & RSLEEP))) {
		/*
		 * Stream supports rwnext() for the read side.
		 * If this is the first time we're called by e.g. strread
		 * only do the downcall if there is a deferred wakeup
		 * (registered in sd_wakeq).
		 */
		struiod_t uiod;

		if (first)
			stp->sd_wakeq &= ~RSLEEP;

		/* rwnext() works on a private copy of the caller's uio. */
		(void) uiodup(uiop, &uiod.d_uio, uiod.d_iov,
		    sizeof (uiod.d_iov) / sizeof (*uiod.d_iov));
		uiod.d_mp = 0;
		/*
		 * Mark that a thread is in rwnext on the read side
		 * to prevent strrput from nacking ioctls immediately.
		 * When the last concurrent rwnext returns
		 * the ioctls are nack'ed.
		 */
		ASSERT(MUTEX_HELD(&stp->sd_lock));
		stp->sd_struiodnak++;
		/*
		 * Note: rwnext will drop sd_lock.
		 */
		error = rwnext(q, &uiod);
		ASSERT(MUTEX_NOT_HELD(&stp->sd_lock));
		mutex_enter(&stp->sd_lock);
		stp->sd_struiodnak--;
		/*
		 * If we were the last thread in rwnext, nack the ioctls
		 * that were deferred on sd_struionak.  The count is
		 * re-tested on every iteration because sd_lock is dropped
		 * around qreply().
		 */
		while (stp->sd_struiodnak == 0 &&
		    ((bp = stp->sd_struionak) != NULL)) {
			stp->sd_struionak = bp->b_next;
			bp->b_next = NULL;
			bp->b_datap->db_type = M_IOCNAK;
			/*
			 * Protect against the driver passing up
			 * messages after it has done a qprocsoff.
			 */
			if (_OTHERQ(q)->q_next == NULL)
				freemsg(bp);
			else {
				mutex_exit(&stp->sd_lock);
				qreply(q, bp);
				mutex_enter(&stp->sd_lock);
			}
		}
		ASSERT(MUTEX_HELD(&stp->sd_lock));
		if (error == 0 || error == EWOULDBLOCK) {
			if ((bp = uiod.d_mp) != NULL) {
				/* rwnext() handed us a message directly. */
				*errorp = 0;
				ASSERT(MUTEX_HELD(&stp->sd_lock));
				return (bp);
			}
			error = 0;
		} else if (error == EINVAL) {
			/*
			 * The stream plumbing must have
			 * changed while we were away, so
			 * just turn off rwnext()s.
			 */
			error = 0;
		} else if (error == EBUSY) {
			/*
			 * The module might have data in transit using putnext
			 * Fall back on waiting + getq.
			 */
			error = 0;
		} else {
			*errorp = error;
			ASSERT(MUTEX_HELD(&stp->sd_lock));
			return (NULL);
		}
		/*
		 * Try a getq in case a rwnext() generated mblk
		 * has bubbled up via strrput().
		 */
	}
	*errorp = 0;
	ASSERT(MUTEX_HELD(&stp->sd_lock));

	/*
	 * Work out rbytes, the byte count handed to getq_noenab().  It
	 * stays 0 unless one of the branches below sets it from uio_resid.
	 */
	if (sodp != NULL && sodp->sod_state & SOD_ENABLED) {
		if (sodp->sod_uioa.uioa_state & UIOA_INIT) {
			/*
			 * First kstrgetmsg() call for an uioa_t so if any
			 * queued mblk_t's need to consume them before uioa
			 * from below can occur.
			 */
			sodp->sod_uioa.uioa_state &= UIOA_CLR;
			sodp->sod_uioa.uioa_state |= UIOA_ENABLED;
			if (q->q_first != NULL) {
				struioainit(q, sodp, uiop);
			}
		} else if (sodp->sod_uioa.uioa_state &
		    (UIOA_ENABLED|UIOA_FINI)) {
			ASSERT(uiop == (uio_t *)&sodp->sod_uioa);
			rbytes = 0;
		} else {
			rbytes = uiop->uio_resid;
		}
	} else {
		/*
		 * If we have a valid uio, try and use this as a guide for how
		 * many bytes to retrieve from the queue via getq_noenab().
		 * Doing this can avoid unnecessary counting of overlong
		 * messages in putback(). We currently only do this for sockets
		 * and only if there is no sd_rputdatafunc hook.
		 *
		 * The sd_rputdatafunc hook transforms the entire message
		 * before any bytes in it can be given to a client. So, rbytes
		 * must be 0 if there is a hook.
		 */
		if ((uiop != NULL) && (stp->sd_vnode->v_type == VSOCK) &&
		    (stp->sd_rputdatafunc == NULL))
			rbytes = uiop->uio_resid;
	}

	bp = getq_noenab(q, rbytes);
	/*
	 * NOTE(review): this is called even when sodp is NULL or bp is
	 * NULL; presumably sod_uioa_mblk_done() tolerates both - confirm.
	 */
	sod_uioa_mblk_done(sodp, bp);

	return (bp);
}

/*
 * Copy out the message pointed to by `bp' into the uio pointed to by `uiop'.
 * If the message does not fit in the uio the remainder of it is returned;
 * otherwise NULL is returned.  Any embedded zero-length mblk_t's are
 * consumed, even if uio_resid reaches zero.  On error, `*errorp' is set to
 * the error code, the message is consumed, and NULL is returned.
 *
 * On success `*errorp' is set to 0.  `bp' must not be NULL on entry, and
 * the mblks that have been fully copied out are freed as the copy proceeds.
 */
static mblk_t *
struiocopyout(mblk_t *bp, struct uio *uiop, int *errorp)
{
	int error;
	ptrdiff_t n;
	mblk_t *nbp;

	ASSERT(bp->b_wptr >= bp->b_rptr);

	do {
		ASSERT(!(bp->b_datap->db_flags & DBLK_UIOA));

		/* Copy as much of this mblk as the uio still has room for. */
		if ((n = MIN(uiop->uio_resid, MBLKL(bp))) != 0) {
			ASSERT(n > 0);

			error = uiomove(bp->b_rptr, n, UIO_READ, uiop);
			if (error != 0) {
				freemsg(bp);
				*errorp = error;
				return (NULL);
			}
		}

		/*
		 * Advance past what was copied, then free this mblk and
		 * any following ones that are empty.  This also runs when
		 * n is 0, which is how zero-length mblks get consumed.
		 */
		bp->b_rptr += n;
		while (bp != NULL && (bp->b_rptr >= bp->b_wptr)) {
			nbp = bp;
			bp = bp->b_cont;
			freeb(nbp);
		}
	} while (bp != NULL && uiop->uio_resid > 0);

	*errorp = 0;
	return (bp);
}

/*
 * Read a stream according to the mode flags in sd_flag:
 *
 * (default mode)		- Byte stream, msg boundaries are ignored
 * RD_MSGDIS (msg discard)	- Read on msg boundaries and throw away
 *				any data remaining in msg
 * RD_MSGNODIS (msg non-discard) - Read on msg boundaries and put back
 *				any remaining data on head of read queue
 *
 * Consume readable messages on the front of the queue until
 * ttolwp(curthread)->lwp_count
 * is satisfied, the readable messages are exhausted, or a message
 * boundary is reached in a message mode. If no data was read and
 * the stream was not opened with the NDELAY flag, block until data arrives.
 * Otherwise return the data read and update the count.
 *
 * In default mode a 0 length message signifies end-of-file and terminates
 * a read in progress. The 0 length message is removed from the queue
 * only if it is the only message read (no data is read).
 *
 * An attempt to read an M_PROTO or M_PCPROTO message results in an
 * EBADMSG error return, unless either RD_PROTDAT or RD_PROTDIS are set.
 * If RD_PROTDAT is set, M_PROTO and M_PCPROTO messages are read as data.
 * If RD_PROTDIS is set, the M_PROTO and M_PCPROTO parts of the message
 * are unlinked from any M_DATA blocks in the message, the protos are
 * thrown away, and the data is read.
 *
 * `vp' must have a stream attached (v_stream is asserted non-NULL), `uiop'
 * describes the destination buffer, and `crp' is only handed to the trace
 * points.  Returns 0 or an errno value.  sd_lock is taken and released
 * internally; every exit path goes through qbackenable() except the two
 * early error returns before the main loop.
 */
/* ARGSUSED */
int
strread(struct vnode *vp, struct uio *uiop, cred_t *crp)
{
	struct stdata *stp;
	mblk_t *bp, *nbp;
	queue_t *q;
	int error = 0;
	uint_t old_sd_flag;	/* sd_flag sampled at the top of each pass */
	int first;		/* passed to strget(); 1 on first attempt */
	char rflg;		/* set to 1 once data is being copied out */
	uint_t mark;		/* Contains MSG*MARK and _LASTMARK */
#define	_LASTMARK 0x8000	/* Distinct from MSG*MARK */
	short delim;		/* message carried MSGDELIM under STRDELIM */
	unsigned char pri = 0;	/* band of the last message taken */
	char waitflag;
	unsigned char type;

	TRACE_1(TR_FAC_STREAMS_FR,
	    TR_STRREAD_ENTER, "strread:%p", vp);
	ASSERT(vp->v_stream);
	stp = vp->v_stream;

	mutex_enter(&stp->sd_lock);

	if ((error = i_straccess(stp, JCREAD)) != 0) {
		mutex_exit(&stp->sd_lock);
		return (error);
	}

	if (stp->sd_flag & (STRDERR|STPLEX)) {
		error = strgeterr(stp, STRDERR|STPLEX, 0);
		if (error != 0) {
			mutex_exit(&stp->sd_lock);
			return (error);
		}
	}

	/*
	 * Loop terminates when uiop->uio_resid == 0.
	 */
	rflg = 0;
	waitflag = READWAIT;
	q = _RD(stp->sd_wrq);
	for (;;) {
		ASSERT(MUTEX_HELD(&stp->sd_lock));
		old_sd_flag = stp->sd_flag;
		mark = 0;
		delim = 0;
		first = 1;
		/*
		 * Keep asking strget() for a message, sleeping in strwaitq()
		 * between attempts, until one is available or one of the
		 * stop conditions below is met.
		 */
		while ((bp = strget(stp, q, uiop, first, &error)) == NULL) {
			int done = 0;

			ASSERT(MUTEX_HELD(&stp->sd_lock));

			if (error != 0)
				goto oops;

			if (stp->sd_flag & (STRHUP|STREOF)) {
				goto oops;
			}
			/* Already have data and not in delimited mode */
			if (rflg && !(stp->sd_flag & STRDELIM)) {
				goto oops;
			}
			/*
			 * If a read(fd,buf,0) has been done, there is no
			 * need to sleep. We always have zero bytes to
			 * return.
			 */
			if (uiop->uio_resid == 0) {
				goto oops;
			}

			qbackenable(q, 0);

			TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_WAIT,
			    "strread calls strwaitq:%p, %p, %p",
			    vp, uiop, crp);
			if ((error = strwaitq(stp, waitflag, uiop->uio_resid,
			    uiop->uio_fmode, -1, &done)) != 0 || done) {
				TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_DONE,
				    "strread error or done:%p, %p, %p",
				    vp, uiop, crp);
				/*
				 * With FNDELAY on the uio and OLDNDELAY on
				 * the stream, EAGAIN is reported as success.
				 */
				if ((uiop->uio_fmode & FNDELAY) &&
				    (stp->sd_flag & OLDNDELAY) &&
				    (error == EAGAIN))
					error = 0;
				goto oops;
			}
			TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_AWAKE,
			    "strread awakes:%p, %p, %p", vp, uiop, crp);
			if ((error = i_straccess(stp, JCREAD)) != 0) {
				goto oops;
			}
			first = 0;
		}

		ASSERT(MUTEX_HELD(&stp->sd_lock));
		ASSERT(bp);
		ASSERT(!(bp->b_datap->db_flags & DBLK_UIOA));
		pri = bp->b_band;
		/*
		 * Extract any mark information. If the message is not
		 * completely consumed this information will be put in the mblk
		 * that is putback.
		 * If MSGMARKNEXT is set and the message is completely consumed
		 * the STRATMARK flag will be set below. Likewise, if
		 * MSGNOTMARKNEXT is set and the message is
		 * completely consumed STRNOTATMARK will be set.
		 *
		 * For some unknown reason strread only breaks the read at the
		 * last mark.
		 */
		mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT);
		ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
		    (MSGMARKNEXT|MSGNOTMARKNEXT));
		if (mark != 0 && bp == stp->sd_mark) {
			/* Data already read: stop before the marked message */
			if (rflg) {
				putback(stp, q, bp, pri);
				goto oops;
			}
			mark |= _LASTMARK;
			stp->sd_mark = NULL;
		}
		if ((stp->sd_flag & STRDELIM) && (bp->b_flag & MSGDELIM))
			delim = 1;
		/* From here on the message is processed without sd_lock */
		mutex_exit(&stp->sd_lock);

		if (STREAM_NEEDSERVICE(stp))
			stream_runservice(stp);

		type = bp->b_datap->db_type;

		switch (type) {

		case M_DATA:
ismdata:
			if (msgnodata(bp)) {
				if (mark || delim) {
					freemsg(bp);
				} else if (rflg) {
					/*
					 * If already read data put zero
					 * length message back on queue else
					 * free msg and return 0.
					 */
					bp->b_band = pri;
					mutex_enter(&stp->sd_lock);
					putback(stp, q, bp, pri);
					mutex_exit(&stp->sd_lock);
				} else {
					freemsg(bp);
				}
				error = 0;
				goto oops1;
			}

			rflg = 1;
			waitflag |= NOINTR;
			/* bp becomes the unread remainder, or NULL */
			bp = struiocopyout(bp, uiop, &error);
			if (error != 0)
				goto oops1;

			mutex_enter(&stp->sd_lock);
			if (bp) {
				/*
				 * Have remaining data in message.
				 * Free msg if in discard mode.
				 */
				if (stp->sd_read_opt & RD_MSGDIS) {
					freemsg(bp);
				} else {
					/*
					 * Restore the band, mark and delim
					 * state on the remainder before it
					 * goes back on the queue.
					 */
					bp->b_band = pri;
					if ((mark & _LASTMARK) &&
					    (stp->sd_mark == NULL))
						stp->sd_mark = bp;
					bp->b_flag |= mark & ~_LASTMARK;
					if (delim)
						bp->b_flag |= MSGDELIM;
					if (msgnodata(bp))
						freemsg(bp);
					else
						putback(stp, q, bp, pri);
				}
			} else {
				/*
				 * Consumed the complete message.
				 * Move the MSG*MARKNEXT information
				 * to the stream head just in case
				 * the read queue becomes empty.
				 *
				 * If the stream head was at the mark
				 * (STRATMARK) before we dropped sd_lock above
				 * and some data was consumed then we have
				 * moved past the mark thus STRATMARK is
				 * cleared. However, if a message arrived in
				 * strrput during the copyout above causing
				 * STRATMARK to be set we can not clear that
				 * flag.
				 */
				if (mark &
				    (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) {
					if (mark & MSGMARKNEXT) {
						stp->sd_flag &= ~STRNOTATMARK;
						stp->sd_flag |= STRATMARK;
					} else if (mark & MSGNOTMARKNEXT) {
						stp->sd_flag &= ~STRATMARK;
						stp->sd_flag |= STRNOTATMARK;
					} else {
						stp->sd_flag &=
						    ~(STRATMARK|STRNOTATMARK);
					}
				} else if (rflg && (old_sd_flag & STRATMARK)) {
					stp->sd_flag &= ~STRATMARK;
				}
			}

			/*
			 * Check for signal messages at the front of the read
			 * queue and generate the signal(s) if appropriate.
			 * The only signal that can be on queue is M_SIG at
			 * this point.
			 */
			while ((((bp = q->q_first)) != NULL) &&
			    (bp->b_datap->db_type == M_SIG)) {
				bp = getq_noenab(q, 0);
				/*
				 * sd_lock is held so the content of the
				 * read queue can not change.
				 */
				ASSERT(bp != NULL && DB_TYPE(bp) == M_SIG);
				strsignal_nolock(stp, *bp->b_rptr, bp->b_band);
				mutex_exit(&stp->sd_lock);
				freemsg(bp);
				if (STREAM_NEEDSERVICE(stp))
					stream_runservice(stp);
				mutex_enter(&stp->sd_lock);
			}

			/*
			 * Stop when the buffer is full, the last mark or a
			 * delimiter was reached, or a message mode is set;
			 * otherwise go around for the next message.
			 */
			if ((uiop->uio_resid == 0) || (mark & _LASTMARK) ||
			    delim ||
			    (stp->sd_read_opt & (RD_MSGDIS|RD_MSGNODIS))) {
				goto oops;
			}
			continue;

		case M_SIG:
			strsignal(stp, *bp->b_rptr, (int32_t)bp->b_band);
			freemsg(bp);
			mutex_enter(&stp->sd_lock);
			continue;

		case M_PROTO:
		case M_PCPROTO:
			/*
			 * Only data messages are readable.
			 * Any others generate an error, unless
			 * RD_PROTDIS or RD_PROTDAT is set.
			 */
			if (stp->sd_read_opt & RD_PROTDAT) {
				/*
				 * NOTE(review): this walk follows b_next,
				 * whereas the RD_PROTDIS branch below walks
				 * the blocks of the message via unlinkb().
				 * If b_next is NULL for a message taken off
				 * the queue, only the first block is
				 * retyped here -- confirm whether b_cont
				 * was intended.
				 */
				for (nbp = bp; nbp; nbp = nbp->b_next) {
					if ((nbp->b_datap->db_type ==
					    M_PROTO) ||
					    (nbp->b_datap->db_type ==
					    M_PCPROTO)) {
						nbp->b_datap->db_type = M_DATA;
					} else {
						break;
					}
				}
				/*
				 * clear stream head hi pri flag based on
				 * first message
				 */
				if (type == M_PCPROTO) {
					mutex_enter(&stp->sd_lock);
					stp->sd_flag &= ~STRPRI;
					mutex_exit(&stp->sd_lock);
				}
				goto ismdata;
			} else if (stp->sd_read_opt & RD_PROTDIS) {
				/*
				 * discard non-data messages
				 */
				while (bp &&
				    ((bp->b_datap->db_type == M_PROTO) ||
				    (bp->b_datap->db_type == M_PCPROTO))) {
					nbp = unlinkb(bp);
					freeb(bp);
					bp = nbp;
				}
				/*
				 * clear stream head hi pri flag based on
				 * first message
				 */
				if (type == M_PCPROTO) {
					mutex_enter(&stp->sd_lock);
					stp->sd_flag &= ~STRPRI;
					mutex_exit(&stp->sd_lock);
				}
				if (bp) {
					bp->b_band = pri;
					goto ismdata;
				} else {
					/* Nothing left; retake lock below */
					break;
				}
			}
			/* FALLTHRU */
		case M_PASSFP:
			if ((bp->b_datap->db_type == M_PASSFP) &&
			    (stp->sd_read_opt & RD_PROTDIS)) {
				freemsg(bp);
				break;
			}
			/*
			 * Unreadable message: put it back, and report
			 * EBADMSG only if no data has been read yet.
			 */
			mutex_enter(&stp->sd_lock);
			putback(stp, q, bp, pri);
			mutex_exit(&stp->sd_lock);
			if (rflg == 0)
				error = EBADMSG;
			goto oops1;

		default:
			/*
			 * Garbage on stream head read queue.
			 */
			cmn_err(CE_WARN, "bad %x found at stream head\n",
			    bp->b_datap->db_type);
			freemsg(bp);
			/* `error' is not modified on this path */
			goto oops1;
		}
		/* Cases that `break' re-enter the loop with sd_lock held */
		mutex_enter(&stp->sd_lock);
	}
oops:
	/* Reached with sd_lock held */
	mutex_exit(&stp->sd_lock);
oops1:
	/* Reached with sd_lock not held */
	qbackenable(q, pri);
	return (error);
#undef	_LASTMARK
}

/*
 * Default processing of M_PROTO/M_PCPROTO messages.
 * Determine which wakeups and signals are needed.
 * This can be replaced by a user-specified procedure for kernel users
 * of STREAMS.
1534 */ 1535 /* ARGSUSED */ 1536 mblk_t * 1537 strrput_proto(vnode_t *vp, mblk_t *mp, 1538 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 1539 strsigset_t *allmsgsigs, strpollset_t *pollwakeups) 1540 { 1541 *wakeups = RSLEEP; 1542 *allmsgsigs = 0; 1543 1544 switch (mp->b_datap->db_type) { 1545 case M_PROTO: 1546 if (mp->b_band == 0) { 1547 *firstmsgsigs = S_INPUT | S_RDNORM; 1548 *pollwakeups = POLLIN | POLLRDNORM; 1549 } else { 1550 *firstmsgsigs = S_INPUT | S_RDBAND; 1551 *pollwakeups = POLLIN | POLLRDBAND; 1552 } 1553 break; 1554 case M_PCPROTO: 1555 *firstmsgsigs = S_HIPRI; 1556 *pollwakeups = POLLPRI; 1557 break; 1558 } 1559 return (mp); 1560 } 1561 1562 /* 1563 * Default processing of everything but M_DATA, M_PROTO, M_PCPROTO and 1564 * M_PASSFP messages. 1565 * Determine which wakeups and signals are needed. 1566 * This can be replaced by a user-specified procedure for kernel users 1567 * of STREAMS. 1568 */ 1569 /* ARGSUSED */ 1570 mblk_t * 1571 strrput_misc(vnode_t *vp, mblk_t *mp, 1572 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 1573 strsigset_t *allmsgsigs, strpollset_t *pollwakeups) 1574 { 1575 *wakeups = 0; 1576 *firstmsgsigs = 0; 1577 *allmsgsigs = 0; 1578 *pollwakeups = 0; 1579 return (mp); 1580 } 1581 1582 /* 1583 * Stream read put procedure. Called from downstream driver/module 1584 * with messages for the stream head. Data, protocol, and in-stream 1585 * signal messages are placed on the queue, others are handled directly. 
 *
 * `q' is the stream head read queue (its q_ptr is the stdata) and `bp' is
 * a single message (b_next must be NULL on entry).  sd_lock is acquired
 * and released internally.  Always returns 0.
 */
int
strrput(queue_t *q, mblk_t *bp)
{
	struct stdata *stp;
	ulong_t rput_opt;
	strwakeup_t wakeups;
	strsigset_t firstmsgsigs;	/* Signals if first message on queue */
	strsigset_t allmsgsigs;		/* Signals for all messages */
	strsigset_t signals;		/* Signals events to generate */
	strpollset_t pollwakeups;
	mblk_t *nextbp;
	uchar_t band = 0;
	int hipri_sig;

	stp = (struct stdata *)q->q_ptr;
	/*
	 * Use rput_opt for optimized access to the SR_ flags except
	 * SR_POLLIN. That flag has to be checked under sd_lock since it
	 * is modified by strpoll().
	 */
	rput_opt = stp->sd_rput_opt;

	ASSERT(qclaimed(q));
	TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_ENTER,
	    "strrput called with message type:q %p bp %p", q, bp);

	/*
	 * Perform initial processing and pass to the parameterized functions.
	 */
	ASSERT(bp->b_next == NULL);

	/*
	 * Every case of this switch that does not return leaves with
	 * sd_lock held and with wakeups, firstmsgsigs, allmsgsigs and
	 * pollwakeups set.
	 */
	switch (bp->b_datap->db_type) {
	case M_DATA:
		/*
		 * sockfs is the only consumer of STREOF and when it is set,
		 * it implies that the receiver is not interested in receiving
		 * any more data, hence the mblk is freed to prevent unnecessary
		 * message queueing at the stream head.
		 *
		 * NOTE(review): this is an equality test against the whole
		 * flag word, made without sd_lock, so it only matches when
		 * STREOF is the sole bit set in sd_flag -- confirm whether
		 * a bit test (sd_flag & STREOF) was intended.
		 */
		if (stp->sd_flag == STREOF) {
			freemsg(bp);
			return (0);
		}
		if ((rput_opt & SR_IGN_ZEROLEN) &&
		    bp->b_rptr == bp->b_wptr && msgnodata(bp)) {
			/*
			 * Ignore zero-length M_DATA messages. These might be
			 * generated by some transports.
			 * The zero-length M_DATA messages, even if they
			 * are ignored, should effect the atmark tracking and
			 * should wake up a thread sleeping in strwaitmark.
			 */
			mutex_enter(&stp->sd_lock);
			if (bp->b_flag & MSGMARKNEXT) {
				/*
				 * Record the position of the mark either
				 * in q_last or in STRATMARK.
				 */
				if (q->q_last != NULL) {
					q->q_last->b_flag &= ~MSGNOTMARKNEXT;
					q->q_last->b_flag |= MSGMARKNEXT;
				} else {
					stp->sd_flag &= ~STRNOTATMARK;
					stp->sd_flag |= STRATMARK;
				}
			} else if (bp->b_flag & MSGNOTMARKNEXT) {
				/*
				 * Record that this is not the position of
				 * the mark either in q_last or in
				 * STRNOTATMARK.
				 */
				if (q->q_last != NULL) {
					q->q_last->b_flag &= ~MSGMARKNEXT;
					q->q_last->b_flag |= MSGNOTMARKNEXT;
				} else {
					stp->sd_flag &= ~STRATMARK;
					stp->sd_flag |= STRNOTATMARK;
				}
			}
			if (stp->sd_flag & RSLEEP) {
				stp->sd_flag &= ~RSLEEP;
				cv_broadcast(&q->q_wait);
			}
			mutex_exit(&stp->sd_lock);
			freemsg(bp);
			return (0);
		}
		wakeups = RSLEEP;
		if (bp->b_band == 0) {
			firstmsgsigs = S_INPUT | S_RDNORM;
			pollwakeups = POLLIN | POLLRDNORM;
		} else {
			firstmsgsigs = S_INPUT | S_RDBAND;
			pollwakeups = POLLIN | POLLRDBAND;
		}
		if (rput_opt & SR_SIGALLDATA)
			allmsgsigs = firstmsgsigs;
		else
			allmsgsigs = 0;

		mutex_enter(&stp->sd_lock);
		if ((rput_opt & SR_CONSOL_DATA) &&
		    (q->q_last != NULL) &&
		    (bp->b_flag & (MSGMARK|MSGDELIM)) == 0) {
			/*
			 * Consolidate an M_DATA message onto an M_DATA,
			 * M_PROTO, or M_PCPROTO by merging it with q_last.
			 * The consolidation does not take place if
			 * the old message is marked with either of the
			 * marks or the delim flag or if the new
			 * message is marked with MSGMARK. The MSGMARK
			 * check is needed to handle the odd semantics of
			 * MSGMARK where essentially the whole message
			 * is to be treated as marked.
			 * Carry any MSGMARKNEXT and MSGNOTMARKNEXT from the
			 * new message to the front of the b_cont chain.
			 */
			mblk_t *lbp = q->q_last;
			unsigned char db_type = lbp->b_datap->db_type;

			if ((db_type == M_DATA || db_type == M_PROTO ||
			    db_type == M_PCPROTO) &&
			    !(lbp->b_flag & (MSGDELIM|MSGMARK|MSGMARKNEXT))) {
				/*
				 * Take q_last off the queue; the merged
				 * message is queued again by putq() below.
				 */
				rmvq_noenab(q, lbp);
				/*
				 * The first message in the b_cont list
				 * tracks MSGMARKNEXT and MSGNOTMARKNEXT.
				 * We need to handle the case where we
				 * are appending:
				 *
				 * 1) a MSGMARKNEXT to a MSGNOTMARKNEXT.
				 * 2) a MSGMARKNEXT to a plain message.
				 * 3) a MSGNOTMARKNEXT to a plain message
				 * 4) a MSGNOTMARKNEXT to a MSGNOTMARKNEXT
				 *    message.
				 *
				 * Thus we never append a MSGMARKNEXT or
				 * MSGNOTMARKNEXT to a MSGMARKNEXT message.
				 */
				if (bp->b_flag & MSGMARKNEXT) {
					lbp->b_flag |= MSGMARKNEXT;
					lbp->b_flag &= ~MSGNOTMARKNEXT;
					bp->b_flag &= ~MSGMARKNEXT;
				} else if (bp->b_flag & MSGNOTMARKNEXT) {
					lbp->b_flag |= MSGNOTMARKNEXT;
					bp->b_flag &= ~MSGNOTMARKNEXT;
				}

				linkb(lbp, bp);
				bp = lbp;
				/*
				 * The new message logically isn't the first
				 * even though the q_first check below thinks
				 * it is. Clear the firstmsgsigs to make it
				 * not appear to be first.
				 */
				firstmsgsigs = 0;
			}
		}
		break;

	case M_PASSFP:
		wakeups = RSLEEP;
		allmsgsigs = 0;
		if (bp->b_band == 0) {
			firstmsgsigs = S_INPUT | S_RDNORM;
			pollwakeups = POLLIN | POLLRDNORM;
		} else {
			firstmsgsigs = S_INPUT | S_RDBAND;
			pollwakeups = POLLIN | POLLRDBAND;
		}
		mutex_enter(&stp->sd_lock);
		break;

	case M_PROTO:
	case M_PCPROTO:
		/*
		 * The installed protocol hook decides the wakeups and
		 * signals and may replace bp (possibly with NULL or with
		 * a b_next chain of messages).
		 */
		ASSERT(stp->sd_rprotofunc != NULL);
		bp = (stp->sd_rprotofunc)(stp->sd_vnode, bp,
		    &wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups);
#define	ALLSIG	(S_INPUT|S_HIPRI|S_OUTPUT|S_MSG|S_ERROR|S_HANGUP|S_RDNORM|\
		S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)
#define	ALLPOLL	(POLLIN|POLLPRI|POLLOUT|POLLRDNORM|POLLWRNORM|POLLRDBAND|\
		POLLWRBAND)

		ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0);
		ASSERT((firstmsgsigs & ~ALLSIG) == 0);
		ASSERT((allmsgsigs & ~ALLSIG) == 0);
		ASSERT((pollwakeups & ~ALLPOLL) == 0);

		mutex_enter(&stp->sd_lock);
		break;

	default:
		/* All other types go through the installed misc hook */
		ASSERT(stp->sd_rmiscfunc != NULL);
		bp = (stp->sd_rmiscfunc)(stp->sd_vnode, bp,
		    &wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups);
		ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0);
		ASSERT((firstmsgsigs & ~ALLSIG) == 0);
		ASSERT((allmsgsigs & ~ALLSIG) == 0);
		ASSERT((pollwakeups & ~ALLPOLL) == 0);
#undef	ALLSIG
#undef	ALLPOLL
		mutex_enter(&stp->sd_lock);
		break;
	}
	ASSERT(MUTEX_HELD(&stp->sd_lock));

	/* By default generate superset of signals */
	signals = (firstmsgsigs | allmsgsigs);

	/*
	 * The proto and misc functions can return multiple messages
	 * as a b_next chain. Such messages are processed separately.
	 */
one_more:
	hipri_sig = 0;
	if (bp == NULL) {
		nextbp = NULL;
	} else {
		/* Detach this message from the rest of the chain */
		nextbp = bp->b_next;
		bp->b_next = NULL;

		switch (bp->b_datap->db_type) {
		case M_PCPROTO:
			/*
			 * Only one priority protocol message is allowed at the
			 * stream head at a time.
			 */
			if (stp->sd_flag & STRPRI) {
				TRACE_0(TR_FAC_STREAMS_FR, TR_STRRPUT_PROTERR,
				    "M_PCPROTO already at head");
				freemsg(bp);
				mutex_exit(&stp->sd_lock);
				goto done;
			}
			stp->sd_flag |= STRPRI;
			hipri_sig = 1;
			/* FALLTHRU */
		case M_DATA:
		case M_PROTO:
		case M_PASSFP:
			band = bp->b_band;
			/*
			 * Marking doesn't work well when messages
			 * are marked in more than one band. We only
			 * remember the last message received, even if
			 * it is placed on the queue ahead of other
			 * marked messages.
			 */
			if (bp->b_flag & MSGMARK)
				stp->sd_mark = bp;
			(void) putq(q, bp);

			/*
			 * If message is a PCPROTO message, always use
			 * firstmsgsigs to determine if a signal should be
			 * sent as strrput is the only place to send
			 * signals for PCPROTO. Other messages are based on
			 * the STRGETINPROG flag. The flag determines if
			 * strrput or (k)strgetmsg will be responsible for
			 * sending the signals, in the firstmsgsigs case.
			 */
			if ((hipri_sig == 1) ||
			    (((stp->sd_flag & STRGETINPROG) == 0) &&
			    (q->q_first == bp)))
				signals = (firstmsgsigs | allmsgsigs);
			else
				signals = allmsgsigs;
			break;

		default:
			/* Handled with sd_lock dropped */
			mutex_exit(&stp->sd_lock);
			(void) strrput_nondata(q, bp);
			mutex_enter(&stp->sd_lock);
			break;
		}
	}
	ASSERT(MUTEX_HELD(&stp->sd_lock));
	/*
	 * Wake sleeping read/getmsg and cancel deferred wakeup
	 */
	if (wakeups & RSLEEP)
		stp->sd_wakeq &= ~RSLEEP;

	/* Only wake the sides that are actually marked as sleeping */
	wakeups &= stp->sd_flag;
	if (wakeups & RSLEEP) {
		stp->sd_flag &= ~RSLEEP;
		cv_broadcast(&q->q_wait);
	}
	if (wakeups & WSLEEP) {
		stp->sd_flag &= ~WSLEEP;
		cv_broadcast(&_WR(q)->q_wait);
	}

	if (pollwakeups != 0) {
		if (pollwakeups == (POLLIN | POLLRDNORM)) {
			/*
			 * Can't use rput_opt since it was not
			 * read when sd_lock was held and SR_POLLIN is changed
			 * by strpoll() under sd_lock.
			 */
			if (!(stp->sd_rput_opt & SR_POLLIN))
				goto no_pollwake;
			stp->sd_rput_opt &= ~SR_POLLIN;
		}
		/* pollwakeup() is called with sd_lock dropped */
		mutex_exit(&stp->sd_lock);
		pollwakeup(&stp->sd_pollist, pollwakeups);
		mutex_enter(&stp->sd_lock);
	}
no_pollwake:

	/*
	 * strsendsig can handle multiple signals with a
	 * single call.
	 */
	if (stp->sd_sigflags & signals)
		strsendsig(stp->sd_siglist, signals, band, 0);
	mutex_exit(&stp->sd_lock);


done:
	/* Reached with sd_lock not held */
	if (nextbp == NULL)
		return (0);

	/*
	 * Any signals were handled the first time.
	 * Wakeups and pollwakeups are redone to avoid any race
	 * conditions - all the messages are not queued until the
	 * last message has been processed by strrput.
1916 */ 1917 bp = nextbp; 1918 signals = firstmsgsigs = allmsgsigs = 0; 1919 mutex_enter(&stp->sd_lock); 1920 goto one_more; 1921 } 1922 1923 static void 1924 log_dupioc(queue_t *rq, mblk_t *bp) 1925 { 1926 queue_t *wq, *qp; 1927 char *modnames, *mnp, *dname; 1928 size_t maxmodstr; 1929 boolean_t islast; 1930 1931 /* 1932 * Allocate a buffer large enough to hold the names of nstrpush modules 1933 * and one driver, with spaces between and NUL terminator. If we can't 1934 * get memory, then we'll just log the driver name. 1935 */ 1936 maxmodstr = nstrpush * (FMNAMESZ + 1); 1937 mnp = modnames = kmem_alloc(maxmodstr, KM_NOSLEEP); 1938 1939 /* march down write side to print log message down to the driver */ 1940 wq = WR(rq); 1941 1942 /* make sure q_next doesn't shift around while we're grabbing data */ 1943 claimstr(wq); 1944 qp = wq->q_next; 1945 do { 1946 dname = Q2NAME(qp); 1947 islast = !SAMESTR(qp) || qp->q_next == NULL; 1948 if (modnames == NULL) { 1949 /* 1950 * If we don't have memory, then get the driver name in 1951 * the log where we can see it. Note that memory 1952 * pressure is a possible cause of these sorts of bugs. 1953 */ 1954 if (islast) { 1955 modnames = dname; 1956 maxmodstr = 0; 1957 } 1958 } else { 1959 mnp += snprintf(mnp, FMNAMESZ + 1, "%s", dname); 1960 if (!islast) 1961 *mnp++ = ' '; 1962 } 1963 qp = qp->q_next; 1964 } while (!islast); 1965 releasestr(wq); 1966 /* Cannot happen unless stream head is corrupt. */ 1967 ASSERT(modnames != NULL); 1968 (void) strlog(rq->q_qinfo->qi_minfo->mi_idnum, 0, 1, 1969 SL_CONSOLE|SL_TRACE|SL_ERROR, 1970 "Warning: stream %p received duplicate %X M_IOC%s; module list: %s", 1971 rq->q_ptr, ((struct iocblk *)bp->b_rptr)->ioc_cmd, 1972 (DB_TYPE(bp) == M_IOCACK ? 
"ACK" : "NAK"), modnames); 1973 if (maxmodstr != 0) 1974 kmem_free(modnames, maxmodstr); 1975 } 1976 1977 int 1978 strrput_nondata(queue_t *q, mblk_t *bp) 1979 { 1980 struct stdata *stp; 1981 struct iocblk *iocbp; 1982 struct stroptions *sop; 1983 struct copyreq *reqp; 1984 struct copyresp *resp; 1985 unsigned char bpri; 1986 unsigned char flushed_already = 0; 1987 1988 stp = (struct stdata *)q->q_ptr; 1989 1990 ASSERT(!(stp->sd_flag & STPLEX)); 1991 ASSERT(qclaimed(q)); 1992 1993 switch (bp->b_datap->db_type) { 1994 case M_ERROR: 1995 /* 1996 * An error has occurred downstream, the errno is in the first 1997 * bytes of the message. 1998 */ 1999 if ((bp->b_wptr - bp->b_rptr) == 2) { /* New flavor */ 2000 unsigned char rw = 0; 2001 2002 mutex_enter(&stp->sd_lock); 2003 if (*bp->b_rptr != NOERROR) { /* read error */ 2004 if (*bp->b_rptr != 0) { 2005 if (stp->sd_flag & STRDERR) 2006 flushed_already |= FLUSHR; 2007 stp->sd_flag |= STRDERR; 2008 rw |= FLUSHR; 2009 } else { 2010 stp->sd_flag &= ~STRDERR; 2011 } 2012 stp->sd_rerror = *bp->b_rptr; 2013 } 2014 bp->b_rptr++; 2015 if (*bp->b_rptr != NOERROR) { /* write error */ 2016 if (*bp->b_rptr != 0) { 2017 if (stp->sd_flag & STWRERR) 2018 flushed_already |= FLUSHW; 2019 stp->sd_flag |= STWRERR; 2020 rw |= FLUSHW; 2021 } else { 2022 stp->sd_flag &= ~STWRERR; 2023 } 2024 stp->sd_werror = *bp->b_rptr; 2025 } 2026 if (rw) { 2027 TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_WAKE, 2028 "strrput cv_broadcast:q %p, bp %p", 2029 q, bp); 2030 cv_broadcast(&q->q_wait); /* readers */ 2031 cv_broadcast(&_WR(q)->q_wait); /* writers */ 2032 cv_broadcast(&stp->sd_monitor); /* ioctllers */ 2033 2034 mutex_exit(&stp->sd_lock); 2035 pollwakeup(&stp->sd_pollist, POLLERR); 2036 mutex_enter(&stp->sd_lock); 2037 2038 if (stp->sd_sigflags & S_ERROR) 2039 strsendsig(stp->sd_siglist, S_ERROR, 0, 2040 ((rw & FLUSHR) ? 
stp->sd_rerror : 2041 stp->sd_werror)); 2042 mutex_exit(&stp->sd_lock); 2043 /* 2044 * Send the M_FLUSH only 2045 * for the first M_ERROR 2046 * message on the stream 2047 */ 2048 if (flushed_already == rw) { 2049 freemsg(bp); 2050 return (0); 2051 } 2052 2053 bp->b_datap->db_type = M_FLUSH; 2054 *bp->b_rptr = rw; 2055 bp->b_wptr = bp->b_rptr + 1; 2056 /* 2057 * Protect against the driver 2058 * passing up messages after 2059 * it has done a qprocsoff 2060 */ 2061 if (_OTHERQ(q)->q_next == NULL) 2062 freemsg(bp); 2063 else 2064 qreply(q, bp); 2065 return (0); 2066 } else 2067 mutex_exit(&stp->sd_lock); 2068 } else if (*bp->b_rptr != 0) { /* Old flavor */ 2069 if (stp->sd_flag & (STRDERR|STWRERR)) 2070 flushed_already = FLUSHRW; 2071 mutex_enter(&stp->sd_lock); 2072 stp->sd_flag |= (STRDERR|STWRERR); 2073 stp->sd_rerror = *bp->b_rptr; 2074 stp->sd_werror = *bp->b_rptr; 2075 TRACE_2(TR_FAC_STREAMS_FR, 2076 TR_STRRPUT_WAKE2, 2077 "strrput wakeup #2:q %p, bp %p", q, bp); 2078 cv_broadcast(&q->q_wait); /* the readers */ 2079 cv_broadcast(&_WR(q)->q_wait); /* the writers */ 2080 cv_broadcast(&stp->sd_monitor); /* ioctllers */ 2081 2082 mutex_exit(&stp->sd_lock); 2083 pollwakeup(&stp->sd_pollist, POLLERR); 2084 mutex_enter(&stp->sd_lock); 2085 2086 if (stp->sd_sigflags & S_ERROR) 2087 strsendsig(stp->sd_siglist, S_ERROR, 0, 2088 (stp->sd_werror ? stp->sd_werror : 2089 stp->sd_rerror)); 2090 mutex_exit(&stp->sd_lock); 2091 2092 /* 2093 * Send the M_FLUSH only 2094 * for the first M_ERROR 2095 * message on the stream 2096 */ 2097 if (flushed_already != FLUSHRW) { 2098 bp->b_datap->db_type = M_FLUSH; 2099 *bp->b_rptr = FLUSHRW; 2100 /* 2101 * Protect against the driver passing up 2102 * messages after it has done a 2103 * qprocsoff. 
2104 */ 2105 if (_OTHERQ(q)->q_next == NULL) 2106 freemsg(bp); 2107 else 2108 qreply(q, bp); 2109 return (0); 2110 } 2111 } 2112 freemsg(bp); 2113 return (0); 2114 2115 case M_HANGUP: 2116 2117 freemsg(bp); 2118 mutex_enter(&stp->sd_lock); 2119 stp->sd_werror = ENXIO; 2120 stp->sd_flag |= STRHUP; 2121 stp->sd_flag &= ~(WSLEEP|RSLEEP); 2122 2123 /* 2124 * send signal if controlling tty 2125 */ 2126 2127 if (stp->sd_sidp) { 2128 prsignal(stp->sd_sidp, SIGHUP); 2129 if (stp->sd_sidp != stp->sd_pgidp) 2130 pgsignal(stp->sd_pgidp, SIGTSTP); 2131 } 2132 2133 /* 2134 * wake up read, write, and exception pollers and 2135 * reset wakeup mechanism. 2136 */ 2137 cv_broadcast(&q->q_wait); /* the readers */ 2138 cv_broadcast(&_WR(q)->q_wait); /* the writers */ 2139 cv_broadcast(&stp->sd_monitor); /* the ioctllers */ 2140 strhup(stp); 2141 mutex_exit(&stp->sd_lock); 2142 return (0); 2143 2144 case M_UNHANGUP: 2145 freemsg(bp); 2146 mutex_enter(&stp->sd_lock); 2147 stp->sd_werror = 0; 2148 stp->sd_flag &= ~STRHUP; 2149 mutex_exit(&stp->sd_lock); 2150 return (0); 2151 2152 case M_SIG: 2153 /* 2154 * Someone downstream wants to post a signal. The 2155 * signal to post is contained in the first byte of the 2156 * message. If the message would go on the front of 2157 * the queue, send a signal to the process group 2158 * (if not SIGPOLL) or to the siglist processes 2159 * (SIGPOLL). If something is already on the queue, 2160 * OR if we are delivering a delayed suspend (*sigh* 2161 * another "tty" hack) and there's no one sleeping already, 2162 * just enqueue the message. 2163 */ 2164 mutex_enter(&stp->sd_lock); 2165 if (q->q_first || (*bp->b_rptr == SIGTSTP && 2166 !(stp->sd_flag & RSLEEP))) { 2167 (void) putq(q, bp); 2168 mutex_exit(&stp->sd_lock); 2169 return (0); 2170 } 2171 mutex_exit(&stp->sd_lock); 2172 /* FALLTHRU */ 2173 2174 case M_PCSIG: 2175 /* 2176 * Don't enqueue, just post the signal. 
2177 */ 2178 strsignal(stp, *bp->b_rptr, 0L); 2179 freemsg(bp); 2180 return (0); 2181 2182 case M_CMD: 2183 if (MBLKL(bp) != sizeof (cmdblk_t)) { 2184 freemsg(bp); 2185 return (0); 2186 } 2187 2188 mutex_enter(&stp->sd_lock); 2189 if (stp->sd_flag & STRCMDWAIT) { 2190 ASSERT(stp->sd_cmdblk == NULL); 2191 stp->sd_cmdblk = bp; 2192 cv_broadcast(&stp->sd_monitor); 2193 mutex_exit(&stp->sd_lock); 2194 } else { 2195 mutex_exit(&stp->sd_lock); 2196 freemsg(bp); 2197 } 2198 return (0); 2199 2200 case M_FLUSH: 2201 /* 2202 * Flush queues. The indication of which queues to flush 2203 * is in the first byte of the message. If the read queue 2204 * is specified, then flush it. If FLUSHBAND is set, just 2205 * flush the band specified by the second byte of the message. 2206 * 2207 * If a module has issued a M_SETOPT to not flush hi 2208 * priority messages off of the stream head, then pass this 2209 * flag into the flushq code to preserve such messages. 2210 */ 2211 2212 if (*bp->b_rptr & FLUSHR) { 2213 mutex_enter(&stp->sd_lock); 2214 if (*bp->b_rptr & FLUSHBAND) { 2215 ASSERT((bp->b_wptr - bp->b_rptr) >= 2); 2216 flushband(q, *(bp->b_rptr + 1), FLUSHALL); 2217 } else 2218 flushq_common(q, FLUSHALL, 2219 stp->sd_read_opt & RFLUSHPCPROT); 2220 if ((q->q_first == NULL) || 2221 (q->q_first->b_datap->db_type < QPCTL)) 2222 stp->sd_flag &= ~STRPRI; 2223 else { 2224 ASSERT(stp->sd_flag & STRPRI); 2225 } 2226 mutex_exit(&stp->sd_lock); 2227 } 2228 if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) { 2229 *bp->b_rptr &= ~FLUSHR; 2230 bp->b_flag |= MSGNOLOOP; 2231 /* 2232 * Protect against the driver passing up 2233 * messages after it has done a qprocsoff. 2234 */ 2235 if (_OTHERQ(q)->q_next == NULL) 2236 freemsg(bp); 2237 else 2238 qreply(q, bp); 2239 return (0); 2240 } 2241 freemsg(bp); 2242 return (0); 2243 2244 case M_IOCACK: 2245 case M_IOCNAK: 2246 iocbp = (struct iocblk *)bp->b_rptr; 2247 /* 2248 * If not waiting for ACK or NAK then just free msg. 
2249 * If incorrect id sequence number then just free msg. 2250 * If already have ACK or NAK for user then this is a 2251 * duplicate, display a warning and free the msg. 2252 */ 2253 mutex_enter(&stp->sd_lock); 2254 if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk || 2255 (stp->sd_iocid != iocbp->ioc_id)) { 2256 /* 2257 * If the ACK/NAK is a dup, display a message 2258 * Dup is when sd_iocid == ioc_id, and 2259 * sd_iocblk == <valid ptr> or -1 (the former 2260 * is when an ioctl has been put on the stream 2261 * head, but has not yet been consumed, the 2262 * later is when it has been consumed). 2263 */ 2264 if ((stp->sd_iocid == iocbp->ioc_id) && 2265 (stp->sd_iocblk != NULL)) { 2266 log_dupioc(q, bp); 2267 } 2268 freemsg(bp); 2269 mutex_exit(&stp->sd_lock); 2270 return (0); 2271 } 2272 2273 /* 2274 * Assign ACK or NAK to user and wake up. 2275 */ 2276 stp->sd_iocblk = bp; 2277 cv_broadcast(&stp->sd_monitor); 2278 mutex_exit(&stp->sd_lock); 2279 return (0); 2280 2281 case M_COPYIN: 2282 case M_COPYOUT: 2283 reqp = (struct copyreq *)bp->b_rptr; 2284 2285 /* 2286 * If not waiting for ACK or NAK then just fail request. 2287 * If already have ACK, NAK, or copy request, then just 2288 * fail request. 2289 * If incorrect id sequence number then just fail request. 2290 */ 2291 mutex_enter(&stp->sd_lock); 2292 if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk || 2293 (stp->sd_iocid != reqp->cq_id)) { 2294 if (bp->b_cont) { 2295 freemsg(bp->b_cont); 2296 bp->b_cont = NULL; 2297 } 2298 bp->b_datap->db_type = M_IOCDATA; 2299 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp); 2300 resp = (struct copyresp *)bp->b_rptr; 2301 resp->cp_rval = (caddr_t)1; /* failure */ 2302 mutex_exit(&stp->sd_lock); 2303 putnext(stp->sd_wrq, bp); 2304 return (0); 2305 } 2306 2307 /* 2308 * Assign copy request to user and wake up. 
2309 */ 2310 stp->sd_iocblk = bp; 2311 cv_broadcast(&stp->sd_monitor); 2312 mutex_exit(&stp->sd_lock); 2313 return (0); 2314 2315 case M_SETOPTS: 2316 /* 2317 * Set stream head options (read option, write offset, 2318 * min/max packet size, and/or high/low water marks for 2319 * the read side only). 2320 */ 2321 2322 bpri = 0; 2323 sop = (struct stroptions *)bp->b_rptr; 2324 mutex_enter(&stp->sd_lock); 2325 if (sop->so_flags & SO_READOPT) { 2326 switch (sop->so_readopt & RMODEMASK) { 2327 case RNORM: 2328 stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS); 2329 break; 2330 2331 case RMSGD: 2332 stp->sd_read_opt = 2333 ((stp->sd_read_opt & ~RD_MSGNODIS) | 2334 RD_MSGDIS); 2335 break; 2336 2337 case RMSGN: 2338 stp->sd_read_opt = 2339 ((stp->sd_read_opt & ~RD_MSGDIS) | 2340 RD_MSGNODIS); 2341 break; 2342 } 2343 switch (sop->so_readopt & RPROTMASK) { 2344 case RPROTNORM: 2345 stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS); 2346 break; 2347 2348 case RPROTDAT: 2349 stp->sd_read_opt = 2350 ((stp->sd_read_opt & ~RD_PROTDIS) | 2351 RD_PROTDAT); 2352 break; 2353 2354 case RPROTDIS: 2355 stp->sd_read_opt = 2356 ((stp->sd_read_opt & ~RD_PROTDAT) | 2357 RD_PROTDIS); 2358 break; 2359 } 2360 switch (sop->so_readopt & RFLUSHMASK) { 2361 case RFLUSHPCPROT: 2362 /* 2363 * This sets the stream head to NOT flush 2364 * M_PCPROTO messages. 
2365 */ 2366 stp->sd_read_opt |= RFLUSHPCPROT; 2367 break; 2368 } 2369 } 2370 if (sop->so_flags & SO_ERROPT) { 2371 switch (sop->so_erropt & RERRMASK) { 2372 case RERRNORM: 2373 stp->sd_flag &= ~STRDERRNONPERSIST; 2374 break; 2375 case RERRNONPERSIST: 2376 stp->sd_flag |= STRDERRNONPERSIST; 2377 break; 2378 } 2379 switch (sop->so_erropt & WERRMASK) { 2380 case WERRNORM: 2381 stp->sd_flag &= ~STWRERRNONPERSIST; 2382 break; 2383 case WERRNONPERSIST: 2384 stp->sd_flag |= STWRERRNONPERSIST; 2385 break; 2386 } 2387 } 2388 if (sop->so_flags & SO_COPYOPT) { 2389 if (sop->so_copyopt & ZCVMSAFE) { 2390 stp->sd_copyflag |= STZCVMSAFE; 2391 stp->sd_copyflag &= ~STZCVMUNSAFE; 2392 } else if (sop->so_copyopt & ZCVMUNSAFE) { 2393 stp->sd_copyflag |= STZCVMUNSAFE; 2394 stp->sd_copyflag &= ~STZCVMSAFE; 2395 } 2396 2397 if (sop->so_copyopt & COPYCACHED) { 2398 stp->sd_copyflag |= STRCOPYCACHED; 2399 } 2400 } 2401 if (sop->so_flags & SO_WROFF) 2402 stp->sd_wroff = sop->so_wroff; 2403 if (sop->so_flags & SO_TAIL) 2404 stp->sd_tail = sop->so_tail; 2405 if (sop->so_flags & SO_MINPSZ) 2406 q->q_minpsz = sop->so_minpsz; 2407 if (sop->so_flags & SO_MAXPSZ) 2408 q->q_maxpsz = sop->so_maxpsz; 2409 if (sop->so_flags & SO_MAXBLK) 2410 stp->sd_maxblk = sop->so_maxblk; 2411 if (sop->so_flags & SO_HIWAT) { 2412 if (sop->so_flags & SO_BAND) { 2413 if (strqset(q, QHIWAT, 2414 sop->so_band, sop->so_hiwat)) { 2415 cmn_err(CE_WARN, "strrput: could not " 2416 "allocate qband\n"); 2417 } else { 2418 bpri = sop->so_band; 2419 } 2420 } else { 2421 q->q_hiwat = sop->so_hiwat; 2422 } 2423 } 2424 if (sop->so_flags & SO_LOWAT) { 2425 if (sop->so_flags & SO_BAND) { 2426 if (strqset(q, QLOWAT, 2427 sop->so_band, sop->so_lowat)) { 2428 cmn_err(CE_WARN, "strrput: could not " 2429 "allocate qband\n"); 2430 } else { 2431 bpri = sop->so_band; 2432 } 2433 } else { 2434 q->q_lowat = sop->so_lowat; 2435 } 2436 } 2437 if (sop->so_flags & SO_MREADON) 2438 stp->sd_flag |= SNDMREAD; 2439 if (sop->so_flags & SO_MREADOFF) 
2440 stp->sd_flag &= ~SNDMREAD; 2441 if (sop->so_flags & SO_NDELON) 2442 stp->sd_flag |= OLDNDELAY; 2443 if (sop->so_flags & SO_NDELOFF) 2444 stp->sd_flag &= ~OLDNDELAY; 2445 if (sop->so_flags & SO_ISTTY) 2446 stp->sd_flag |= STRISTTY; 2447 if (sop->so_flags & SO_ISNTTY) 2448 stp->sd_flag &= ~STRISTTY; 2449 if (sop->so_flags & SO_TOSTOP) 2450 stp->sd_flag |= STRTOSTOP; 2451 if (sop->so_flags & SO_TONSTOP) 2452 stp->sd_flag &= ~STRTOSTOP; 2453 if (sop->so_flags & SO_DELIM) 2454 stp->sd_flag |= STRDELIM; 2455 if (sop->so_flags & SO_NODELIM) 2456 stp->sd_flag &= ~STRDELIM; 2457 2458 mutex_exit(&stp->sd_lock); 2459 freemsg(bp); 2460 2461 /* Check backenable in case the water marks changed */ 2462 qbackenable(q, bpri); 2463 return (0); 2464 2465 /* 2466 * The following set of cases deal with situations where two stream 2467 * heads are connected to each other (twisted streams). These messages 2468 * have no meaning at the stream head. 2469 */ 2470 case M_BREAK: 2471 case M_CTL: 2472 case M_DELAY: 2473 case M_START: 2474 case M_STOP: 2475 case M_IOCDATA: 2476 case M_STARTI: 2477 case M_STOPI: 2478 freemsg(bp); 2479 return (0); 2480 2481 case M_IOCTL: 2482 /* 2483 * Always NAK this condition 2484 * (makes no sense) 2485 * If there is one or more threads in the read side 2486 * rwnext we have to defer the nacking until that thread 2487 * returns (in strget). 2488 */ 2489 mutex_enter(&stp->sd_lock); 2490 if (stp->sd_struiodnak != 0) { 2491 /* 2492 * Defer NAK to the streamhead. Queue at the end 2493 * the list. 2494 */ 2495 mblk_t *mp = stp->sd_struionak; 2496 2497 while (mp && mp->b_next) 2498 mp = mp->b_next; 2499 if (mp) 2500 mp->b_next = bp; 2501 else 2502 stp->sd_struionak = bp; 2503 bp->b_next = NULL; 2504 mutex_exit(&stp->sd_lock); 2505 return (0); 2506 } 2507 mutex_exit(&stp->sd_lock); 2508 2509 bp->b_datap->db_type = M_IOCNAK; 2510 /* 2511 * Protect against the driver passing up 2512 * messages after it has done a qprocsoff. 
2513 */ 2514 if (_OTHERQ(q)->q_next == NULL) 2515 freemsg(bp); 2516 else 2517 qreply(q, bp); 2518 return (0); 2519 2520 default: 2521 #ifdef DEBUG 2522 cmn_err(CE_WARN, 2523 "bad message type %x received at stream head\n", 2524 bp->b_datap->db_type); 2525 #endif 2526 freemsg(bp); 2527 return (0); 2528 } 2529 2530 /* NOTREACHED */ 2531 } 2532 2533 /* 2534 * Check if the stream pointed to by `stp' can be written to, and return an 2535 * error code if not. If `eiohup' is set, then return EIO if STRHUP is set. 2536 * If `sigpipeok' is set and the SW_SIGPIPE option is enabled on the stream, 2537 * then always return EPIPE and send a SIGPIPE to the invoking thread. 2538 */ 2539 static int 2540 strwriteable(struct stdata *stp, boolean_t eiohup, boolean_t sigpipeok) 2541 { 2542 int error; 2543 2544 ASSERT(MUTEX_HELD(&stp->sd_lock)); 2545 2546 /* 2547 * For modem support, POSIX states that on writes, EIO should 2548 * be returned if the stream has been hung up. 2549 */ 2550 if (eiohup && (stp->sd_flag & (STPLEX|STRHUP)) == STRHUP) 2551 error = EIO; 2552 else 2553 error = strgeterr(stp, STRHUP|STPLEX|STWRERR, 0); 2554 2555 if (error != 0) { 2556 if (!(stp->sd_flag & STPLEX) && 2557 (stp->sd_wput_opt & SW_SIGPIPE) && sigpipeok) { 2558 tsignal(curthread, SIGPIPE); 2559 error = EPIPE; 2560 } 2561 } 2562 2563 return (error); 2564 } 2565 2566 /* 2567 * Copyin and send data down a stream. 2568 * The caller will allocate and copyin any control part that precedes the 2569 * message and pass that in as mctl. 2570 * 2571 * Caller should *not* hold sd_lock. 2572 * When EWOULDBLOCK is returned the caller has to redo the canputnext 2573 * under sd_lock in order to avoid missing a backenabling wakeup. 2574 * 2575 * Use iosize = -1 to not send any M_DATA. iosize = 0 sends zero-length M_DATA. 2576 * 2577 * Set MSG_IGNFLOW in flags to ignore flow control for hipri messages. 2578 * For sync streams we can only ignore flow control by reverting to using 2579 * putnext. 
2580 * 2581 * If sd_maxblk is less than *iosize this routine might return without 2582 * transferring all of *iosize. In all cases, on return *iosize will contain 2583 * the amount of data that was transferred. 2584 */ 2585 static int 2586 strput(struct stdata *stp, mblk_t *mctl, struct uio *uiop, ssize_t *iosize, 2587 int b_flag, int pri, int flags) 2588 { 2589 struiod_t uiod; 2590 mblk_t *mp; 2591 queue_t *wqp = stp->sd_wrq; 2592 int error = 0; 2593 ssize_t count = *iosize; 2594 2595 ASSERT(MUTEX_NOT_HELD(&stp->sd_lock)); 2596 2597 if (uiop != NULL && count >= 0) 2598 flags |= stp->sd_struiowrq ? STRUIO_POSTPONE : 0; 2599 2600 if (!(flags & STRUIO_POSTPONE)) { 2601 /* 2602 * Use regular canputnext, strmakedata, putnext sequence. 2603 */ 2604 if (pri == 0) { 2605 if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) { 2606 freemsg(mctl); 2607 return (EWOULDBLOCK); 2608 } 2609 } else { 2610 if (!(flags & MSG_IGNFLOW) && !bcanputnext(wqp, pri)) { 2611 freemsg(mctl); 2612 return (EWOULDBLOCK); 2613 } 2614 } 2615 2616 if ((error = strmakedata(iosize, uiop, stp, flags, 2617 &mp)) != 0) { 2618 freemsg(mctl); 2619 /* 2620 * need to change return code to ENOMEM 2621 * so that this is not confused with 2622 * flow control, EAGAIN. 2623 */ 2624 2625 if (error == EAGAIN) 2626 return (ENOMEM); 2627 else 2628 return (error); 2629 } 2630 if (mctl != NULL) { 2631 if (mctl->b_cont == NULL) 2632 mctl->b_cont = mp; 2633 else if (mp != NULL) 2634 linkb(mctl, mp); 2635 mp = mctl; 2636 } else if (mp == NULL) 2637 return (0); 2638 2639 mp->b_flag |= b_flag; 2640 mp->b_band = (uchar_t)pri; 2641 2642 if (flags & MSG_IGNFLOW) { 2643 /* 2644 * XXX Hack: Don't get stuck running service 2645 * procedures. This is needed for sockfs when 2646 * sending the unbind message out of the rput 2647 * procedure - we don't want a put procedure 2648 * to run service procedures. 
2649 */ 2650 putnext(wqp, mp); 2651 } else { 2652 stream_willservice(stp); 2653 putnext(wqp, mp); 2654 stream_runservice(stp); 2655 } 2656 return (0); 2657 } 2658 /* 2659 * Stream supports rwnext() for the write side. 2660 */ 2661 if ((error = strmakedata(iosize, uiop, stp, flags, &mp)) != 0) { 2662 freemsg(mctl); 2663 /* 2664 * map EAGAIN to ENOMEM since EAGAIN means "flow controlled". 2665 */ 2666 return (error == EAGAIN ? ENOMEM : error); 2667 } 2668 if (mctl != NULL) { 2669 if (mctl->b_cont == NULL) 2670 mctl->b_cont = mp; 2671 else if (mp != NULL) 2672 linkb(mctl, mp); 2673 mp = mctl; 2674 } else if (mp == NULL) { 2675 return (0); 2676 } 2677 2678 mp->b_flag |= b_flag; 2679 mp->b_band = (uchar_t)pri; 2680 2681 (void) uiodup(uiop, &uiod.d_uio, uiod.d_iov, 2682 sizeof (uiod.d_iov) / sizeof (*uiod.d_iov)); 2683 uiod.d_uio.uio_offset = 0; 2684 uiod.d_mp = mp; 2685 error = rwnext(wqp, &uiod); 2686 if (! uiod.d_mp) { 2687 uioskip(uiop, *iosize); 2688 return (error); 2689 } 2690 ASSERT(mp == uiod.d_mp); 2691 if (error == EINVAL) { 2692 /* 2693 * The stream plumbing must have changed while 2694 * we were away, so just turn off rwnext()s. 2695 */ 2696 error = 0; 2697 } else if (error == EBUSY || error == EWOULDBLOCK) { 2698 /* 2699 * Couldn't enter a perimeter or took a page fault, 2700 * so fall-back to putnext(). 
2701 */ 2702 error = 0; 2703 } else { 2704 freemsg(mp); 2705 return (error); 2706 } 2707 /* Have to check canput before consuming data from the uio */ 2708 if (pri == 0) { 2709 if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) { 2710 freemsg(mp); 2711 return (EWOULDBLOCK); 2712 } 2713 } else { 2714 if (!bcanputnext(wqp, pri) && !(flags & MSG_IGNFLOW)) { 2715 freemsg(mp); 2716 return (EWOULDBLOCK); 2717 } 2718 } 2719 ASSERT(mp == uiod.d_mp); 2720 /* Copyin data from the uio */ 2721 if ((error = struioget(wqp, mp, &uiod, 0)) != 0) { 2722 freemsg(mp); 2723 return (error); 2724 } 2725 uioskip(uiop, *iosize); 2726 if (flags & MSG_IGNFLOW) { 2727 /* 2728 * XXX Hack: Don't get stuck running service procedures. 2729 * This is needed for sockfs when sending the unbind message 2730 * out of the rput procedure - we don't want a put procedure 2731 * to run service procedures. 2732 */ 2733 putnext(wqp, mp); 2734 } else { 2735 stream_willservice(stp); 2736 putnext(wqp, mp); 2737 stream_runservice(stp); 2738 } 2739 return (0); 2740 } 2741 2742 /* 2743 * Write attempts to break the write request into messages conforming 2744 * with the minimum and maximum packet sizes set downstream. 2745 * 2746 * Write will not block if downstream queue is full and 2747 * O_NDELAY is set, otherwise it will block waiting for the queue to get room. 2748 * 2749 * A write of zero bytes gets packaged into a zero length message and sent 2750 * downstream like any other message. 2751 * 2752 * If buffers of the requested sizes are not available, the write will 2753 * sleep until the buffers become available. 2754 * 2755 * Write (if specified) will supply a write offset in a message if it 2756 * makes sense. This can be specified by downstream modules as part of 2757 * a M_SETOPTS message. Write will not supply the write offset if it 2758 * cannot supply any data in a buffer. In other words, write will never 2759 * send down an empty packet due to a write offset. 
2760 */ 2761 /* ARGSUSED2 */ 2762 int 2763 strwrite(struct vnode *vp, struct uio *uiop, cred_t *crp) 2764 { 2765 return (strwrite_common(vp, uiop, crp, 0)); 2766 } 2767 2768 /* ARGSUSED2 */ 2769 int 2770 strwrite_common(struct vnode *vp, struct uio *uiop, cred_t *crp, int wflag) 2771 { 2772 struct stdata *stp; 2773 struct queue *wqp; 2774 ssize_t rmin, rmax; 2775 ssize_t iosize; 2776 int waitflag; 2777 int tempmode; 2778 int error = 0; 2779 int b_flag; 2780 2781 ASSERT(vp->v_stream); 2782 stp = vp->v_stream; 2783 2784 mutex_enter(&stp->sd_lock); 2785 2786 if ((error = i_straccess(stp, JCWRITE)) != 0) { 2787 mutex_exit(&stp->sd_lock); 2788 return (error); 2789 } 2790 2791 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { 2792 error = strwriteable(stp, B_TRUE, B_TRUE); 2793 if (error != 0) { 2794 mutex_exit(&stp->sd_lock); 2795 return (error); 2796 } 2797 } 2798 2799 mutex_exit(&stp->sd_lock); 2800 2801 wqp = stp->sd_wrq; 2802 2803 /* get these values from them cached in the stream head */ 2804 rmin = stp->sd_qn_minpsz; 2805 rmax = stp->sd_qn_maxpsz; 2806 2807 /* 2808 * Check the min/max packet size constraints. If min packet size 2809 * is non-zero, the write cannot be split into multiple messages 2810 * and still guarantee the size constraints. 2811 */ 2812 TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_IN, "strwrite in:q %p", wqp); 2813 2814 ASSERT((rmax >= 0) || (rmax == INFPSZ)); 2815 if (rmax == 0) { 2816 return (0); 2817 } 2818 if (rmin > 0) { 2819 if (uiop->uio_resid < rmin) { 2820 TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT, 2821 "strwrite out:q %p out %d error %d", 2822 wqp, 0, ERANGE); 2823 return (ERANGE); 2824 } 2825 if ((rmax != INFPSZ) && (uiop->uio_resid > rmax)) { 2826 TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT, 2827 "strwrite out:q %p out %d error %d", 2828 wqp, 1, ERANGE); 2829 return (ERANGE); 2830 } 2831 } 2832 2833 /* 2834 * Do until count satisfied or error. 
2835 */ 2836 waitflag = WRITEWAIT | wflag; 2837 if (stp->sd_flag & OLDNDELAY) 2838 tempmode = uiop->uio_fmode & ~FNDELAY; 2839 else 2840 tempmode = uiop->uio_fmode; 2841 2842 if (rmax == INFPSZ) 2843 rmax = uiop->uio_resid; 2844 2845 /* 2846 * Note that tempmode does not get used in strput/strmakedata 2847 * but only in strwaitq. The other routines use uio_fmode 2848 * unmodified. 2849 */ 2850 2851 /* LINTED: constant in conditional context */ 2852 while (1) { /* breaks when uio_resid reaches zero */ 2853 /* 2854 * Determine the size of the next message to be 2855 * packaged. May have to break write into several 2856 * messages based on max packet size. 2857 */ 2858 iosize = MIN(uiop->uio_resid, rmax); 2859 2860 /* 2861 * Put block downstream when flow control allows it. 2862 */ 2863 if ((stp->sd_flag & STRDELIM) && (uiop->uio_resid == iosize)) 2864 b_flag = MSGDELIM; 2865 else 2866 b_flag = 0; 2867 2868 for (;;) { 2869 int done = 0; 2870 2871 error = strput(stp, NULL, uiop, &iosize, b_flag, 0, 0); 2872 if (error == 0) 2873 break; 2874 if (error != EWOULDBLOCK) 2875 goto out; 2876 2877 mutex_enter(&stp->sd_lock); 2878 /* 2879 * Check for a missed wakeup. 2880 * Needed since strput did not hold sd_lock across 2881 * the canputnext. 
2882 */ 2883 if (canputnext(wqp)) { 2884 /* Try again */ 2885 mutex_exit(&stp->sd_lock); 2886 continue; 2887 } 2888 TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAIT, 2889 "strwrite wait:q %p wait", wqp); 2890 if ((error = strwaitq(stp, waitflag, (ssize_t)0, 2891 tempmode, -1, &done)) != 0 || done) { 2892 mutex_exit(&stp->sd_lock); 2893 if ((vp->v_type == VFIFO) && 2894 (uiop->uio_fmode & FNDELAY) && 2895 (error == EAGAIN)) 2896 error = 0; 2897 goto out; 2898 } 2899 TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAKE, 2900 "strwrite wake:q %p awakes", wqp); 2901 if ((error = i_straccess(stp, JCWRITE)) != 0) { 2902 mutex_exit(&stp->sd_lock); 2903 goto out; 2904 } 2905 mutex_exit(&stp->sd_lock); 2906 } 2907 waitflag |= NOINTR; 2908 TRACE_2(TR_FAC_STREAMS_FR, TR_STRWRITE_RESID, 2909 "strwrite resid:q %p uiop %p", wqp, uiop); 2910 if (uiop->uio_resid) { 2911 /* Recheck for errors - needed for sockets */ 2912 if ((stp->sd_wput_opt & SW_RECHECK_ERR) && 2913 (stp->sd_flag & (STWRERR|STRHUP|STPLEX))) { 2914 mutex_enter(&stp->sd_lock); 2915 error = strwriteable(stp, B_FALSE, B_TRUE); 2916 mutex_exit(&stp->sd_lock); 2917 if (error != 0) 2918 return (error); 2919 } 2920 continue; 2921 } 2922 break; 2923 } 2924 out: 2925 /* 2926 * For historical reasons, applications expect EAGAIN when a data 2927 * mblk_t cannot be allocated, so change ENOMEM back to EAGAIN. 2928 */ 2929 if (error == ENOMEM) 2930 error = EAGAIN; 2931 TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT, 2932 "strwrite out:q %p out %d error %d", wqp, 2, error); 2933 return (error); 2934 } 2935 2936 /* 2937 * Stream head write service routine. 2938 * Its job is to wake up any sleeping writers when a queue 2939 * downstream needs data (part of the flow control in putq and getq). 2940 * It also must wake anyone sleeping on a poll(). 2941 * For stream head right below mux module, it must also invoke put procedure 2942 * of next downstream module. 
2943 */ 2944 int 2945 strwsrv(queue_t *q) 2946 { 2947 struct stdata *stp; 2948 queue_t *tq; 2949 qband_t *qbp; 2950 int i; 2951 qband_t *myqbp; 2952 int isevent; 2953 unsigned char qbf[NBAND]; /* band flushing backenable flags */ 2954 2955 TRACE_1(TR_FAC_STREAMS_FR, 2956 TR_STRWSRV, "strwsrv:q %p", q); 2957 stp = (struct stdata *)q->q_ptr; 2958 ASSERT(qclaimed(q)); 2959 mutex_enter(&stp->sd_lock); 2960 ASSERT(!(stp->sd_flag & STPLEX)); 2961 2962 if (stp->sd_flag & WSLEEP) { 2963 stp->sd_flag &= ~WSLEEP; 2964 cv_broadcast(&q->q_wait); 2965 } 2966 mutex_exit(&stp->sd_lock); 2967 2968 /* The other end of a stream pipe went away. */ 2969 if ((tq = q->q_next) == NULL) { 2970 return (0); 2971 } 2972 2973 /* Find the next module forward that has a service procedure */ 2974 claimstr(q); 2975 tq = q->q_nfsrv; 2976 ASSERT(tq != NULL); 2977 2978 if ((q->q_flag & QBACK)) { 2979 if ((tq->q_flag & QFULL)) { 2980 mutex_enter(QLOCK(tq)); 2981 if (!(tq->q_flag & QFULL)) { 2982 mutex_exit(QLOCK(tq)); 2983 goto wakeup; 2984 } 2985 /* 2986 * The queue must have become full again. Set QWANTW 2987 * again so strwsrv will be back enabled when 2988 * the queue becomes non-full next time. 2989 */ 2990 tq->q_flag |= QWANTW; 2991 mutex_exit(QLOCK(tq)); 2992 } else { 2993 wakeup: 2994 pollwakeup(&stp->sd_pollist, POLLWRNORM); 2995 mutex_enter(&stp->sd_lock); 2996 if (stp->sd_sigflags & S_WRNORM) 2997 strsendsig(stp->sd_siglist, S_WRNORM, 0, 0); 2998 mutex_exit(&stp->sd_lock); 2999 } 3000 } 3001 3002 isevent = 0; 3003 i = 1; 3004 bzero((caddr_t)qbf, NBAND); 3005 mutex_enter(QLOCK(tq)); 3006 if ((myqbp = q->q_bandp) != NULL) 3007 for (qbp = tq->q_bandp; qbp && myqbp; qbp = qbp->qb_next) { 3008 ASSERT(myqbp); 3009 if ((myqbp->qb_flag & QB_BACK)) { 3010 if (qbp->qb_flag & QB_FULL) { 3011 /* 3012 * The band must have become full again. 3013 * Set QB_WANTW again so strwsrv will 3014 * be back enabled when the band becomes 3015 * non-full next time. 
3016 */ 3017 qbp->qb_flag |= QB_WANTW; 3018 } else { 3019 isevent = 1; 3020 qbf[i] = 1; 3021 } 3022 } 3023 myqbp = myqbp->qb_next; 3024 i++; 3025 } 3026 mutex_exit(QLOCK(tq)); 3027 3028 if (isevent) { 3029 for (i = tq->q_nband; i; i--) { 3030 if (qbf[i]) { 3031 pollwakeup(&stp->sd_pollist, POLLWRBAND); 3032 mutex_enter(&stp->sd_lock); 3033 if (stp->sd_sigflags & S_WRBAND) 3034 strsendsig(stp->sd_siglist, S_WRBAND, 3035 (uchar_t)i, 0); 3036 mutex_exit(&stp->sd_lock); 3037 } 3038 } 3039 } 3040 3041 releasestr(q); 3042 return (0); 3043 } 3044 3045 /* 3046 * Special case of strcopyin/strcopyout for copying 3047 * struct strioctl that can deal with both data 3048 * models. 3049 */ 3050 3051 #ifdef _LP64 3052 3053 static int 3054 strcopyin_strioctl(void *from, void *to, int flag, int copyflag) 3055 { 3056 struct strioctl32 strioc32; 3057 struct strioctl *striocp; 3058 3059 if (copyflag & U_TO_K) { 3060 ASSERT((copyflag & K_TO_K) == 0); 3061 3062 if ((flag & FMODELS) == DATAMODEL_ILP32) { 3063 if (copyin(from, &strioc32, sizeof (strioc32))) 3064 return (EFAULT); 3065 3066 striocp = (struct strioctl *)to; 3067 striocp->ic_cmd = strioc32.ic_cmd; 3068 striocp->ic_timout = strioc32.ic_timout; 3069 striocp->ic_len = strioc32.ic_len; 3070 striocp->ic_dp = (char *)(uintptr_t)strioc32.ic_dp; 3071 3072 } else { /* NATIVE data model */ 3073 if (copyin(from, to, sizeof (struct strioctl))) { 3074 return (EFAULT); 3075 } else { 3076 return (0); 3077 } 3078 } 3079 } else { 3080 ASSERT(copyflag & K_TO_K); 3081 bcopy(from, to, sizeof (struct strioctl)); 3082 } 3083 return (0); 3084 } 3085 3086 static int 3087 strcopyout_strioctl(void *from, void *to, int flag, int copyflag) 3088 { 3089 struct strioctl32 strioc32; 3090 struct strioctl *striocp; 3091 3092 if (copyflag & U_TO_K) { 3093 ASSERT((copyflag & K_TO_K) == 0); 3094 3095 if ((flag & FMODELS) == DATAMODEL_ILP32) { 3096 striocp = (struct strioctl *)from; 3097 strioc32.ic_cmd = striocp->ic_cmd; 3098 strioc32.ic_timout = 
striocp->ic_timout; 3099 strioc32.ic_len = striocp->ic_len; 3100 strioc32.ic_dp = (caddr32_t)(uintptr_t)striocp->ic_dp; 3101 ASSERT((char *)(uintptr_t)strioc32.ic_dp == 3102 striocp->ic_dp); 3103 3104 if (copyout(&strioc32, to, sizeof (strioc32))) 3105 return (EFAULT); 3106 3107 } else { /* NATIVE data model */ 3108 if (copyout(from, to, sizeof (struct strioctl))) { 3109 return (EFAULT); 3110 } else { 3111 return (0); 3112 } 3113 } 3114 } else { 3115 ASSERT(copyflag & K_TO_K); 3116 bcopy(from, to, sizeof (struct strioctl)); 3117 } 3118 return (0); 3119 } 3120 3121 #else /* ! _LP64 */ 3122 3123 /* ARGSUSED2 */ 3124 static int 3125 strcopyin_strioctl(void *from, void *to, int flag, int copyflag) 3126 { 3127 return (strcopyin(from, to, sizeof (struct strioctl), copyflag)); 3128 } 3129 3130 /* ARGSUSED2 */ 3131 static int 3132 strcopyout_strioctl(void *from, void *to, int flag, int copyflag) 3133 { 3134 return (strcopyout(from, to, sizeof (struct strioctl), copyflag)); 3135 } 3136 3137 #endif /* _LP64 */ 3138 3139 /* 3140 * Determine type of job control semantics expected by user. The 3141 * possibilities are: 3142 * JCREAD - Behaves like read() on fd; send SIGTTIN 3143 * JCWRITE - Behaves like write() on fd; send SIGTTOU if TOSTOP set 3144 * JCSETP - Sets a value in the stream; send SIGTTOU, ignore TOSTOP 3145 * JCGETP - Gets a value in the stream; no signals. 3146 * See straccess in strsubr.c for usage of these values. 3147 * 3148 * This routine also returns -1 for I_STR as a special case; the 3149 * caller must call again with the real ioctl number for 3150 * classification. 
3151 */ 3152 static int 3153 job_control_type(int cmd) 3154 { 3155 switch (cmd) { 3156 case I_STR: 3157 return (-1); 3158 3159 case I_RECVFD: 3160 case I_E_RECVFD: 3161 return (JCREAD); 3162 3163 case I_FDINSERT: 3164 case I_SENDFD: 3165 return (JCWRITE); 3166 3167 case TCSETA: 3168 case TCSETAW: 3169 case TCSETAF: 3170 case TCSBRK: 3171 case TCXONC: 3172 case TCFLSH: 3173 case TCDSET: /* Obsolete */ 3174 case TIOCSWINSZ: 3175 case TCSETS: 3176 case TCSETSW: 3177 case TCSETSF: 3178 case TIOCSETD: 3179 case TIOCHPCL: 3180 case TIOCSETP: 3181 case TIOCSETN: 3182 case TIOCEXCL: 3183 case TIOCNXCL: 3184 case TIOCFLUSH: 3185 case TIOCSETC: 3186 case TIOCLBIS: 3187 case TIOCLBIC: 3188 case TIOCLSET: 3189 case TIOCSBRK: 3190 case TIOCCBRK: 3191 case TIOCSDTR: 3192 case TIOCCDTR: 3193 case TIOCSLTC: 3194 case TIOCSTOP: 3195 case TIOCSTART: 3196 case TIOCSTI: 3197 case TIOCSPGRP: 3198 case TIOCMSET: 3199 case TIOCMBIS: 3200 case TIOCMBIC: 3201 case TIOCREMOTE: 3202 case TIOCSIGNAL: 3203 case LDSETT: 3204 case LDSMAP: /* Obsolete */ 3205 case DIOCSETP: 3206 case I_FLUSH: 3207 case I_SRDOPT: 3208 case I_SETSIG: 3209 case I_SWROPT: 3210 case I_FLUSHBAND: 3211 case I_SETCLTIME: 3212 case I_SERROPT: 3213 case I_ESETSIG: 3214 case FIONBIO: 3215 case FIOASYNC: 3216 case FIOSETOWN: 3217 case JBOOT: /* Obsolete */ 3218 case JTERM: /* Obsolete */ 3219 case JTIMOM: /* Obsolete */ 3220 case JZOMBOOT: /* Obsolete */ 3221 case JAGENT: /* Obsolete */ 3222 case JTRUN: /* Obsolete */ 3223 case JXTPROTO: /* Obsolete */ 3224 case TIOCSETLD: 3225 return (JCSETP); 3226 } 3227 3228 return (JCGETP); 3229 } 3230 3231 /* 3232 * ioctl for streams 3233 */ 3234 int 3235 strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, 3236 cred_t *crp, int *rvalp) 3237 { 3238 struct stdata *stp; 3239 struct strcmd *scp; 3240 struct strioctl strioc; 3241 struct uio uio; 3242 struct iovec iov; 3243 int access; 3244 mblk_t *mp; 3245 int error = 0; 3246 int done = 0; 3247 ssize_t rmin, rmax; 3248 
queue_t *wrq; 3249 queue_t *rdq; 3250 boolean_t kioctl = B_FALSE; 3251 3252 if (flag & FKIOCTL) { 3253 copyflag = K_TO_K; 3254 kioctl = B_TRUE; 3255 } 3256 ASSERT(vp->v_stream); 3257 ASSERT(copyflag == U_TO_K || copyflag == K_TO_K); 3258 stp = vp->v_stream; 3259 3260 TRACE_3(TR_FAC_STREAMS_FR, TR_IOCTL_ENTER, 3261 "strioctl:stp %p cmd %X arg %lX", stp, cmd, arg); 3262 3263 if (audit_active) 3264 audit_strioctl(vp, cmd, arg, flag, copyflag, crp, rvalp); 3265 3266 /* 3267 * If the copy is kernel to kernel, make sure that the FNATIVE 3268 * flag is set. After this it would be a serious error to have 3269 * no model flag. 3270 */ 3271 if (copyflag == K_TO_K) 3272 flag = (flag & ~FMODELS) | FNATIVE; 3273 3274 ASSERT((flag & FMODELS) != 0); 3275 3276 wrq = stp->sd_wrq; 3277 rdq = _RD(wrq); 3278 3279 access = job_control_type(cmd); 3280 3281 /* We should never see these here, should be handled by iwscn */ 3282 if (cmd == SRIOCSREDIR || cmd == SRIOCISREDIR) 3283 return (EINVAL); 3284 3285 mutex_enter(&stp->sd_lock); 3286 if ((access != -1) && ((error = i_straccess(stp, access)) != 0)) { 3287 mutex_exit(&stp->sd_lock); 3288 return (error); 3289 } 3290 mutex_exit(&stp->sd_lock); 3291 3292 /* 3293 * Check for sgttyb-related ioctls first, and complain as 3294 * necessary. 
3295 */ 3296 switch (cmd) { 3297 case TIOCGETP: 3298 case TIOCSETP: 3299 case TIOCSETN: 3300 if (sgttyb_handling >= 2 && !sgttyb_complaint) { 3301 sgttyb_complaint = B_TRUE; 3302 cmn_err(CE_NOTE, 3303 "application used obsolete TIOC[GS]ET"); 3304 } 3305 if (sgttyb_handling >= 3) { 3306 tsignal(curthread, SIGSYS); 3307 return (EIO); 3308 } 3309 break; 3310 } 3311 3312 mutex_enter(&stp->sd_lock); 3313 3314 switch (cmd) { 3315 case I_RECVFD: 3316 case I_E_RECVFD: 3317 case I_PEEK: 3318 case I_NREAD: 3319 case FIONREAD: 3320 case FIORDCHK: 3321 case I_ATMARK: 3322 case FIONBIO: 3323 case FIOASYNC: 3324 if (stp->sd_flag & (STRDERR|STPLEX)) { 3325 error = strgeterr(stp, STRDERR|STPLEX, 0); 3326 if (error != 0) { 3327 mutex_exit(&stp->sd_lock); 3328 return (error); 3329 } 3330 } 3331 break; 3332 3333 default: 3334 if (stp->sd_flag & (STRDERR|STWRERR|STPLEX)) { 3335 error = strgeterr(stp, STRDERR|STWRERR|STPLEX, 0); 3336 if (error != 0) { 3337 mutex_exit(&stp->sd_lock); 3338 return (error); 3339 } 3340 } 3341 } 3342 3343 mutex_exit(&stp->sd_lock); 3344 3345 switch (cmd) { 3346 default: 3347 /* 3348 * The stream head has hardcoded knowledge of a 3349 * miscellaneous collection of terminal-, keyboard- and 3350 * mouse-related ioctls, enumerated below. This hardcoded 3351 * knowledge allows the stream head to automatically 3352 * convert transparent ioctl requests made by userland 3353 * programs into I_STR ioctls which many old STREAMS 3354 * modules and drivers require. 3355 * 3356 * No new ioctls should ever be added to this list. 3357 * Instead, the STREAMS module or driver should be written 3358 * to either handle transparent ioctls or require any 3359 * userland programs to use I_STR ioctls (by returning 3360 * EINVAL to any transparent ioctl requests). 
3361 * 3362 * More importantly, removing ioctls from this list should 3363 * be done with the utmost care, since our STREAMS modules 3364 * and drivers *count* on the stream head performing this 3365 * conversion, and thus may panic while processing 3366 * transparent ioctl request for one of these ioctls (keep 3367 * in mind that third party modules and drivers may have 3368 * similar problems). 3369 */ 3370 if (((cmd & IOCTYPE) == LDIOC) || 3371 ((cmd & IOCTYPE) == tIOC) || 3372 ((cmd & IOCTYPE) == TIOC) || 3373 ((cmd & IOCTYPE) == KIOC) || 3374 ((cmd & IOCTYPE) == MSIOC) || 3375 ((cmd & IOCTYPE) == VUIOC)) { 3376 /* 3377 * The ioctl is a tty ioctl - set up strioc buffer 3378 * and call strdoioctl() to do the work. 3379 */ 3380 if (stp->sd_flag & STRHUP) 3381 return (ENXIO); 3382 strioc.ic_cmd = cmd; 3383 strioc.ic_timout = INFTIM; 3384 3385 switch (cmd) { 3386 3387 case TCXONC: 3388 case TCSBRK: 3389 case TCFLSH: 3390 case TCDSET: 3391 { 3392 int native_arg = (int)arg; 3393 strioc.ic_len = sizeof (int); 3394 strioc.ic_dp = (char *)&native_arg; 3395 return (strdoioctl(stp, &strioc, flag, 3396 K_TO_K, crp, rvalp)); 3397 } 3398 3399 case TCSETA: 3400 case TCSETAW: 3401 case TCSETAF: 3402 strioc.ic_len = sizeof (struct termio); 3403 strioc.ic_dp = (char *)arg; 3404 return (strdoioctl(stp, &strioc, flag, 3405 copyflag, crp, rvalp)); 3406 3407 case TCSETS: 3408 case TCSETSW: 3409 case TCSETSF: 3410 strioc.ic_len = sizeof (struct termios); 3411 strioc.ic_dp = (char *)arg; 3412 return (strdoioctl(stp, &strioc, flag, 3413 copyflag, crp, rvalp)); 3414 3415 case LDSETT: 3416 strioc.ic_len = sizeof (struct termcb); 3417 strioc.ic_dp = (char *)arg; 3418 return (strdoioctl(stp, &strioc, flag, 3419 copyflag, crp, rvalp)); 3420 3421 case TIOCSETP: 3422 strioc.ic_len = sizeof (struct sgttyb); 3423 strioc.ic_dp = (char *)arg; 3424 return (strdoioctl(stp, &strioc, flag, 3425 copyflag, crp, rvalp)); 3426 3427 case TIOCSTI: 3428 if ((flag & FREAD) == 0 && 3429 secpolicy_sti(crp) != 
0) { 3430 return (EPERM); 3431 } 3432 mutex_enter(&stp->sd_lock); 3433 mutex_enter(&curproc->p_splock); 3434 if (stp->sd_sidp != curproc->p_sessp->s_sidp && 3435 secpolicy_sti(crp) != 0) { 3436 mutex_exit(&curproc->p_splock); 3437 mutex_exit(&stp->sd_lock); 3438 return (EACCES); 3439 } 3440 mutex_exit(&curproc->p_splock); 3441 mutex_exit(&stp->sd_lock); 3442 3443 strioc.ic_len = sizeof (char); 3444 strioc.ic_dp = (char *)arg; 3445 return (strdoioctl(stp, &strioc, flag, 3446 copyflag, crp, rvalp)); 3447 3448 case TIOCSWINSZ: 3449 strioc.ic_len = sizeof (struct winsize); 3450 strioc.ic_dp = (char *)arg; 3451 return (strdoioctl(stp, &strioc, flag, 3452 copyflag, crp, rvalp)); 3453 3454 case TIOCSSIZE: 3455 strioc.ic_len = sizeof (struct ttysize); 3456 strioc.ic_dp = (char *)arg; 3457 return (strdoioctl(stp, &strioc, flag, 3458 copyflag, crp, rvalp)); 3459 3460 case TIOCSSOFTCAR: 3461 case KIOCTRANS: 3462 case KIOCTRANSABLE: 3463 case KIOCCMD: 3464 case KIOCSDIRECT: 3465 case KIOCSCOMPAT: 3466 case KIOCSKABORTEN: 3467 case KIOCSRPTDELAY: 3468 case KIOCSRPTRATE: 3469 case VUIDSFORMAT: 3470 case TIOCSPPS: 3471 strioc.ic_len = sizeof (int); 3472 strioc.ic_dp = (char *)arg; 3473 return (strdoioctl(stp, &strioc, flag, 3474 copyflag, crp, rvalp)); 3475 3476 case KIOCSETKEY: 3477 case KIOCGETKEY: 3478 strioc.ic_len = sizeof (struct kiockey); 3479 strioc.ic_dp = (char *)arg; 3480 return (strdoioctl(stp, &strioc, flag, 3481 copyflag, crp, rvalp)); 3482 3483 case KIOCSKEY: 3484 case KIOCGKEY: 3485 strioc.ic_len = sizeof (struct kiockeymap); 3486 strioc.ic_dp = (char *)arg; 3487 return (strdoioctl(stp, &strioc, flag, 3488 copyflag, crp, rvalp)); 3489 3490 case KIOCSLED: 3491 /* arg is a pointer to char */ 3492 strioc.ic_len = sizeof (char); 3493 strioc.ic_dp = (char *)arg; 3494 return (strdoioctl(stp, &strioc, flag, 3495 copyflag, crp, rvalp)); 3496 3497 case MSIOSETPARMS: 3498 strioc.ic_len = sizeof (Ms_parms); 3499 strioc.ic_dp = (char *)arg; 3500 return (strdoioctl(stp, 
&strioc, flag, 3501 copyflag, crp, rvalp)); 3502 3503 case VUIDSADDR: 3504 case VUIDGADDR: 3505 strioc.ic_len = sizeof (struct vuid_addr_probe); 3506 strioc.ic_dp = (char *)arg; 3507 return (strdoioctl(stp, &strioc, flag, 3508 copyflag, crp, rvalp)); 3509 3510 /* 3511 * These M_IOCTL's don't require any data to be sent 3512 * downstream, and the driver will allocate and link 3513 * on its own mblk_t upon M_IOCACK -- thus we set 3514 * ic_len to zero and set ic_dp to arg so we know 3515 * where to copyout to later. 3516 */ 3517 case TIOCGSOFTCAR: 3518 case TIOCGWINSZ: 3519 case TIOCGSIZE: 3520 case KIOCGTRANS: 3521 case KIOCGTRANSABLE: 3522 case KIOCTYPE: 3523 case KIOCGDIRECT: 3524 case KIOCGCOMPAT: 3525 case KIOCLAYOUT: 3526 case KIOCGLED: 3527 case MSIOGETPARMS: 3528 case MSIOBUTTONS: 3529 case VUIDGFORMAT: 3530 case TIOCGPPS: 3531 case TIOCGPPSEV: 3532 case TCGETA: 3533 case TCGETS: 3534 case LDGETT: 3535 case TIOCGETP: 3536 case KIOCGRPTDELAY: 3537 case KIOCGRPTRATE: 3538 strioc.ic_len = 0; 3539 strioc.ic_dp = (char *)arg; 3540 return (strdoioctl(stp, &strioc, flag, 3541 copyflag, crp, rvalp)); 3542 } 3543 } 3544 3545 /* 3546 * Unknown cmd - send it down as a transparent ioctl. 3547 */ 3548 strioc.ic_cmd = cmd; 3549 strioc.ic_timout = INFTIM; 3550 strioc.ic_len = TRANSPARENT; 3551 strioc.ic_dp = (char *)&arg; 3552 3553 return (strdoioctl(stp, &strioc, flag, copyflag, crp, rvalp)); 3554 3555 case I_STR: 3556 /* 3557 * Stream ioctl. Read in an strioctl buffer from the user 3558 * along with any data specified and send it downstream. 3559 * Strdoioctl will wait allow only one ioctl message at 3560 * a time, and waits for the acknowledgement. 
3561 */ 3562 3563 if (stp->sd_flag & STRHUP) 3564 return (ENXIO); 3565 3566 error = strcopyin_strioctl((void *)arg, &strioc, flag, 3567 copyflag); 3568 if (error != 0) 3569 return (error); 3570 3571 if ((strioc.ic_len < 0) || (strioc.ic_timout < -1)) 3572 return (EINVAL); 3573 3574 access = job_control_type(strioc.ic_cmd); 3575 mutex_enter(&stp->sd_lock); 3576 if ((access != -1) && 3577 ((error = i_straccess(stp, access)) != 0)) { 3578 mutex_exit(&stp->sd_lock); 3579 return (error); 3580 } 3581 mutex_exit(&stp->sd_lock); 3582 3583 /* 3584 * The I_STR facility provides a trap door for malicious 3585 * code to send down bogus streamio(7I) ioctl commands to 3586 * unsuspecting STREAMS modules and drivers which expect to 3587 * only get these messages from the stream head. 3588 * Explicitly prohibit any streamio ioctls which can be 3589 * passed downstream by the stream head. Note that we do 3590 * not block all streamio ioctls because the ioctl 3591 * numberspace is not well managed and thus it's possible 3592 * that a module or driver's ioctl numbers may accidentally 3593 * collide with them. 3594 */ 3595 switch (strioc.ic_cmd) { 3596 case I_LINK: 3597 case I_PLINK: 3598 case I_UNLINK: 3599 case I_PUNLINK: 3600 case _I_GETPEERCRED: 3601 case _I_PLINK_LH: 3602 return (EINVAL); 3603 } 3604 3605 error = strdoioctl(stp, &strioc, flag, copyflag, crp, rvalp); 3606 if (error == 0) { 3607 error = strcopyout_strioctl(&strioc, (void *)arg, 3608 flag, copyflag); 3609 } 3610 return (error); 3611 3612 case _I_CMD: 3613 /* 3614 * Like I_STR, but without using M_IOC* messages and without 3615 * copyins/copyouts beyond the passed-in argument. 
3616 */ 3617 if (stp->sd_flag & STRHUP) 3618 return (ENXIO); 3619 3620 if ((scp = kmem_alloc(sizeof (strcmd_t), KM_NOSLEEP)) == NULL) 3621 return (ENOMEM); 3622 3623 if (copyin((void *)arg, scp, sizeof (strcmd_t))) { 3624 kmem_free(scp, sizeof (strcmd_t)); 3625 return (EFAULT); 3626 } 3627 3628 access = job_control_type(scp->sc_cmd); 3629 mutex_enter(&stp->sd_lock); 3630 if (access != -1 && (error = i_straccess(stp, access)) != 0) { 3631 mutex_exit(&stp->sd_lock); 3632 kmem_free(scp, sizeof (strcmd_t)); 3633 return (error); 3634 } 3635 mutex_exit(&stp->sd_lock); 3636 3637 *rvalp = 0; 3638 if ((error = strdocmd(stp, scp, crp)) == 0) { 3639 if (copyout(scp, (void *)arg, sizeof (strcmd_t))) 3640 error = EFAULT; 3641 } 3642 kmem_free(scp, sizeof (strcmd_t)); 3643 return (error); 3644 3645 case I_NREAD: 3646 /* 3647 * Return number of bytes of data in first message 3648 * in queue in "arg" and return the number of messages 3649 * in queue in return value. 3650 */ 3651 { 3652 size_t size; 3653 int retval; 3654 int count = 0; 3655 3656 mutex_enter(QLOCK(rdq)); 3657 3658 size = msgdsize(rdq->q_first); 3659 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next) 3660 count++; 3661 3662 mutex_exit(QLOCK(rdq)); 3663 if (stp->sd_struiordq) { 3664 infod_t infod; 3665 3666 infod.d_cmd = INFOD_COUNT; 3667 infod.d_count = 0; 3668 if (count == 0) { 3669 infod.d_cmd |= INFOD_FIRSTBYTES; 3670 infod.d_bytes = 0; 3671 } 3672 infod.d_res = 0; 3673 (void) infonext(rdq, &infod); 3674 count += infod.d_count; 3675 if (infod.d_res & INFOD_FIRSTBYTES) 3676 size = infod.d_bytes; 3677 } 3678 3679 /* 3680 * Drop down from size_t to the "int" required by the 3681 * interface. Cap at INT_MAX. 3682 */ 3683 retval = MIN(size, INT_MAX); 3684 error = strcopyout(&retval, (void *)arg, sizeof (retval), 3685 copyflag); 3686 if (!error) 3687 *rvalp = count; 3688 return (error); 3689 } 3690 3691 case FIONREAD: 3692 /* 3693 * Return number of bytes of data in all data messages 3694 * in queue in "arg". 
3695 */ 3696 { 3697 size_t size = 0; 3698 int retval; 3699 3700 mutex_enter(QLOCK(rdq)); 3701 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next) 3702 size += msgdsize(mp); 3703 mutex_exit(QLOCK(rdq)); 3704 3705 if (stp->sd_struiordq) { 3706 infod_t infod; 3707 3708 infod.d_cmd = INFOD_BYTES; 3709 infod.d_res = 0; 3710 infod.d_bytes = 0; 3711 (void) infonext(rdq, &infod); 3712 size += infod.d_bytes; 3713 } 3714 3715 /* 3716 * Drop down from size_t to the "int" required by the 3717 * interface. Cap at INT_MAX. 3718 */ 3719 retval = MIN(size, INT_MAX); 3720 error = strcopyout(&retval, (void *)arg, sizeof (retval), 3721 copyflag); 3722 3723 *rvalp = 0; 3724 return (error); 3725 } 3726 case FIORDCHK: 3727 /* 3728 * FIORDCHK does not use arg value (like FIONREAD), 3729 * instead a count is returned. I_NREAD value may 3730 * not be accurate but safe. The real thing to do is 3731 * to add the msgdsizes of all data messages until 3732 * a non-data message. 3733 */ 3734 { 3735 size_t size = 0; 3736 3737 mutex_enter(QLOCK(rdq)); 3738 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next) 3739 size += msgdsize(mp); 3740 mutex_exit(QLOCK(rdq)); 3741 3742 if (stp->sd_struiordq) { 3743 infod_t infod; 3744 3745 infod.d_cmd = INFOD_BYTES; 3746 infod.d_res = 0; 3747 infod.d_bytes = 0; 3748 (void) infonext(rdq, &infod); 3749 size += infod.d_bytes; 3750 } 3751 3752 /* 3753 * Since ioctl returns an int, and memory sizes under 3754 * LP64 may not fit, we return INT_MAX if the count was 3755 * actually greater. 3756 */ 3757 *rvalp = MIN(size, INT_MAX); 3758 return (0); 3759 } 3760 3761 case I_FIND: 3762 /* 3763 * Get module name. 3764 */ 3765 { 3766 char mname[FMNAMESZ + 1]; 3767 queue_t *q; 3768 3769 error = (copyflag & U_TO_K ? copyinstr : copystr)((void *)arg, 3770 mname, FMNAMESZ + 1, NULL); 3771 if (error) 3772 return ((error == ENAMETOOLONG) ? EINVAL : EFAULT); 3773 3774 /* 3775 * Return EINVAL if we're handed a bogus module name. 
3776 */ 3777 if (fmodsw_find(mname, FMODSW_LOAD) == NULL) { 3778 TRACE_0(TR_FAC_STREAMS_FR, 3779 TR_I_CANT_FIND, "couldn't I_FIND"); 3780 return (EINVAL); 3781 } 3782 3783 *rvalp = 0; 3784 3785 /* Look downstream to see if module is there. */ 3786 claimstr(stp->sd_wrq); 3787 for (q = stp->sd_wrq->q_next; q; q = q->q_next) { 3788 if (q->q_flag & QREADR) { 3789 q = NULL; 3790 break; 3791 } 3792 if (strcmp(mname, Q2NAME(q)) == 0) 3793 break; 3794 } 3795 releasestr(stp->sd_wrq); 3796 3797 *rvalp = (q ? 1 : 0); 3798 return (error); 3799 } 3800 3801 case I_PUSH: 3802 case __I_PUSH_NOCTTY: 3803 /* 3804 * Push a module. 3805 * For the case __I_PUSH_NOCTTY push a module but 3806 * do not allocate controlling tty. See bugid 4025044 3807 */ 3808 3809 { 3810 char mname[FMNAMESZ + 1]; 3811 fmodsw_impl_t *fp; 3812 dev_t dummydev; 3813 3814 if (stp->sd_flag & STRHUP) 3815 return (ENXIO); 3816 3817 /* 3818 * Get module name and look up in fmodsw. 3819 */ 3820 error = (copyflag & U_TO_K ? copyinstr : copystr)((void *)arg, 3821 mname, FMNAMESZ + 1, NULL); 3822 if (error) 3823 return ((error == ENAMETOOLONG) ? EINVAL : EFAULT); 3824 3825 if ((fp = fmodsw_find(mname, FMODSW_HOLD | FMODSW_LOAD)) == 3826 NULL) 3827 return (EINVAL); 3828 3829 TRACE_2(TR_FAC_STREAMS_FR, TR_I_PUSH, 3830 "I_PUSH:fp %p stp %p", fp, stp); 3831 3832 if (error = strstartplumb(stp, flag, cmd)) { 3833 fmodsw_rele(fp); 3834 return (error); 3835 } 3836 3837 /* 3838 * See if any more modules can be pushed on this stream. 3839 * Note that this check must be done after strstartplumb() 3840 * since otherwise multiple threads issuing I_PUSHes on 3841 * the same stream will be able to exceed nstrpush. 3842 */ 3843 mutex_enter(&stp->sd_lock); 3844 if (stp->sd_pushcnt >= nstrpush) { 3845 fmodsw_rele(fp); 3846 strendplumb(stp); 3847 mutex_exit(&stp->sd_lock); 3848 return (EINVAL); 3849 } 3850 mutex_exit(&stp->sd_lock); 3851 3852 /* 3853 * Push new module and call its open routine 3854 * via qattach(). 
Modules don't change device 3855 * numbers, so just ignore dummydev here. 3856 */ 3857 dummydev = vp->v_rdev; 3858 if ((error = qattach(rdq, &dummydev, 0, crp, fp, 3859 B_FALSE)) == 0) { 3860 if (vp->v_type == VCHR && /* sorry, no pipes allowed */ 3861 (cmd == I_PUSH) && (stp->sd_flag & STRISTTY)) { 3862 /* 3863 * try to allocate it as a controlling terminal 3864 */ 3865 (void) strctty(stp); 3866 } 3867 } 3868 3869 mutex_enter(&stp->sd_lock); 3870 3871 /* 3872 * As a performance concern we are caching the values of 3873 * q_minpsz and q_maxpsz of the module below the stream 3874 * head in the stream head. 3875 */ 3876 mutex_enter(QLOCK(stp->sd_wrq->q_next)); 3877 rmin = stp->sd_wrq->q_next->q_minpsz; 3878 rmax = stp->sd_wrq->q_next->q_maxpsz; 3879 mutex_exit(QLOCK(stp->sd_wrq->q_next)); 3880 3881 /* Do this processing here as a performance concern */ 3882 if (strmsgsz != 0) { 3883 if (rmax == INFPSZ) 3884 rmax = strmsgsz; 3885 else { 3886 if (vp->v_type == VFIFO) 3887 rmax = MIN(PIPE_BUF, rmax); 3888 else rmax = MIN(strmsgsz, rmax); 3889 } 3890 } 3891 3892 mutex_enter(QLOCK(wrq)); 3893 stp->sd_qn_minpsz = rmin; 3894 stp->sd_qn_maxpsz = rmax; 3895 mutex_exit(QLOCK(wrq)); 3896 3897 strendplumb(stp); 3898 mutex_exit(&stp->sd_lock); 3899 return (error); 3900 } 3901 3902 case I_POP: 3903 { 3904 queue_t *q; 3905 3906 if (stp->sd_flag & STRHUP) 3907 return (ENXIO); 3908 if (!wrq->q_next) /* for broken pipes */ 3909 return (EINVAL); 3910 3911 if (error = strstartplumb(stp, flag, cmd)) 3912 return (error); 3913 3914 /* 3915 * If there is an anchor on this stream and popping 3916 * the current module would attempt to pop through the 3917 * anchor, then disallow the pop unless we have sufficient 3918 * privileges; take the cheapest (non-locking) check 3919 * first. 
3920 */ 3921 if (secpolicy_ip_config(crp, B_TRUE) != 0 || 3922 (stp->sd_anchorzone != crgetzoneid(crp))) { 3923 mutex_enter(&stp->sd_lock); 3924 /* 3925 * Anchors only apply if there's at least one 3926 * module on the stream (sd_pushcnt > 0). 3927 */ 3928 if (stp->sd_pushcnt > 0 && 3929 stp->sd_pushcnt == stp->sd_anchor && 3930 stp->sd_vnode->v_type != VFIFO) { 3931 strendplumb(stp); 3932 mutex_exit(&stp->sd_lock); 3933 if (stp->sd_anchorzone != crgetzoneid(crp)) 3934 return (EINVAL); 3935 /* Audit and report error */ 3936 return (secpolicy_ip_config(crp, B_FALSE)); 3937 } 3938 mutex_exit(&stp->sd_lock); 3939 } 3940 3941 q = wrq->q_next; 3942 TRACE_2(TR_FAC_STREAMS_FR, TR_I_POP, 3943 "I_POP:%p from %p", q, stp); 3944 if (q->q_next == NULL || (q->q_flag & (QREADR|QISDRV))) { 3945 error = EINVAL; 3946 } else { 3947 qdetach(_RD(q), 1, flag, crp, B_FALSE); 3948 error = 0; 3949 } 3950 mutex_enter(&stp->sd_lock); 3951 3952 /* 3953 * As a performance concern we are caching the values of 3954 * q_minpsz and q_maxpsz of the module below the stream 3955 * head in the stream head. 3956 */ 3957 mutex_enter(QLOCK(wrq->q_next)); 3958 rmin = wrq->q_next->q_minpsz; 3959 rmax = wrq->q_next->q_maxpsz; 3960 mutex_exit(QLOCK(wrq->q_next)); 3961 3962 /* Do this processing here as a performance concern */ 3963 if (strmsgsz != 0) { 3964 if (rmax == INFPSZ) 3965 rmax = strmsgsz; 3966 else { 3967 if (vp->v_type == VFIFO) 3968 rmax = MIN(PIPE_BUF, rmax); 3969 else rmax = MIN(strmsgsz, rmax); 3970 } 3971 } 3972 3973 mutex_enter(QLOCK(wrq)); 3974 stp->sd_qn_minpsz = rmin; 3975 stp->sd_qn_maxpsz = rmax; 3976 mutex_exit(QLOCK(wrq)); 3977 3978 /* If we popped through the anchor, then reset the anchor. 
*/ 3979 if (stp->sd_pushcnt < stp->sd_anchor) { 3980 stp->sd_anchor = 0; 3981 stp->sd_anchorzone = 0; 3982 } 3983 strendplumb(stp); 3984 mutex_exit(&stp->sd_lock); 3985 return (error); 3986 } 3987 3988 case _I_MUXID2FD: 3989 { 3990 /* 3991 * Create a fd for a I_PLINK'ed lower stream with a given 3992 * muxid. With the fd, application can send down ioctls, 3993 * like I_LIST, to the previously I_PLINK'ed stream. Note 3994 * that after getting the fd, the application has to do an 3995 * I_PUNLINK on the muxid before it can do any operation 3996 * on the lower stream. This is required by spec1170. 3997 * 3998 * The fd used to do this ioctl should point to the same 3999 * controlling device used to do the I_PLINK. If it uses 4000 * a different stream or an invalid muxid, I_MUXID2FD will 4001 * fail. The error code is set to EINVAL. 4002 * 4003 * The intended use of this interface is the following. 4004 * An application I_PLINK'ed a stream and exits. The fd 4005 * to the lower stream is gone. Another application 4006 * wants to get a fd to the lower stream, it uses I_MUXID2FD. 4007 */ 4008 int muxid = (int)arg; 4009 int fd; 4010 linkinfo_t *linkp; 4011 struct file *fp; 4012 netstack_t *ns; 4013 str_stack_t *ss; 4014 4015 /* 4016 * Do not allow the wildcard muxid. This ioctl is not 4017 * intended to find arbitrary link. 
4018 */ 4019 if (muxid == 0) { 4020 return (EINVAL); 4021 } 4022 4023 ns = netstack_find_by_cred(crp); 4024 ASSERT(ns != NULL); 4025 ss = ns->netstack_str; 4026 ASSERT(ss != NULL); 4027 4028 mutex_enter(&muxifier); 4029 linkp = findlinks(vp->v_stream, muxid, LINKPERSIST, ss); 4030 if (linkp == NULL) { 4031 mutex_exit(&muxifier); 4032 netstack_rele(ss->ss_netstack); 4033 return (EINVAL); 4034 } 4035 4036 if ((fd = ufalloc(0)) == -1) { 4037 mutex_exit(&muxifier); 4038 netstack_rele(ss->ss_netstack); 4039 return (EMFILE); 4040 } 4041 fp = linkp->li_fpdown; 4042 mutex_enter(&fp->f_tlock); 4043 fp->f_count++; 4044 mutex_exit(&fp->f_tlock); 4045 mutex_exit(&muxifier); 4046 setf(fd, fp); 4047 *rvalp = fd; 4048 netstack_rele(ss->ss_netstack); 4049 return (0); 4050 } 4051 4052 case _I_INSERT: 4053 { 4054 /* 4055 * To insert a module to a given position in a stream. 4056 * In the first release, only allow privileged user 4057 * to use this ioctl. Furthermore, the insert is only allowed 4058 * below an anchor if the zoneid is the same as the zoneid 4059 * which created the anchor. 4060 * 4061 * Note that we do not plan to support this ioctl 4062 * on pipes in the first release. We want to learn more 4063 * about the implications of these ioctls before extending 4064 * their support. And we do not think these features are 4065 * valuable for pipes. 
4066 */ 4067 STRUCT_DECL(strmodconf, strmodinsert); 4068 char mod_name[FMNAMESZ + 1]; 4069 fmodsw_impl_t *fp; 4070 dev_t dummydev; 4071 queue_t *tmp_wrq; 4072 int pos; 4073 boolean_t is_insert; 4074 4075 STRUCT_INIT(strmodinsert, flag); 4076 if (stp->sd_flag & STRHUP) 4077 return (ENXIO); 4078 if (STRMATED(stp)) 4079 return (EINVAL); 4080 if ((error = secpolicy_net_config(crp, B_FALSE)) != 0) 4081 return (error); 4082 if (stp->sd_anchor != 0 && 4083 stp->sd_anchorzone != crgetzoneid(crp)) 4084 return (EINVAL); 4085 4086 error = strcopyin((void *)arg, STRUCT_BUF(strmodinsert), 4087 STRUCT_SIZE(strmodinsert), copyflag); 4088 if (error) 4089 return (error); 4090 4091 /* 4092 * Get module name and look up in fmodsw. 4093 */ 4094 error = (copyflag & U_TO_K ? copyinstr : 4095 copystr)(STRUCT_FGETP(strmodinsert, mod_name), 4096 mod_name, FMNAMESZ + 1, NULL); 4097 if (error) 4098 return ((error == ENAMETOOLONG) ? EINVAL : EFAULT); 4099 4100 if ((fp = fmodsw_find(mod_name, FMODSW_HOLD | FMODSW_LOAD)) == 4101 NULL) 4102 return (EINVAL); 4103 4104 if (error = strstartplumb(stp, flag, cmd)) { 4105 fmodsw_rele(fp); 4106 return (error); 4107 } 4108 4109 /* 4110 * Is this _I_INSERT just like an I_PUSH? We need to know 4111 * this because we do some optimizations if this is a 4112 * module being pushed. 4113 */ 4114 pos = STRUCT_FGET(strmodinsert, pos); 4115 is_insert = (pos != 0); 4116 4117 /* 4118 * Make sure pos is valid. Even though it is not an I_PUSH, 4119 * we impose the same limit on the number of modules in a 4120 * stream. 4121 */ 4122 mutex_enter(&stp->sd_lock); 4123 if (stp->sd_pushcnt >= nstrpush || pos < 0 || 4124 pos > stp->sd_pushcnt) { 4125 fmodsw_rele(fp); 4126 strendplumb(stp); 4127 mutex_exit(&stp->sd_lock); 4128 return (EINVAL); 4129 } 4130 if (stp->sd_anchor != 0) { 4131 /* 4132 * Is this insert below the anchor? 4133 * Pushcnt hasn't been increased yet hence 4134 * we test for greater than here, and greater or 4135 * equal after qattach. 
4136 */ 4137 if (pos > (stp->sd_pushcnt - stp->sd_anchor) && 4138 stp->sd_anchorzone != crgetzoneid(crp)) { 4139 fmodsw_rele(fp); 4140 strendplumb(stp); 4141 mutex_exit(&stp->sd_lock); 4142 return (EPERM); 4143 } 4144 } 4145 4146 mutex_exit(&stp->sd_lock); 4147 4148 /* 4149 * First find the correct position this module to 4150 * be inserted. We don't need to call claimstr() 4151 * as the stream should not be changing at this point. 4152 * 4153 * Insert new module and call its open routine 4154 * via qattach(). Modules don't change device 4155 * numbers, so just ignore dummydev here. 4156 */ 4157 for (tmp_wrq = stp->sd_wrq; pos > 0; 4158 tmp_wrq = tmp_wrq->q_next, pos--) { 4159 ASSERT(SAMESTR(tmp_wrq)); 4160 } 4161 dummydev = vp->v_rdev; 4162 if ((error = qattach(_RD(tmp_wrq), &dummydev, 0, crp, 4163 fp, is_insert)) != 0) { 4164 mutex_enter(&stp->sd_lock); 4165 strendplumb(stp); 4166 mutex_exit(&stp->sd_lock); 4167 return (error); 4168 } 4169 4170 mutex_enter(&stp->sd_lock); 4171 4172 /* 4173 * As a performance concern we are caching the values of 4174 * q_minpsz and q_maxpsz of the module below the stream 4175 * head in the stream head. 4176 */ 4177 if (!is_insert) { 4178 mutex_enter(QLOCK(stp->sd_wrq->q_next)); 4179 rmin = stp->sd_wrq->q_next->q_minpsz; 4180 rmax = stp->sd_wrq->q_next->q_maxpsz; 4181 mutex_exit(QLOCK(stp->sd_wrq->q_next)); 4182 4183 /* Do this processing here as a performance concern */ 4184 if (strmsgsz != 0) { 4185 if (rmax == INFPSZ) { 4186 rmax = strmsgsz; 4187 } else { 4188 rmax = MIN(strmsgsz, rmax); 4189 } 4190 } 4191 4192 mutex_enter(QLOCK(wrq)); 4193 stp->sd_qn_minpsz = rmin; 4194 stp->sd_qn_maxpsz = rmax; 4195 mutex_exit(QLOCK(wrq)); 4196 } 4197 4198 /* 4199 * Need to update the anchor value if this module is 4200 * inserted below the anchor point. 
4201 */ 4202 if (stp->sd_anchor != 0) { 4203 pos = STRUCT_FGET(strmodinsert, pos); 4204 if (pos >= (stp->sd_pushcnt - stp->sd_anchor)) 4205 stp->sd_anchor++; 4206 } 4207 4208 strendplumb(stp); 4209 mutex_exit(&stp->sd_lock); 4210 return (0); 4211 } 4212 4213 case _I_REMOVE: 4214 { 4215 /* 4216 * To remove a module with a given name in a stream. The 4217 * caller of this ioctl needs to provide both the name and 4218 * the position of the module to be removed. This eliminates 4219 * the ambiguity of removal if a module is inserted/pushed 4220 * multiple times in a stream. In the first release, only 4221 * allow privileged user to use this ioctl. 4222 * Furthermore, the remove is only allowed 4223 * below an anchor if the zoneid is the same as the zoneid 4224 * which created the anchor. 4225 * 4226 * Note that we do not plan to support this ioctl 4227 * on pipes in the first release. We want to learn more 4228 * about the implications of these ioctls before extending 4229 * their support. And we do not think these features are 4230 * valuable for pipes. 4231 * 4232 * Also note that _I_REMOVE cannot be used to remove a 4233 * driver or the stream head. 4234 */ 4235 STRUCT_DECL(strmodconf, strmodremove); 4236 queue_t *q; 4237 int pos; 4238 char mod_name[FMNAMESZ + 1]; 4239 boolean_t is_remove; 4240 4241 STRUCT_INIT(strmodremove, flag); 4242 if (stp->sd_flag & STRHUP) 4243 return (ENXIO); 4244 if (STRMATED(stp)) 4245 return (EINVAL); 4246 if ((error = secpolicy_net_config(crp, B_FALSE)) != 0) 4247 return (error); 4248 if (stp->sd_anchor != 0 && 4249 stp->sd_anchorzone != crgetzoneid(crp)) 4250 return (EINVAL); 4251 4252 error = strcopyin((void *)arg, STRUCT_BUF(strmodremove), 4253 STRUCT_SIZE(strmodremove), copyflag); 4254 if (error) 4255 return (error); 4256 4257 error = (copyflag & U_TO_K ? copyinstr : 4258 copystr)(STRUCT_FGETP(strmodremove, mod_name), 4259 mod_name, FMNAMESZ + 1, NULL); 4260 if (error) 4261 return ((error == ENAMETOOLONG) ? 
EINVAL : EFAULT); 4262 4263 if ((error = strstartplumb(stp, flag, cmd)) != 0) 4264 return (error); 4265 4266 /* 4267 * Match the name of given module to the name of module at 4268 * the given position. 4269 */ 4270 pos = STRUCT_FGET(strmodremove, pos); 4271 4272 is_remove = (pos != 0); 4273 for (q = stp->sd_wrq->q_next; SAMESTR(q) && pos > 0; 4274 q = q->q_next, pos--) 4275 ; 4276 if (pos > 0 || !SAMESTR(q) || 4277 strcmp(Q2NAME(q), mod_name) != 0) { 4278 mutex_enter(&stp->sd_lock); 4279 strendplumb(stp); 4280 mutex_exit(&stp->sd_lock); 4281 return (EINVAL); 4282 } 4283 4284 /* 4285 * If the position is at or below an anchor, then the zoneid 4286 * must match the zoneid that created the anchor. 4287 */ 4288 if (stp->sd_anchor != 0) { 4289 pos = STRUCT_FGET(strmodremove, pos); 4290 if (pos >= (stp->sd_pushcnt - stp->sd_anchor) && 4291 stp->sd_anchorzone != crgetzoneid(crp)) { 4292 mutex_enter(&stp->sd_lock); 4293 strendplumb(stp); 4294 mutex_exit(&stp->sd_lock); 4295 return (EPERM); 4296 } 4297 } 4298 4299 4300 ASSERT(!(q->q_flag & QREADR)); 4301 qdetach(_RD(q), 1, flag, crp, is_remove); 4302 4303 mutex_enter(&stp->sd_lock); 4304 4305 /* 4306 * As a performance concern we are caching the values of 4307 * q_minpsz and q_maxpsz of the module below the stream 4308 * head in the stream head. 4309 */ 4310 if (!is_remove) { 4311 mutex_enter(QLOCK(wrq->q_next)); 4312 rmin = wrq->q_next->q_minpsz; 4313 rmax = wrq->q_next->q_maxpsz; 4314 mutex_exit(QLOCK(wrq->q_next)); 4315 4316 /* Do this processing here as a performance concern */ 4317 if (strmsgsz != 0) { 4318 if (rmax == INFPSZ) 4319 rmax = strmsgsz; 4320 else { 4321 if (vp->v_type == VFIFO) 4322 rmax = MIN(PIPE_BUF, rmax); 4323 else rmax = MIN(strmsgsz, rmax); 4324 } 4325 } 4326 4327 mutex_enter(QLOCK(wrq)); 4328 stp->sd_qn_minpsz = rmin; 4329 stp->sd_qn_maxpsz = rmax; 4330 mutex_exit(QLOCK(wrq)); 4331 } 4332 4333 /* 4334 * Need to update the anchor value if this module is removed 4335 * at or below the anchor point. 
If the removed module is at 4336 * the anchor point, remove the anchor for this stream if 4337 * there is no module above the anchor point. Otherwise, if 4338 * the removed module is below the anchor point, decrement the 4339 * anchor point by 1. 4340 */ 4341 if (stp->sd_anchor != 0) { 4342 pos = STRUCT_FGET(strmodremove, pos); 4343 if (pos == stp->sd_pushcnt - stp->sd_anchor + 1) 4344 stp->sd_anchor = 0; 4345 else if (pos > (stp->sd_pushcnt - stp->sd_anchor + 1)) 4346 stp->sd_anchor--; 4347 } 4348 4349 strendplumb(stp); 4350 mutex_exit(&stp->sd_lock); 4351 return (0); 4352 } 4353 4354 case I_ANCHOR: 4355 /* 4356 * Set the anchor position on the stream to reside at 4357 * the top module (in other words, the top module 4358 * cannot be popped). Anchors with a FIFO make no 4359 * obvious sense, so they're not allowed. 4360 */ 4361 mutex_enter(&stp->sd_lock); 4362 4363 if (stp->sd_vnode->v_type == VFIFO) { 4364 mutex_exit(&stp->sd_lock); 4365 return (EINVAL); 4366 } 4367 /* Only allow the same zoneid to update the anchor */ 4368 if (stp->sd_anchor != 0 && 4369 stp->sd_anchorzone != crgetzoneid(crp)) { 4370 mutex_exit(&stp->sd_lock); 4371 return (EINVAL); 4372 } 4373 stp->sd_anchor = stp->sd_pushcnt; 4374 stp->sd_anchorzone = crgetzoneid(crp); 4375 mutex_exit(&stp->sd_lock); 4376 return (0); 4377 4378 case I_LOOK: 4379 /* 4380 * Get name of first module downstream. 4381 * If no module, return an error. 4382 */ 4383 claimstr(wrq); 4384 if (_SAMESTR(wrq) && wrq->q_next->q_next != NULL) { 4385 char *name = Q2NAME(wrq->q_next); 4386 4387 error = strcopyout(name, (void *)arg, strlen(name) + 1, 4388 copyflag); 4389 releasestr(wrq); 4390 return (error); 4391 } 4392 releasestr(wrq); 4393 return (EINVAL); 4394 4395 case I_LINK: 4396 case I_PLINK: 4397 /* 4398 * Link a multiplexor. 4399 */ 4400 return (mlink(vp, cmd, (int)arg, crp, rvalp, 0)); 4401 4402 case _I_PLINK_LH: 4403 /* 4404 * Link a multiplexor: Call must originate from kernel. 
4405 */ 4406 if (kioctl) 4407 return (ldi_mlink_lh(vp, cmd, arg, crp, rvalp)); 4408 4409 return (EINVAL); 4410 case I_UNLINK: 4411 case I_PUNLINK: 4412 /* 4413 * Unlink a multiplexor. 4414 * If arg is -1, unlink all links for which this is the 4415 * controlling stream. Otherwise, arg is an index number 4416 * for a link to be removed. 4417 */ 4418 { 4419 struct linkinfo *linkp; 4420 int native_arg = (int)arg; 4421 int type; 4422 netstack_t *ns; 4423 str_stack_t *ss; 4424 4425 TRACE_1(TR_FAC_STREAMS_FR, 4426 TR_I_UNLINK, "I_UNLINK/I_PUNLINK:%p", stp); 4427 if (vp->v_type == VFIFO) { 4428 return (EINVAL); 4429 } 4430 if (cmd == I_UNLINK) 4431 type = LINKNORMAL; 4432 else /* I_PUNLINK */ 4433 type = LINKPERSIST; 4434 if (native_arg == 0) { 4435 return (EINVAL); 4436 } 4437 ns = netstack_find_by_cred(crp); 4438 ASSERT(ns != NULL); 4439 ss = ns->netstack_str; 4440 ASSERT(ss != NULL); 4441 4442 if (native_arg == MUXID_ALL) 4443 error = munlinkall(stp, type, crp, rvalp, ss); 4444 else { 4445 mutex_enter(&muxifier); 4446 if (!(linkp = findlinks(stp, (int)arg, type, ss))) { 4447 /* invalid user supplied index number */ 4448 mutex_exit(&muxifier); 4449 netstack_rele(ss->ss_netstack); 4450 return (EINVAL); 4451 } 4452 /* munlink drops the muxifier lock */ 4453 error = munlink(stp, linkp, type, crp, rvalp, ss); 4454 } 4455 netstack_rele(ss->ss_netstack); 4456 return (error); 4457 } 4458 4459 case I_FLUSH: 4460 /* 4461 * send a flush message downstream 4462 * flush message can indicate 4463 * FLUSHR - flush read queue 4464 * FLUSHW - flush write queue 4465 * FLUSHRW - flush read/write queue 4466 */ 4467 if (stp->sd_flag & STRHUP) 4468 return (ENXIO); 4469 if (arg & ~FLUSHRW) 4470 return (EINVAL); 4471 4472 for (;;) { 4473 if (putnextctl1(stp->sd_wrq, M_FLUSH, (int)arg)) { 4474 break; 4475 } 4476 if (error = strwaitbuf(1, BPRI_HI)) { 4477 return (error); 4478 } 4479 } 4480 4481 /* 4482 * Send down an unsupported ioctl and wait for the nack 4483 * in order to allow the M_FLUSH 
to propagate back 4484 * up to the stream head. 4485 * Replaces if (qready()) runqueues(); 4486 */ 4487 strioc.ic_cmd = -1; /* The unsupported ioctl */ 4488 strioc.ic_timout = 0; 4489 strioc.ic_len = 0; 4490 strioc.ic_dp = NULL; 4491 (void) strdoioctl(stp, &strioc, flag, K_TO_K, crp, rvalp); 4492 *rvalp = 0; 4493 return (0); 4494 4495 case I_FLUSHBAND: 4496 { 4497 struct bandinfo binfo; 4498 4499 error = strcopyin((void *)arg, &binfo, sizeof (binfo), 4500 copyflag); 4501 if (error) 4502 return (error); 4503 if (stp->sd_flag & STRHUP) 4504 return (ENXIO); 4505 if (binfo.bi_flag & ~FLUSHRW) 4506 return (EINVAL); 4507 while (!(mp = allocb(2, BPRI_HI))) { 4508 if (error = strwaitbuf(2, BPRI_HI)) 4509 return (error); 4510 } 4511 mp->b_datap->db_type = M_FLUSH; 4512 *mp->b_wptr++ = binfo.bi_flag | FLUSHBAND; 4513 *mp->b_wptr++ = binfo.bi_pri; 4514 putnext(stp->sd_wrq, mp); 4515 /* 4516 * Send down an unsupported ioctl and wait for the nack 4517 * in order to allow the M_FLUSH to propagate back 4518 * up to the stream head. 
4519 * Replaces if (qready()) runqueues(); 4520 */ 4521 strioc.ic_cmd = -1; /* The unsupported ioctl */ 4522 strioc.ic_timout = 0; 4523 strioc.ic_len = 0; 4524 strioc.ic_dp = NULL; 4525 (void) strdoioctl(stp, &strioc, flag, K_TO_K, crp, rvalp); 4526 *rvalp = 0; 4527 return (0); 4528 } 4529 4530 case I_SRDOPT: 4531 /* 4532 * Set read options 4533 * 4534 * RNORM - default stream mode 4535 * RMSGN - message no discard 4536 * RMSGD - message discard 4537 * RPROTNORM - fail read with EBADMSG for M_[PC]PROTOs 4538 * RPROTDAT - convert M_[PC]PROTOs to M_DATAs 4539 * RPROTDIS - discard M_[PC]PROTOs and retain M_DATAs 4540 */ 4541 if (arg & ~(RMODEMASK | RPROTMASK)) 4542 return (EINVAL); 4543 4544 if ((arg & (RMSGD|RMSGN)) == (RMSGD|RMSGN)) 4545 return (EINVAL); 4546 4547 mutex_enter(&stp->sd_lock); 4548 switch (arg & RMODEMASK) { 4549 case RNORM: 4550 stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS); 4551 break; 4552 case RMSGD: 4553 stp->sd_read_opt = (stp->sd_read_opt & ~RD_MSGNODIS) | 4554 RD_MSGDIS; 4555 break; 4556 case RMSGN: 4557 stp->sd_read_opt = (stp->sd_read_opt & ~RD_MSGDIS) | 4558 RD_MSGNODIS; 4559 break; 4560 } 4561 4562 switch (arg & RPROTMASK) { 4563 case RPROTNORM: 4564 stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS); 4565 break; 4566 4567 case RPROTDAT: 4568 stp->sd_read_opt = ((stp->sd_read_opt & ~RD_PROTDIS) | 4569 RD_PROTDAT); 4570 break; 4571 4572 case RPROTDIS: 4573 stp->sd_read_opt = ((stp->sd_read_opt & ~RD_PROTDAT) | 4574 RD_PROTDIS); 4575 break; 4576 } 4577 mutex_exit(&stp->sd_lock); 4578 return (0); 4579 4580 case I_GRDOPT: 4581 /* 4582 * Get read option and return the value 4583 * to spot pointed to by arg 4584 */ 4585 { 4586 int rdopt; 4587 4588 rdopt = ((stp->sd_read_opt & RD_MSGDIS) ? RMSGD : 4589 ((stp->sd_read_opt & RD_MSGNODIS) ? RMSGN : RNORM)); 4590 rdopt |= ((stp->sd_read_opt & RD_PROTDAT) ? RPROTDAT : 4591 ((stp->sd_read_opt & RD_PROTDIS) ? 
RPROTDIS : RPROTNORM)); 4592 4593 return (strcopyout(&rdopt, (void *)arg, sizeof (int), 4594 copyflag)); 4595 } 4596 4597 case I_SERROPT: 4598 /* 4599 * Set error options 4600 * 4601 * RERRNORM - persistent read errors 4602 * RERRNONPERSIST - non-persistent read errors 4603 * WERRNORM - persistent write errors 4604 * WERRNONPERSIST - non-persistent write errors 4605 */ 4606 if (arg & ~(RERRMASK | WERRMASK)) 4607 return (EINVAL); 4608 4609 mutex_enter(&stp->sd_lock); 4610 switch (arg & RERRMASK) { 4611 case RERRNORM: 4612 stp->sd_flag &= ~STRDERRNONPERSIST; 4613 break; 4614 case RERRNONPERSIST: 4615 stp->sd_flag |= STRDERRNONPERSIST; 4616 break; 4617 } 4618 switch (arg & WERRMASK) { 4619 case WERRNORM: 4620 stp->sd_flag &= ~STWRERRNONPERSIST; 4621 break; 4622 case WERRNONPERSIST: 4623 stp->sd_flag |= STWRERRNONPERSIST; 4624 break; 4625 } 4626 mutex_exit(&stp->sd_lock); 4627 return (0); 4628 4629 case I_GERROPT: 4630 /* 4631 * Get error option and return the value 4632 * to spot pointed to by arg 4633 */ 4634 { 4635 int erropt = 0; 4636 4637 erropt |= (stp->sd_flag & STRDERRNONPERSIST) ? RERRNONPERSIST : 4638 RERRNORM; 4639 erropt |= (stp->sd_flag & STWRERRNONPERSIST) ? WERRNONPERSIST : 4640 WERRNORM; 4641 return (strcopyout(&erropt, (void *)arg, sizeof (int), 4642 copyflag)); 4643 } 4644 4645 case I_SETSIG: 4646 /* 4647 * Register the calling proc to receive the SIGPOLL 4648 * signal based on the events given in arg. If 4649 * arg is zero, remove the proc from register list. 4650 */ 4651 { 4652 strsig_t *ssp, *pssp; 4653 struct pid *pidp; 4654 4655 pssp = NULL; 4656 pidp = curproc->p_pidp; 4657 /* 4658 * Hold sd_lock to prevent traversal of sd_siglist while 4659 * it is modified. 
4660 */ 4661 mutex_enter(&stp->sd_lock); 4662 for (ssp = stp->sd_siglist; ssp && (ssp->ss_pidp != pidp); 4663 pssp = ssp, ssp = ssp->ss_next) 4664 ; 4665 4666 if (arg) { 4667 if (arg & ~(S_INPUT|S_HIPRI|S_MSG|S_HANGUP|S_ERROR| 4668 S_RDNORM|S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)) { 4669 mutex_exit(&stp->sd_lock); 4670 return (EINVAL); 4671 } 4672 if ((arg & S_BANDURG) && !(arg & S_RDBAND)) { 4673 mutex_exit(&stp->sd_lock); 4674 return (EINVAL); 4675 } 4676 4677 /* 4678 * If proc not already registered, add it 4679 * to list. 4680 */ 4681 if (!ssp) { 4682 ssp = kmem_alloc(sizeof (strsig_t), KM_SLEEP); 4683 ssp->ss_pidp = pidp; 4684 ssp->ss_pid = pidp->pid_id; 4685 ssp->ss_next = NULL; 4686 if (pssp) 4687 pssp->ss_next = ssp; 4688 else 4689 stp->sd_siglist = ssp; 4690 mutex_enter(&pidlock); 4691 PID_HOLD(pidp); 4692 mutex_exit(&pidlock); 4693 } 4694 4695 /* 4696 * Set events. 4697 */ 4698 ssp->ss_events = (int)arg; 4699 } else { 4700 /* 4701 * Remove proc from register list. 4702 */ 4703 if (ssp) { 4704 mutex_enter(&pidlock); 4705 PID_RELE(pidp); 4706 mutex_exit(&pidlock); 4707 if (pssp) 4708 pssp->ss_next = ssp->ss_next; 4709 else 4710 stp->sd_siglist = ssp->ss_next; 4711 kmem_free(ssp, sizeof (strsig_t)); 4712 } else { 4713 mutex_exit(&stp->sd_lock); 4714 return (EINVAL); 4715 } 4716 } 4717 4718 /* 4719 * Recalculate OR of sig events. 4720 */ 4721 stp->sd_sigflags = 0; 4722 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 4723 stp->sd_sigflags |= ssp->ss_events; 4724 mutex_exit(&stp->sd_lock); 4725 return (0); 4726 } 4727 4728 case I_GETSIG: 4729 /* 4730 * Return (in arg) the current registration of events 4731 * for which the calling proc is to be signaled. 
4732 */ 4733 { 4734 struct strsig *ssp; 4735 struct pid *pidp; 4736 4737 pidp = curproc->p_pidp; 4738 mutex_enter(&stp->sd_lock); 4739 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 4740 if (ssp->ss_pidp == pidp) { 4741 error = strcopyout(&ssp->ss_events, (void *)arg, 4742 sizeof (int), copyflag); 4743 mutex_exit(&stp->sd_lock); 4744 return (error); 4745 } 4746 mutex_exit(&stp->sd_lock); 4747 return (EINVAL); 4748 } 4749 4750 case I_ESETSIG: 4751 /* 4752 * Register the ss_pid to receive the SIGPOLL 4753 * signal based on the events is ss_events arg. If 4754 * ss_events is zero, remove the proc from register list. 4755 */ 4756 { 4757 struct strsig *ssp, *pssp; 4758 struct proc *proc; 4759 struct pid *pidp; 4760 pid_t pid; 4761 struct strsigset ss; 4762 4763 error = strcopyin((void *)arg, &ss, sizeof (ss), copyflag); 4764 if (error) 4765 return (error); 4766 4767 pid = ss.ss_pid; 4768 4769 if (ss.ss_events != 0) { 4770 /* 4771 * Permissions check by sending signal 0. 4772 * Note that when kill fails it does a set_errno 4773 * causing the system call to fail. 4774 */ 4775 error = kill(pid, 0); 4776 if (error) { 4777 return (error); 4778 } 4779 } 4780 mutex_enter(&pidlock); 4781 if (pid == 0) 4782 proc = curproc; 4783 else if (pid < 0) 4784 proc = pgfind(-pid); 4785 else 4786 proc = prfind(pid); 4787 if (proc == NULL) { 4788 mutex_exit(&pidlock); 4789 return (ESRCH); 4790 } 4791 if (pid < 0) 4792 pidp = proc->p_pgidp; 4793 else 4794 pidp = proc->p_pidp; 4795 ASSERT(pidp); 4796 /* 4797 * Get a hold on the pid structure while referencing it. 4798 * There is a separate PID_HOLD should it be inserted 4799 * in the list below. 4800 */ 4801 PID_HOLD(pidp); 4802 mutex_exit(&pidlock); 4803 4804 pssp = NULL; 4805 /* 4806 * Hold sd_lock to prevent traversal of sd_siglist while 4807 * it is modified. 
4808 */ 4809 mutex_enter(&stp->sd_lock); 4810 for (ssp = stp->sd_siglist; ssp && (ssp->ss_pid != pid); 4811 pssp = ssp, ssp = ssp->ss_next) 4812 ; 4813 4814 if (ss.ss_events) { 4815 if (ss.ss_events & 4816 ~(S_INPUT|S_HIPRI|S_MSG|S_HANGUP|S_ERROR| 4817 S_RDNORM|S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)) { 4818 mutex_exit(&stp->sd_lock); 4819 mutex_enter(&pidlock); 4820 PID_RELE(pidp); 4821 mutex_exit(&pidlock); 4822 return (EINVAL); 4823 } 4824 if ((ss.ss_events & S_BANDURG) && 4825 !(ss.ss_events & S_RDBAND)) { 4826 mutex_exit(&stp->sd_lock); 4827 mutex_enter(&pidlock); 4828 PID_RELE(pidp); 4829 mutex_exit(&pidlock); 4830 return (EINVAL); 4831 } 4832 4833 /* 4834 * If proc not already registered, add it 4835 * to list. 4836 */ 4837 if (!ssp) { 4838 ssp = kmem_alloc(sizeof (strsig_t), KM_SLEEP); 4839 ssp->ss_pidp = pidp; 4840 ssp->ss_pid = pid; 4841 ssp->ss_next = NULL; 4842 if (pssp) 4843 pssp->ss_next = ssp; 4844 else 4845 stp->sd_siglist = ssp; 4846 mutex_enter(&pidlock); 4847 PID_HOLD(pidp); 4848 mutex_exit(&pidlock); 4849 } 4850 4851 /* 4852 * Set events. 4853 */ 4854 ssp->ss_events = ss.ss_events; 4855 } else { 4856 /* 4857 * Remove proc from register list. 4858 */ 4859 if (ssp) { 4860 mutex_enter(&pidlock); 4861 PID_RELE(pidp); 4862 mutex_exit(&pidlock); 4863 if (pssp) 4864 pssp->ss_next = ssp->ss_next; 4865 else 4866 stp->sd_siglist = ssp->ss_next; 4867 kmem_free(ssp, sizeof (strsig_t)); 4868 } else { 4869 mutex_exit(&stp->sd_lock); 4870 mutex_enter(&pidlock); 4871 PID_RELE(pidp); 4872 mutex_exit(&pidlock); 4873 return (EINVAL); 4874 } 4875 } 4876 4877 /* 4878 * Recalculate OR of sig events. 
4879 */ 4880 stp->sd_sigflags = 0; 4881 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 4882 stp->sd_sigflags |= ssp->ss_events; 4883 mutex_exit(&stp->sd_lock); 4884 mutex_enter(&pidlock); 4885 PID_RELE(pidp); 4886 mutex_exit(&pidlock); 4887 return (0); 4888 } 4889 4890 case I_EGETSIG: 4891 /* 4892 * Return (in arg) the current registration of events 4893 * for which the calling proc is to be signaled. 4894 */ 4895 { 4896 struct strsig *ssp; 4897 struct proc *proc; 4898 pid_t pid; 4899 struct pid *pidp; 4900 struct strsigset ss; 4901 4902 error = strcopyin((void *)arg, &ss, sizeof (ss), copyflag); 4903 if (error) 4904 return (error); 4905 4906 pid = ss.ss_pid; 4907 mutex_enter(&pidlock); 4908 if (pid == 0) 4909 proc = curproc; 4910 else if (pid < 0) 4911 proc = pgfind(-pid); 4912 else 4913 proc = prfind(pid); 4914 if (proc == NULL) { 4915 mutex_exit(&pidlock); 4916 return (ESRCH); 4917 } 4918 if (pid < 0) 4919 pidp = proc->p_pgidp; 4920 else 4921 pidp = proc->p_pidp; 4922 4923 /* Prevent the pidp from being reassigned */ 4924 PID_HOLD(pidp); 4925 mutex_exit(&pidlock); 4926 4927 mutex_enter(&stp->sd_lock); 4928 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 4929 if (ssp->ss_pid == pid) { 4930 ss.ss_pid = ssp->ss_pid; 4931 ss.ss_events = ssp->ss_events; 4932 error = strcopyout(&ss, (void *)arg, 4933 sizeof (struct strsigset), copyflag); 4934 mutex_exit(&stp->sd_lock); 4935 mutex_enter(&pidlock); 4936 PID_RELE(pidp); 4937 mutex_exit(&pidlock); 4938 return (error); 4939 } 4940 mutex_exit(&stp->sd_lock); 4941 mutex_enter(&pidlock); 4942 PID_RELE(pidp); 4943 mutex_exit(&pidlock); 4944 return (EINVAL); 4945 } 4946 4947 case I_PEEK: 4948 { 4949 STRUCT_DECL(strpeek, strpeek); 4950 size_t n; 4951 mblk_t *fmp, *tmp_mp = NULL; 4952 4953 STRUCT_INIT(strpeek, flag); 4954 4955 error = strcopyin((void *)arg, STRUCT_BUF(strpeek), 4956 STRUCT_SIZE(strpeek), copyflag); 4957 if (error) 4958 return (error); 4959 4960 mutex_enter(QLOCK(rdq)); 4961 /* 4962 * Skip the invalid 
messages 4963 */ 4964 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next) 4965 if (mp->b_datap->db_type != M_SIG) 4966 break; 4967 4968 /* 4969 * If user has requested to peek at a high priority message 4970 * and first message is not, return 0 4971 */ 4972 if (mp != NULL) { 4973 if ((STRUCT_FGET(strpeek, flags) & RS_HIPRI) && 4974 queclass(mp) == QNORM) { 4975 *rvalp = 0; 4976 mutex_exit(QLOCK(rdq)); 4977 return (0); 4978 } 4979 } else if (stp->sd_struiordq == NULL || 4980 (STRUCT_FGET(strpeek, flags) & RS_HIPRI)) { 4981 /* 4982 * No mblks to look at at the streamhead and 4983 * 1). This isn't a synch stream or 4984 * 2). This is a synch stream but caller wants high 4985 * priority messages which is not supported by 4986 * the synch stream. (it only supports QNORM) 4987 */ 4988 *rvalp = 0; 4989 mutex_exit(QLOCK(rdq)); 4990 return (0); 4991 } 4992 4993 fmp = mp; 4994 4995 if (mp && mp->b_datap->db_type == M_PASSFP) { 4996 mutex_exit(QLOCK(rdq)); 4997 return (EBADMSG); 4998 } 4999 5000 ASSERT(mp == NULL || mp->b_datap->db_type == M_PCPROTO || 5001 mp->b_datap->db_type == M_PROTO || 5002 mp->b_datap->db_type == M_DATA); 5003 5004 if (mp && mp->b_datap->db_type == M_PCPROTO) { 5005 STRUCT_FSET(strpeek, flags, RS_HIPRI); 5006 } else { 5007 STRUCT_FSET(strpeek, flags, 0); 5008 } 5009 5010 5011 if (mp && ((tmp_mp = dupmsg(mp)) == NULL)) { 5012 mutex_exit(QLOCK(rdq)); 5013 return (ENOSR); 5014 } 5015 mutex_exit(QLOCK(rdq)); 5016 5017 /* 5018 * set mp = tmp_mp, so that I_PEEK processing can continue. 5019 * tmp_mp is used to free the dup'd message. 5020 */ 5021 mp = tmp_mp; 5022 5023 uio.uio_fmode = 0; 5024 uio.uio_extflg = UIO_COPY_CACHED; 5025 uio.uio_segflg = (copyflag == U_TO_K) ? UIO_USERSPACE : 5026 UIO_SYSSPACE; 5027 uio.uio_limit = 0; 5028 /* 5029 * First process PROTO blocks, if any. 5030 * If user doesn't want to get ctl info by setting maxlen <= 0, 5031 * then set len to -1/0 and skip control blocks part. 
5032 */ 5033 if (STRUCT_FGET(strpeek, ctlbuf.maxlen) < 0) 5034 STRUCT_FSET(strpeek, ctlbuf.len, -1); 5035 else if (STRUCT_FGET(strpeek, ctlbuf.maxlen) == 0) 5036 STRUCT_FSET(strpeek, ctlbuf.len, 0); 5037 else { 5038 int ctl_part = 0; 5039 5040 iov.iov_base = STRUCT_FGETP(strpeek, ctlbuf.buf); 5041 iov.iov_len = STRUCT_FGET(strpeek, ctlbuf.maxlen); 5042 uio.uio_iov = &iov; 5043 uio.uio_resid = iov.iov_len; 5044 uio.uio_loffset = 0; 5045 uio.uio_iovcnt = 1; 5046 while (mp && mp->b_datap->db_type != M_DATA && 5047 uio.uio_resid >= 0) { 5048 ASSERT(STRUCT_FGET(strpeek, flags) == 0 ? 5049 mp->b_datap->db_type == M_PROTO : 5050 mp->b_datap->db_type == M_PCPROTO); 5051 5052 if ((n = MIN(uio.uio_resid, 5053 mp->b_wptr - mp->b_rptr)) != 0 && 5054 (error = uiomove((char *)mp->b_rptr, n, 5055 UIO_READ, &uio)) != 0) { 5056 freemsg(tmp_mp); 5057 return (error); 5058 } 5059 ctl_part = 1; 5060 mp = mp->b_cont; 5061 } 5062 /* No ctl message */ 5063 if (ctl_part == 0) 5064 STRUCT_FSET(strpeek, ctlbuf.len, -1); 5065 else 5066 STRUCT_FSET(strpeek, ctlbuf.len, 5067 STRUCT_FGET(strpeek, ctlbuf.maxlen) - 5068 uio.uio_resid); 5069 } 5070 5071 /* 5072 * Now process DATA blocks, if any. 5073 * If user doesn't want to get data info by setting maxlen <= 0, 5074 * then set len to -1/0 and skip data blocks part. 
5075 */ 5076 if (STRUCT_FGET(strpeek, databuf.maxlen) < 0) 5077 STRUCT_FSET(strpeek, databuf.len, -1); 5078 else if (STRUCT_FGET(strpeek, databuf.maxlen) == 0) 5079 STRUCT_FSET(strpeek, databuf.len, 0); 5080 else { 5081 int data_part = 0; 5082 5083 iov.iov_base = STRUCT_FGETP(strpeek, databuf.buf); 5084 iov.iov_len = STRUCT_FGET(strpeek, databuf.maxlen); 5085 uio.uio_iov = &iov; 5086 uio.uio_resid = iov.iov_len; 5087 uio.uio_loffset = 0; 5088 uio.uio_iovcnt = 1; 5089 while (mp && uio.uio_resid) { 5090 if (mp->b_datap->db_type == M_DATA) { 5091 if ((n = MIN(uio.uio_resid, 5092 mp->b_wptr - mp->b_rptr)) != 0 && 5093 (error = uiomove((char *)mp->b_rptr, 5094 n, UIO_READ, &uio)) != 0) { 5095 freemsg(tmp_mp); 5096 return (error); 5097 } 5098 data_part = 1; 5099 } 5100 ASSERT(data_part == 0 || 5101 mp->b_datap->db_type == M_DATA); 5102 mp = mp->b_cont; 5103 } 5104 /* No data message */ 5105 if (data_part == 0) 5106 STRUCT_FSET(strpeek, databuf.len, -1); 5107 else 5108 STRUCT_FSET(strpeek, databuf.len, 5109 STRUCT_FGET(strpeek, databuf.maxlen) - 5110 uio.uio_resid); 5111 } 5112 freemsg(tmp_mp); 5113 5114 /* 5115 * It is a synch stream and user wants to get 5116 * data (maxlen > 0). 5117 * uio setup is done by the codes that process DATA 5118 * blocks above. 5119 */ 5120 if ((fmp == NULL) && STRUCT_FGET(strpeek, databuf.maxlen) > 0) { 5121 infod_t infod; 5122 5123 infod.d_cmd = INFOD_COPYOUT; 5124 infod.d_res = 0; 5125 infod.d_uiop = &uio; 5126 error = infonext(rdq, &infod); 5127 if (error == EINVAL || error == EBUSY) 5128 error = 0; 5129 if (error) 5130 return (error); 5131 STRUCT_FSET(strpeek, databuf.len, STRUCT_FGET(strpeek, 5132 databuf.maxlen) - uio.uio_resid); 5133 if (STRUCT_FGET(strpeek, databuf.len) == 0) { 5134 /* 5135 * No data found by the infonext(). 
5136 */ 5137 STRUCT_FSET(strpeek, databuf.len, -1); 5138 } 5139 } 5140 error = strcopyout(STRUCT_BUF(strpeek), (void *)arg, 5141 STRUCT_SIZE(strpeek), copyflag); 5142 if (error) { 5143 return (error); 5144 } 5145 /* 5146 * If there is no message retrieved, set return code to 0 5147 * otherwise, set it to 1. 5148 */ 5149 if (STRUCT_FGET(strpeek, ctlbuf.len) == -1 && 5150 STRUCT_FGET(strpeek, databuf.len) == -1) 5151 *rvalp = 0; 5152 else 5153 *rvalp = 1; 5154 return (0); 5155 } 5156 5157 case I_FDINSERT: 5158 { 5159 STRUCT_DECL(strfdinsert, strfdinsert); 5160 struct file *resftp; 5161 struct stdata *resstp; 5162 t_uscalar_t ival; 5163 ssize_t msgsize; 5164 struct strbuf mctl; 5165 5166 STRUCT_INIT(strfdinsert, flag); 5167 if (stp->sd_flag & STRHUP) 5168 return (ENXIO); 5169 /* 5170 * STRDERR, STWRERR and STPLEX tested above. 5171 */ 5172 error = strcopyin((void *)arg, STRUCT_BUF(strfdinsert), 5173 STRUCT_SIZE(strfdinsert), copyflag); 5174 if (error) 5175 return (error); 5176 5177 if (STRUCT_FGET(strfdinsert, offset) < 0 || 5178 (STRUCT_FGET(strfdinsert, offset) % 5179 sizeof (t_uscalar_t)) != 0) 5180 return (EINVAL); 5181 if ((resftp = getf(STRUCT_FGET(strfdinsert, fildes))) != NULL) { 5182 if ((resstp = resftp->f_vnode->v_stream) == NULL) { 5183 releasef(STRUCT_FGET(strfdinsert, fildes)); 5184 return (EINVAL); 5185 } 5186 } else 5187 return (EINVAL); 5188 5189 mutex_enter(&resstp->sd_lock); 5190 if (resstp->sd_flag & (STRDERR|STWRERR|STRHUP|STPLEX)) { 5191 error = strgeterr(resstp, 5192 STRDERR|STWRERR|STRHUP|STPLEX, 0); 5193 if (error != 0) { 5194 mutex_exit(&resstp->sd_lock); 5195 releasef(STRUCT_FGET(strfdinsert, fildes)); 5196 return (error); 5197 } 5198 } 5199 mutex_exit(&resstp->sd_lock); 5200 5201 #ifdef _ILP32 5202 { 5203 queue_t *q; 5204 queue_t *mate = NULL; 5205 5206 /* get read queue of stream terminus */ 5207 claimstr(resstp->sd_wrq); 5208 for (q = resstp->sd_wrq->q_next; q->q_next != NULL; 5209 q = q->q_next) 5210 if (!STRMATED(resstp) && STREAM(q) != 
resstp && 5211 mate == NULL) { 5212 ASSERT(q->q_qinfo->qi_srvp); 5213 ASSERT(_OTHERQ(q)->q_qinfo->qi_srvp); 5214 claimstr(q); 5215 mate = q; 5216 } 5217 q = _RD(q); 5218 if (mate) 5219 releasestr(mate); 5220 releasestr(resstp->sd_wrq); 5221 ival = (t_uscalar_t)q; 5222 } 5223 #else 5224 ival = (t_uscalar_t)getminor(resftp->f_vnode->v_rdev); 5225 #endif /* _ILP32 */ 5226 5227 if (STRUCT_FGET(strfdinsert, ctlbuf.len) < 5228 STRUCT_FGET(strfdinsert, offset) + sizeof (t_uscalar_t)) { 5229 releasef(STRUCT_FGET(strfdinsert, fildes)); 5230 return (EINVAL); 5231 } 5232 5233 /* 5234 * Check for legal flag value. 5235 */ 5236 if (STRUCT_FGET(strfdinsert, flags) & ~RS_HIPRI) { 5237 releasef(STRUCT_FGET(strfdinsert, fildes)); 5238 return (EINVAL); 5239 } 5240 5241 /* get these values from those cached in the stream head */ 5242 mutex_enter(QLOCK(stp->sd_wrq)); 5243 rmin = stp->sd_qn_minpsz; 5244 rmax = stp->sd_qn_maxpsz; 5245 mutex_exit(QLOCK(stp->sd_wrq)); 5246 5247 /* 5248 * Make sure ctl and data sizes together fall within 5249 * the limits of the max and min receive packet sizes 5250 * and do not exceed system limit. A negative data 5251 * length means that no data part is to be sent. 
5252 */ 5253 ASSERT((rmax >= 0) || (rmax == INFPSZ)); 5254 if (rmax == 0) { 5255 releasef(STRUCT_FGET(strfdinsert, fildes)); 5256 return (ERANGE); 5257 } 5258 if ((msgsize = STRUCT_FGET(strfdinsert, databuf.len)) < 0) 5259 msgsize = 0; 5260 if ((msgsize < rmin) || 5261 ((msgsize > rmax) && (rmax != INFPSZ)) || 5262 (STRUCT_FGET(strfdinsert, ctlbuf.len) > strctlsz)) { 5263 releasef(STRUCT_FGET(strfdinsert, fildes)); 5264 return (ERANGE); 5265 } 5266 5267 mutex_enter(&stp->sd_lock); 5268 while (!(STRUCT_FGET(strfdinsert, flags) & RS_HIPRI) && 5269 !canputnext(stp->sd_wrq)) { 5270 if ((error = strwaitq(stp, WRITEWAIT, (ssize_t)0, 5271 flag, -1, &done)) != 0 || done) { 5272 mutex_exit(&stp->sd_lock); 5273 releasef(STRUCT_FGET(strfdinsert, fildes)); 5274 return (error); 5275 } 5276 if ((error = i_straccess(stp, access)) != 0) { 5277 mutex_exit(&stp->sd_lock); 5278 releasef( 5279 STRUCT_FGET(strfdinsert, fildes)); 5280 return (error); 5281 } 5282 } 5283 mutex_exit(&stp->sd_lock); 5284 5285 /* 5286 * Copy strfdinsert.ctlbuf into native form of 5287 * ctlbuf to pass down into strmakemsg(). 5288 */ 5289 mctl.maxlen = STRUCT_FGET(strfdinsert, ctlbuf.maxlen); 5290 mctl.len = STRUCT_FGET(strfdinsert, ctlbuf.len); 5291 mctl.buf = STRUCT_FGETP(strfdinsert, ctlbuf.buf); 5292 5293 iov.iov_base = STRUCT_FGETP(strfdinsert, databuf.buf); 5294 iov.iov_len = STRUCT_FGET(strfdinsert, databuf.len); 5295 uio.uio_iov = &iov; 5296 uio.uio_iovcnt = 1; 5297 uio.uio_loffset = 0; 5298 uio.uio_segflg = (copyflag == U_TO_K) ? 
UIO_USERSPACE : 5299 UIO_SYSSPACE; 5300 uio.uio_fmode = 0; 5301 uio.uio_extflg = UIO_COPY_CACHED; 5302 uio.uio_resid = iov.iov_len; 5303 if ((error = strmakemsg(&mctl, 5304 &msgsize, &uio, stp, 5305 STRUCT_FGET(strfdinsert, flags), &mp)) != 0 || !mp) { 5306 STRUCT_FSET(strfdinsert, databuf.len, msgsize); 5307 releasef(STRUCT_FGET(strfdinsert, fildes)); 5308 return (error); 5309 } 5310 5311 STRUCT_FSET(strfdinsert, databuf.len, msgsize); 5312 5313 /* 5314 * Place the possibly reencoded queue pointer 'offset' bytes 5315 * from the start of the control portion of the message. 5316 */ 5317 *((t_uscalar_t *)(mp->b_rptr + 5318 STRUCT_FGET(strfdinsert, offset))) = ival; 5319 5320 /* 5321 * Put message downstream. 5322 */ 5323 stream_willservice(stp); 5324 putnext(stp->sd_wrq, mp); 5325 stream_runservice(stp); 5326 releasef(STRUCT_FGET(strfdinsert, fildes)); 5327 return (error); 5328 } 5329 5330 case I_SENDFD: 5331 { 5332 struct file *fp; 5333 5334 if ((fp = getf((int)arg)) == NULL) 5335 return (EBADF); 5336 error = do_sendfp(stp, fp, crp); 5337 if (audit_active) { 5338 audit_fdsend((int)arg, fp, error); 5339 } 5340 releasef((int)arg); 5341 return (error); 5342 } 5343 5344 case I_RECVFD: 5345 case I_E_RECVFD: 5346 { 5347 struct k_strrecvfd *srf; 5348 int i, fd; 5349 5350 mutex_enter(&stp->sd_lock); 5351 while (!(mp = getq(rdq))) { 5352 if (stp->sd_flag & (STRHUP|STREOF)) { 5353 mutex_exit(&stp->sd_lock); 5354 return (ENXIO); 5355 } 5356 if ((error = strwaitq(stp, GETWAIT, (ssize_t)0, 5357 flag, -1, &done)) != 0 || done) { 5358 mutex_exit(&stp->sd_lock); 5359 return (error); 5360 } 5361 if ((error = i_straccess(stp, access)) != 0) { 5362 mutex_exit(&stp->sd_lock); 5363 return (error); 5364 } 5365 } 5366 if (mp->b_datap->db_type != M_PASSFP) { 5367 putback(stp, rdq, mp, mp->b_band); 5368 mutex_exit(&stp->sd_lock); 5369 return (EBADMSG); 5370 } 5371 mutex_exit(&stp->sd_lock); 5372 5373 srf = (struct k_strrecvfd *)mp->b_rptr; 5374 if ((fd = ufalloc(0)) == -1) { 5375 
mutex_enter(&stp->sd_lock); 5376 putback(stp, rdq, mp, mp->b_band); 5377 mutex_exit(&stp->sd_lock); 5378 return (EMFILE); 5379 } 5380 if (cmd == I_RECVFD) { 5381 struct o_strrecvfd ostrfd; 5382 5383 /* check to see if uid/gid values are too large. */ 5384 5385 if (srf->uid > (o_uid_t)USHRT_MAX || 5386 srf->gid > (o_gid_t)USHRT_MAX) { 5387 mutex_enter(&stp->sd_lock); 5388 putback(stp, rdq, mp, mp->b_band); 5389 mutex_exit(&stp->sd_lock); 5390 setf(fd, NULL); /* release fd entry */ 5391 return (EOVERFLOW); 5392 } 5393 5394 ostrfd.fd = fd; 5395 ostrfd.uid = (o_uid_t)srf->uid; 5396 ostrfd.gid = (o_gid_t)srf->gid; 5397 5398 /* Null the filler bits */ 5399 for (i = 0; i < 8; i++) 5400 ostrfd.fill[i] = 0; 5401 5402 error = strcopyout(&ostrfd, (void *)arg, 5403 sizeof (struct o_strrecvfd), copyflag); 5404 } else { /* I_E_RECVFD */ 5405 struct strrecvfd strfd; 5406 5407 strfd.fd = fd; 5408 strfd.uid = srf->uid; 5409 strfd.gid = srf->gid; 5410 5411 /* null the filler bits */ 5412 for (i = 0; i < 8; i++) 5413 strfd.fill[i] = 0; 5414 5415 error = strcopyout(&strfd, (void *)arg, 5416 sizeof (struct strrecvfd), copyflag); 5417 } 5418 5419 if (error) { 5420 setf(fd, NULL); /* release fd entry */ 5421 mutex_enter(&stp->sd_lock); 5422 putback(stp, rdq, mp, mp->b_band); 5423 mutex_exit(&stp->sd_lock); 5424 return (error); 5425 } 5426 if (audit_active) { 5427 audit_fdrecv(fd, srf->fp); 5428 } 5429 5430 /* 5431 * Always increment f_count since the freemsg() below will 5432 * always call free_passfp() which performs a closef(). 5433 */ 5434 mutex_enter(&srf->fp->f_tlock); 5435 srf->fp->f_count++; 5436 mutex_exit(&srf->fp->f_tlock); 5437 setf(fd, srf->fp); 5438 freemsg(mp); 5439 return (0); 5440 } 5441 5442 case I_SWROPT: 5443 /* 5444 * Set/clear the write options. arg is a bit 5445 * mask with any of the following bits set... 5446 * SNDZERO - send zero length message 5447 * SNDPIPE - send sigpipe to process if 5448 * sd_werror is set and process is 5449 * doing a write or putmsg. 
5450 * The new stream head write options should reflect 5451 * what is in arg. 5452 */ 5453 if (arg & ~(SNDZERO|SNDPIPE)) 5454 return (EINVAL); 5455 5456 mutex_enter(&stp->sd_lock); 5457 stp->sd_wput_opt &= ~(SW_SIGPIPE|SW_SNDZERO); 5458 if (arg & SNDZERO) 5459 stp->sd_wput_opt |= SW_SNDZERO; 5460 if (arg & SNDPIPE) 5461 stp->sd_wput_opt |= SW_SIGPIPE; 5462 mutex_exit(&stp->sd_lock); 5463 return (0); 5464 5465 case I_GWROPT: 5466 { 5467 int wropt = 0; 5468 5469 if (stp->sd_wput_opt & SW_SNDZERO) 5470 wropt |= SNDZERO; 5471 if (stp->sd_wput_opt & SW_SIGPIPE) 5472 wropt |= SNDPIPE; 5473 return (strcopyout(&wropt, (void *)arg, sizeof (wropt), 5474 copyflag)); 5475 } 5476 5477 case I_LIST: 5478 /* 5479 * Returns all the modules found on this stream, 5480 * upto the driver. If argument is NULL, return the 5481 * number of modules (including driver). If argument 5482 * is not NULL, copy the names into the structure 5483 * provided. 5484 */ 5485 5486 { 5487 queue_t *q; 5488 char *qname; 5489 int i, nmods; 5490 struct str_mlist *mlist; 5491 STRUCT_DECL(str_list, strlist); 5492 5493 if (arg == NULL) { /* Return number of modules plus driver */ 5494 if (stp->sd_vnode->v_type == VFIFO) 5495 *rvalp = stp->sd_pushcnt; 5496 else 5497 *rvalp = stp->sd_pushcnt + 1; 5498 return (0); 5499 } 5500 5501 STRUCT_INIT(strlist, flag); 5502 5503 error = strcopyin((void *)arg, STRUCT_BUF(strlist), 5504 STRUCT_SIZE(strlist), copyflag); 5505 if (error != 0) 5506 return (error); 5507 5508 mlist = STRUCT_FGETP(strlist, sl_modlist); 5509 nmods = STRUCT_FGET(strlist, sl_nmods); 5510 if (nmods <= 0) 5511 return (EINVAL); 5512 5513 claimstr(stp->sd_wrq); 5514 q = stp->sd_wrq; 5515 for (i = 0; i < nmods && _SAMESTR(q); i++, q = q->q_next) { 5516 qname = Q2NAME(q->q_next); 5517 error = strcopyout(qname, &mlist[i], strlen(qname) + 1, 5518 copyflag); 5519 if (error != 0) { 5520 releasestr(stp->sd_wrq); 5521 return (error); 5522 } 5523 } 5524 releasestr(stp->sd_wrq); 5525 return (strcopyout(&i, (void 
*)arg, sizeof (int), copyflag)); 5526 } 5527 5528 case I_CKBAND: 5529 { 5530 queue_t *q; 5531 qband_t *qbp; 5532 5533 if ((arg < 0) || (arg >= NBAND)) 5534 return (EINVAL); 5535 q = _RD(stp->sd_wrq); 5536 mutex_enter(QLOCK(q)); 5537 if (arg > (int)q->q_nband) { 5538 *rvalp = 0; 5539 } else { 5540 if (arg == 0) { 5541 if (q->q_first) 5542 *rvalp = 1; 5543 else 5544 *rvalp = 0; 5545 } else { 5546 qbp = q->q_bandp; 5547 while (--arg > 0) 5548 qbp = qbp->qb_next; 5549 if (qbp->qb_first) 5550 *rvalp = 1; 5551 else 5552 *rvalp = 0; 5553 } 5554 } 5555 mutex_exit(QLOCK(q)); 5556 return (0); 5557 } 5558 5559 case I_GETBAND: 5560 { 5561 int intpri; 5562 queue_t *q; 5563 5564 q = _RD(stp->sd_wrq); 5565 mutex_enter(QLOCK(q)); 5566 mp = q->q_first; 5567 if (!mp) { 5568 mutex_exit(QLOCK(q)); 5569 return (ENODATA); 5570 } 5571 intpri = (int)mp->b_band; 5572 error = strcopyout(&intpri, (void *)arg, sizeof (int), 5573 copyflag); 5574 mutex_exit(QLOCK(q)); 5575 return (error); 5576 } 5577 5578 case I_ATMARK: 5579 { 5580 queue_t *q; 5581 5582 if (arg & ~(ANYMARK|LASTMARK)) 5583 return (EINVAL); 5584 q = _RD(stp->sd_wrq); 5585 mutex_enter(&stp->sd_lock); 5586 if ((stp->sd_flag & STRATMARK) && (arg == ANYMARK)) { 5587 *rvalp = 1; 5588 } else { 5589 mutex_enter(QLOCK(q)); 5590 mp = q->q_first; 5591 5592 if (mp == NULL) 5593 *rvalp = 0; 5594 else if ((arg == ANYMARK) && (mp->b_flag & MSGMARK)) 5595 *rvalp = 1; 5596 else if ((arg == LASTMARK) && (mp == stp->sd_mark)) 5597 *rvalp = 1; 5598 else 5599 *rvalp = 0; 5600 mutex_exit(QLOCK(q)); 5601 } 5602 mutex_exit(&stp->sd_lock); 5603 return (0); 5604 } 5605 5606 case I_CANPUT: 5607 { 5608 char band; 5609 5610 if ((arg < 0) || (arg >= NBAND)) 5611 return (EINVAL); 5612 band = (char)arg; 5613 *rvalp = bcanputnext(stp->sd_wrq, band); 5614 return (0); 5615 } 5616 5617 case I_SETCLTIME: 5618 { 5619 int closetime; 5620 5621 error = strcopyin((void *)arg, &closetime, sizeof (int), 5622 copyflag); 5623 if (error) 5624 return (error); 5625 if 
(closetime < 0) 5626 return (EINVAL); 5627 5628 stp->sd_closetime = closetime; 5629 return (0); 5630 } 5631 5632 case I_GETCLTIME: 5633 { 5634 int closetime; 5635 5636 closetime = stp->sd_closetime; 5637 return (strcopyout(&closetime, (void *)arg, sizeof (int), 5638 copyflag)); 5639 } 5640 5641 case TIOCGSID: 5642 { 5643 pid_t sid; 5644 5645 mutex_enter(&stp->sd_lock); 5646 if (stp->sd_sidp == NULL) { 5647 mutex_exit(&stp->sd_lock); 5648 return (ENOTTY); 5649 } 5650 sid = stp->sd_sidp->pid_id; 5651 mutex_exit(&stp->sd_lock); 5652 return (strcopyout(&sid, (void *)arg, sizeof (pid_t), 5653 copyflag)); 5654 } 5655 5656 case TIOCSPGRP: 5657 { 5658 pid_t pgrp; 5659 proc_t *q; 5660 pid_t sid, fg_pgid, bg_pgid; 5661 5662 if (error = strcopyin((void *)arg, &pgrp, sizeof (pid_t), 5663 copyflag)) 5664 return (error); 5665 mutex_enter(&stp->sd_lock); 5666 mutex_enter(&pidlock); 5667 if (stp->sd_sidp != ttoproc(curthread)->p_sessp->s_sidp) { 5668 mutex_exit(&pidlock); 5669 mutex_exit(&stp->sd_lock); 5670 return (ENOTTY); 5671 } 5672 if (pgrp == stp->sd_pgidp->pid_id) { 5673 mutex_exit(&pidlock); 5674 mutex_exit(&stp->sd_lock); 5675 return (0); 5676 } 5677 if (pgrp <= 0 || pgrp >= maxpid) { 5678 mutex_exit(&pidlock); 5679 mutex_exit(&stp->sd_lock); 5680 return (EINVAL); 5681 } 5682 if ((q = pgfind(pgrp)) == NULL || 5683 q->p_sessp != ttoproc(curthread)->p_sessp) { 5684 mutex_exit(&pidlock); 5685 mutex_exit(&stp->sd_lock); 5686 return (EPERM); 5687 } 5688 sid = stp->sd_sidp->pid_id; 5689 fg_pgid = q->p_pgrp; 5690 bg_pgid = stp->sd_pgidp->pid_id; 5691 CL_SET_PROCESS_GROUP(curthread, sid, bg_pgid, fg_pgid); 5692 PID_RELE(stp->sd_pgidp); 5693 ctty_clear_sighuped(); 5694 stp->sd_pgidp = q->p_pgidp; 5695 PID_HOLD(stp->sd_pgidp); 5696 mutex_exit(&pidlock); 5697 mutex_exit(&stp->sd_lock); 5698 return (0); 5699 } 5700 5701 case TIOCGPGRP: 5702 { 5703 pid_t pgrp; 5704 5705 mutex_enter(&stp->sd_lock); 5706 if (stp->sd_sidp == NULL) { 5707 mutex_exit(&stp->sd_lock); 5708 return (ENOTTY); 
5709 } 5710 pgrp = stp->sd_pgidp->pid_id; 5711 mutex_exit(&stp->sd_lock); 5712 return (strcopyout(&pgrp, (void *)arg, sizeof (pid_t), 5713 copyflag)); 5714 } 5715 5716 case TIOCSCTTY: 5717 { 5718 return (strctty(stp)); 5719 } 5720 5721 case TIOCNOTTY: 5722 { 5723 /* freectty() always assumes curproc. */ 5724 if (freectty(B_FALSE) != 0) 5725 return (0); 5726 return (ENOTTY); 5727 } 5728 5729 case FIONBIO: 5730 case FIOASYNC: 5731 return (0); /* handled by the upper layer */ 5732 } 5733 } 5734 5735 /* 5736 * Custom free routine used for M_PASSFP messages. 5737 */ 5738 static void 5739 free_passfp(struct k_strrecvfd *srf) 5740 { 5741 (void) closef(srf->fp); 5742 kmem_free(srf, sizeof (struct k_strrecvfd) + sizeof (frtn_t)); 5743 } 5744 5745 /* ARGSUSED */ 5746 int 5747 do_sendfp(struct stdata *stp, struct file *fp, struct cred *cr) 5748 { 5749 queue_t *qp, *nextqp; 5750 struct k_strrecvfd *srf; 5751 mblk_t *mp; 5752 frtn_t *frtnp; 5753 size_t bufsize; 5754 queue_t *mate = NULL; 5755 syncq_t *sq = NULL; 5756 int retval = 0; 5757 5758 if (stp->sd_flag & STRHUP) 5759 return (ENXIO); 5760 5761 claimstr(stp->sd_wrq); 5762 5763 /* Fastpath, we have a pipe, and we are already mated, use it. */ 5764 if (STRMATED(stp)) { 5765 qp = _RD(stp->sd_mate->sd_wrq); 5766 claimstr(qp); 5767 mate = qp; 5768 } else { /* Not already mated. */ 5769 5770 /* 5771 * Walk the stream to the end of this one. 5772 * assumes that the claimstr() will prevent 5773 * plumbing between the stream head and the 5774 * driver from changing 5775 */ 5776 qp = stp->sd_wrq; 5777 5778 /* 5779 * Loop until we reach the end of this stream. 5780 * On completion, qp points to the write queue 5781 * at the end of the stream, or the read queue 5782 * at the stream head if this is a fifo. 5783 */ 5784 while (((qp = qp->q_next) != NULL) && _SAMESTR(qp)) 5785 ; 5786 5787 /* 5788 * Just in case we get a q_next which is NULL, but 5789 * not at the end of the stream. 
This is actually 5790 * broken, so we set an assert to catch it in 5791 * debug, and set an error and return if not debug. 5792 */ 5793 ASSERT(qp); 5794 if (qp == NULL) { 5795 releasestr(stp->sd_wrq); 5796 return (EINVAL); 5797 } 5798 5799 /* 5800 * Enter the syncq for the driver, so (hopefully) 5801 * the queue values will not change on us. 5802 * XXXX - This will only prevent the race IFF only 5803 * the write side modifies the q_next member, and 5804 * the put procedure is protected by at least 5805 * MT_PERQ. 5806 */ 5807 if ((sq = qp->q_syncq) != NULL) 5808 entersq(sq, SQ_PUT); 5809 5810 /* Now get the q_next value from this qp. */ 5811 nextqp = qp->q_next; 5812 5813 /* 5814 * If nextqp exists and the other stream is different 5815 * from this one claim the stream, set the mate, and 5816 * get the read queue at the stream head of the other 5817 * stream. Assumes that nextqp was at least valid when 5818 * we got it. Hopefully the entersq of the driver 5819 * will prevent it from changing on us. 5820 */ 5821 if ((nextqp != NULL) && (STREAM(nextqp) != stp)) { 5822 ASSERT(qp->q_qinfo->qi_srvp); 5823 ASSERT(_OTHERQ(qp)->q_qinfo->qi_srvp); 5824 ASSERT(_OTHERQ(qp->q_next)->q_qinfo->qi_srvp); 5825 claimstr(nextqp); 5826 5827 /* Make sure we still have a q_next */ 5828 if (nextqp != qp->q_next) { 5829 releasestr(stp->sd_wrq); 5830 releasestr(nextqp); 5831 return (EINVAL); 5832 } 5833 5834 qp = _RD(STREAM(nextqp)->sd_wrq); 5835 mate = qp; 5836 } 5837 /* If we entered the synq above, leave it. 
*/ 5838 if (sq != NULL) 5839 leavesq(sq, SQ_PUT); 5840 } /* STRMATED(STP) */ 5841 5842 /* XXX prevents substitution of the ops vector */ 5843 if (qp->q_qinfo != &strdata && qp->q_qinfo != &fifo_strdata) { 5844 retval = EINVAL; 5845 goto out; 5846 } 5847 5848 if (qp->q_flag & QFULL) { 5849 retval = EAGAIN; 5850 goto out; 5851 } 5852 5853 /* 5854 * Since M_PASSFP messages include a file descriptor, we use 5855 * esballoc() and specify a custom free routine (free_passfp()) that 5856 * will close the descriptor as part of freeing the message. For 5857 * convenience, we stash the frtn_t right after the data block. 5858 */ 5859 bufsize = sizeof (struct k_strrecvfd) + sizeof (frtn_t); 5860 srf = kmem_alloc(bufsize, KM_NOSLEEP); 5861 if (srf == NULL) { 5862 retval = EAGAIN; 5863 goto out; 5864 } 5865 5866 frtnp = (frtn_t *)(srf + 1); 5867 frtnp->free_arg = (caddr_t)srf; 5868 frtnp->free_func = free_passfp; 5869 5870 mp = esballoc((uchar_t *)srf, bufsize, BPRI_MED, frtnp); 5871 if (mp == NULL) { 5872 kmem_free(srf, bufsize); 5873 retval = EAGAIN; 5874 goto out; 5875 } 5876 mp->b_wptr += sizeof (struct k_strrecvfd); 5877 mp->b_datap->db_type = M_PASSFP; 5878 5879 srf->fp = fp; 5880 srf->uid = crgetuid(curthread->t_cred); 5881 srf->gid = crgetgid(curthread->t_cred); 5882 mutex_enter(&fp->f_tlock); 5883 fp->f_count++; 5884 mutex_exit(&fp->f_tlock); 5885 5886 put(qp, mp); 5887 out: 5888 releasestr(stp->sd_wrq); 5889 if (mate) 5890 releasestr(mate); 5891 return (retval); 5892 } 5893 5894 /* 5895 * Send an ioctl message downstream and wait for acknowledgement. 5896 * flags may be set to either U_TO_K or K_TO_K and a combination 5897 * of STR_NOERROR or STR_NOSIG 5898 * STR_NOSIG: Signals are essentially ignored or held and have 5899 * no effect for the duration of the call. 5900 * STR_NOERROR: Ignores stream head read, write and hup errors. 
5901 * Additionally, if an existing ioctl times out, it is assumed 5902 * lost and and this ioctl will continue as if the previous ioctl had 5903 * finished. ETIME may be returned if this ioctl times out (i.e. 5904 * ic_timout is not INFTIM). Non-stream head errors may be returned if 5905 * the ioc_error indicates that the driver/module had problems, 5906 * an EFAULT was found when accessing user data, a lack of 5907 * resources, etc. 5908 */ 5909 int 5910 strdoioctl( 5911 struct stdata *stp, 5912 struct strioctl *strioc, 5913 int fflags, /* file flags with model info */ 5914 int flag, 5915 cred_t *crp, 5916 int *rvalp) 5917 { 5918 mblk_t *bp; 5919 struct iocblk *iocbp; 5920 struct copyreq *reqp; 5921 struct copyresp *resp; 5922 int id; 5923 int transparent = 0; 5924 int error = 0; 5925 int len = 0; 5926 caddr_t taddr; 5927 int copyflag = (flag & (U_TO_K | K_TO_K)); 5928 int sigflag = (flag & STR_NOSIG); 5929 int errs; 5930 uint_t waitflags; 5931 5932 ASSERT(copyflag == U_TO_K || copyflag == K_TO_K); 5933 ASSERT((fflags & FMODELS) != 0); 5934 5935 TRACE_2(TR_FAC_STREAMS_FR, 5936 TR_STRDOIOCTL, 5937 "strdoioctl:stp %p strioc %p", stp, strioc); 5938 if (strioc->ic_len == TRANSPARENT) { /* send arg in M_DATA block */ 5939 transparent = 1; 5940 strioc->ic_len = sizeof (intptr_t); 5941 } 5942 5943 if (strioc->ic_len < 0 || (strmsgsz > 0 && strioc->ic_len > strmsgsz)) 5944 return (EINVAL); 5945 5946 if ((bp = allocb_cred_wait(sizeof (union ioctypes), sigflag, &error, 5947 crp, curproc->p_pid)) == NULL) 5948 return (error); 5949 5950 bzero(bp->b_wptr, sizeof (union ioctypes)); 5951 5952 iocbp = (struct iocblk *)bp->b_wptr; 5953 iocbp->ioc_count = strioc->ic_len; 5954 iocbp->ioc_cmd = strioc->ic_cmd; 5955 iocbp->ioc_flag = (fflags & FMODELS); 5956 5957 crhold(crp); 5958 iocbp->ioc_cr = crp; 5959 DB_TYPE(bp) = M_IOCTL; 5960 bp->b_wptr += sizeof (struct iocblk); 5961 5962 if (flag & STR_NOERROR) 5963 errs = STPLEX; 5964 else 5965 errs = STRHUP|STRDERR|STWRERR|STPLEX; 5966 
5967 /* 5968 * If there is data to copy into ioctl block, do so. 5969 */ 5970 if (iocbp->ioc_count > 0) { 5971 if (transparent) 5972 /* 5973 * Note: STR_NOERROR does not have an effect 5974 * in putiocd() 5975 */ 5976 id = K_TO_K | sigflag; 5977 else 5978 id = flag; 5979 if ((error = putiocd(bp, strioc->ic_dp, id, crp)) != 0) { 5980 freemsg(bp); 5981 crfree(crp); 5982 return (error); 5983 } 5984 5985 /* 5986 * We could have slept copying in user pages. 5987 * Recheck the stream head state (the other end 5988 * of a pipe could have gone away). 5989 */ 5990 if (stp->sd_flag & errs) { 5991 mutex_enter(&stp->sd_lock); 5992 error = strgeterr(stp, errs, 0); 5993 mutex_exit(&stp->sd_lock); 5994 if (error != 0) { 5995 freemsg(bp); 5996 crfree(crp); 5997 return (error); 5998 } 5999 } 6000 } 6001 if (transparent) 6002 iocbp->ioc_count = TRANSPARENT; 6003 6004 /* 6005 * Block for up to STRTIMOUT milliseconds if there is an outstanding 6006 * ioctl for this stream already running. All processes 6007 * sleeping here will be awakened as a result of an ACK 6008 * or NAK being received for the outstanding ioctl, or 6009 * as a result of the timer expiring on the outstanding 6010 * ioctl (a failure), or as a result of any waiting 6011 * process's timer expiring (also a failure). 6012 */ 6013 6014 error = 0; 6015 mutex_enter(&stp->sd_lock); 6016 while (stp->sd_flag & (IOCWAIT | IOCWAITNE)) { 6017 clock_t cv_rval; 6018 6019 TRACE_0(TR_FAC_STREAMS_FR, 6020 TR_STRDOIOCTL_WAIT, 6021 "strdoioctl sleeps - IOCWAIT"); 6022 cv_rval = str_cv_wait(&stp->sd_iocmonitor, &stp->sd_lock, 6023 STRTIMOUT, sigflag); 6024 if (cv_rval <= 0) { 6025 if (cv_rval == 0) { 6026 error = EINTR; 6027 } else { 6028 if (flag & STR_NOERROR) { 6029 /* 6030 * Terminating current ioctl in 6031 * progress -- assume it got lost and 6032 * wake up the other thread so that the 6033 * operation completes. 
6034 */ 6035 if (!(stp->sd_flag & IOCWAITNE)) { 6036 stp->sd_flag |= IOCWAITNE; 6037 cv_broadcast(&stp->sd_monitor); 6038 } 6039 /* 6040 * Otherwise, there's a running 6041 * STR_NOERROR -- we have no choice 6042 * here but to wait forever (or until 6043 * interrupted). 6044 */ 6045 } else { 6046 /* 6047 * pending ioctl has caused 6048 * us to time out 6049 */ 6050 error = ETIME; 6051 } 6052 } 6053 } else if ((stp->sd_flag & errs)) { 6054 error = strgeterr(stp, errs, 0); 6055 } 6056 if (error) { 6057 mutex_exit(&stp->sd_lock); 6058 freemsg(bp); 6059 crfree(crp); 6060 return (error); 6061 } 6062 } 6063 6064 /* 6065 * Have control of ioctl mechanism. 6066 * Send down ioctl packet and wait for response. 6067 */ 6068 if (stp->sd_iocblk != (mblk_t *)-1) { 6069 freemsg(stp->sd_iocblk); 6070 } 6071 stp->sd_iocblk = NULL; 6072 6073 /* 6074 * If this is marked with 'noerror' (internal; mostly 6075 * I_{P,}{UN,}LINK), then make sure nobody else is able to get 6076 * in here by setting IOCWAITNE. 6077 */ 6078 waitflags = IOCWAIT; 6079 if (flag & STR_NOERROR) 6080 waitflags |= IOCWAITNE; 6081 6082 stp->sd_flag |= waitflags; 6083 6084 /* 6085 * Assign sequence number. 6086 */ 6087 iocbp->ioc_id = stp->sd_iocid = getiocseqno(); 6088 6089 mutex_exit(&stp->sd_lock); 6090 6091 TRACE_1(TR_FAC_STREAMS_FR, 6092 TR_STRDOIOCTL_PUT, "strdoioctl put: stp %p", stp); 6093 stream_willservice(stp); 6094 putnext(stp->sd_wrq, bp); 6095 stream_runservice(stp); 6096 6097 /* 6098 * Timed wait for acknowledgment. The wait time is limited by the 6099 * timeout value, which must be a positive integer (number of 6100 * milliseconds) to wait, or 0 (use default value of STRTIMOUT 6101 * milliseconds), or -1 (wait forever). This will be awakened 6102 * either by an ACK/NAK message arriving, the timer expiring, or 6103 * the timer expiring on another ioctl waiting for control of the 6104 * mechanism. 
6105 */ 6106 waitioc: 6107 mutex_enter(&stp->sd_lock); 6108 6109 6110 /* 6111 * If the reply has already arrived, don't sleep. If awakened from 6112 * the sleep, fail only if the reply has not arrived by then. 6113 * Otherwise, process the reply. 6114 */ 6115 while (!stp->sd_iocblk) { 6116 clock_t cv_rval; 6117 6118 if (stp->sd_flag & errs) { 6119 error = strgeterr(stp, errs, 0); 6120 if (error != 0) { 6121 stp->sd_flag &= ~waitflags; 6122 cv_broadcast(&stp->sd_iocmonitor); 6123 mutex_exit(&stp->sd_lock); 6124 crfree(crp); 6125 return (error); 6126 } 6127 } 6128 6129 TRACE_0(TR_FAC_STREAMS_FR, 6130 TR_STRDOIOCTL_WAIT2, 6131 "strdoioctl sleeps awaiting reply"); 6132 ASSERT(error == 0); 6133 6134 cv_rval = str_cv_wait(&stp->sd_monitor, &stp->sd_lock, 6135 (strioc->ic_timout ? 6136 strioc->ic_timout * 1000 : STRTIMOUT), sigflag); 6137 6138 /* 6139 * There are four possible cases here: interrupt, timeout, 6140 * wakeup by IOCWAITNE (above), or wakeup by strrput_nondata (a 6141 * valid M_IOCTL reply). 6142 * 6143 * If we've been awakened by a STR_NOERROR ioctl on some other 6144 * thread, then sd_iocblk will still be NULL, and IOCWAITNE 6145 * will be set. Pretend as if we just timed out. Note that 6146 * this other thread waited at least STRTIMOUT before trying to 6147 * awaken our thread, so this is indistinguishable (even for 6148 * INFTIM) from the case where we failed with ETIME waiting on 6149 * IOCWAIT in the prior loop. 6150 */ 6151 if (cv_rval > 0 && !(flag & STR_NOERROR) && 6152 stp->sd_iocblk == NULL && (stp->sd_flag & IOCWAITNE)) { 6153 cv_rval = -1; 6154 } 6155 6156 /* 6157 * note: STR_NOERROR does not protect 6158 * us here.. use ic_timout < 0 6159 */ 6160 if (cv_rval <= 0) { 6161 if (cv_rval == 0) { 6162 error = EINTR; 6163 } else { 6164 error = ETIME; 6165 } 6166 /* 6167 * A message could have come in after we were scheduled 6168 * but before we were actually run. 
6169 */ 6170 bp = stp->sd_iocblk; 6171 stp->sd_iocblk = NULL; 6172 if (bp != NULL) { 6173 if ((bp->b_datap->db_type == M_COPYIN) || 6174 (bp->b_datap->db_type == M_COPYOUT)) { 6175 mutex_exit(&stp->sd_lock); 6176 if (bp->b_cont) { 6177 freemsg(bp->b_cont); 6178 bp->b_cont = NULL; 6179 } 6180 bp->b_datap->db_type = M_IOCDATA; 6181 bp->b_wptr = bp->b_rptr + 6182 sizeof (struct copyresp); 6183 resp = (struct copyresp *)bp->b_rptr; 6184 resp->cp_rval = 6185 (caddr_t)1; /* failure */ 6186 stream_willservice(stp); 6187 putnext(stp->sd_wrq, bp); 6188 stream_runservice(stp); 6189 mutex_enter(&stp->sd_lock); 6190 } else { 6191 freemsg(bp); 6192 } 6193 } 6194 stp->sd_flag &= ~waitflags; 6195 cv_broadcast(&stp->sd_iocmonitor); 6196 mutex_exit(&stp->sd_lock); 6197 crfree(crp); 6198 return (error); 6199 } 6200 } 6201 bp = stp->sd_iocblk; 6202 /* 6203 * Note: it is strictly impossible to get here with sd_iocblk set to 6204 * -1. This is because the initial loop above doesn't allow any new 6205 * ioctls into the fray until all others have passed this point. 6206 */ 6207 ASSERT(bp != NULL && bp != (mblk_t *)-1); 6208 TRACE_1(TR_FAC_STREAMS_FR, 6209 TR_STRDOIOCTL_ACK, "strdoioctl got reply: bp %p", bp); 6210 if ((bp->b_datap->db_type == M_IOCACK) || 6211 (bp->b_datap->db_type == M_IOCNAK)) { 6212 /* for detection of duplicate ioctl replies */ 6213 stp->sd_iocblk = (mblk_t *)-1; 6214 stp->sd_flag &= ~waitflags; 6215 cv_broadcast(&stp->sd_iocmonitor); 6216 mutex_exit(&stp->sd_lock); 6217 } else { 6218 /* 6219 * flags not cleared here because we're still doing 6220 * copy in/out for ioctl. 6221 */ 6222 stp->sd_iocblk = NULL; 6223 mutex_exit(&stp->sd_lock); 6224 } 6225 6226 6227 /* 6228 * Have received acknowledgment. 6229 */ 6230 6231 switch (bp->b_datap->db_type) { 6232 case M_IOCACK: 6233 /* 6234 * Positive ack. 6235 */ 6236 iocbp = (struct iocblk *)bp->b_rptr; 6237 6238 /* 6239 * Set error if indicated. 
6240 */ 6241 if (iocbp->ioc_error) { 6242 error = iocbp->ioc_error; 6243 break; 6244 } 6245 6246 /* 6247 * Set return value. 6248 */ 6249 *rvalp = iocbp->ioc_rval; 6250 6251 /* 6252 * Data may have been returned in ACK message (ioc_count > 0). 6253 * If so, copy it out to the user's buffer. 6254 */ 6255 if (iocbp->ioc_count && !transparent) { 6256 if (error = getiocd(bp, strioc->ic_dp, copyflag)) 6257 break; 6258 } 6259 if (!transparent) { 6260 if (len) /* an M_COPYOUT was used with I_STR */ 6261 strioc->ic_len = len; 6262 else 6263 strioc->ic_len = (int)iocbp->ioc_count; 6264 } 6265 break; 6266 6267 case M_IOCNAK: 6268 /* 6269 * Negative ack. 6270 * 6271 * The only thing to do is set error as specified 6272 * in neg ack packet. 6273 */ 6274 iocbp = (struct iocblk *)bp->b_rptr; 6275 6276 error = (iocbp->ioc_error ? iocbp->ioc_error : EINVAL); 6277 break; 6278 6279 case M_COPYIN: 6280 /* 6281 * Driver or module has requested user ioctl data. 6282 */ 6283 reqp = (struct copyreq *)bp->b_rptr; 6284 6285 /* 6286 * M_COPYIN should *never* have a message attached, though 6287 * it's harmless if it does -- thus, panic on a DEBUG 6288 * kernel and just free it on a non-DEBUG build. 
6289 */ 6290 ASSERT(bp->b_cont == NULL); 6291 if (bp->b_cont != NULL) { 6292 freemsg(bp->b_cont); 6293 bp->b_cont = NULL; 6294 } 6295 6296 error = putiocd(bp, reqp->cq_addr, flag, crp); 6297 if (error && bp->b_cont) { 6298 freemsg(bp->b_cont); 6299 bp->b_cont = NULL; 6300 } 6301 6302 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp); 6303 bp->b_datap->db_type = M_IOCDATA; 6304 6305 mblk_setcred(bp, crp, curproc->p_pid); 6306 resp = (struct copyresp *)bp->b_rptr; 6307 resp->cp_rval = (caddr_t)(uintptr_t)error; 6308 resp->cp_flag = (fflags & FMODELS); 6309 6310 stream_willservice(stp); 6311 putnext(stp->sd_wrq, bp); 6312 stream_runservice(stp); 6313 6314 if (error) { 6315 mutex_enter(&stp->sd_lock); 6316 stp->sd_flag &= ~waitflags; 6317 cv_broadcast(&stp->sd_iocmonitor); 6318 mutex_exit(&stp->sd_lock); 6319 crfree(crp); 6320 return (error); 6321 } 6322 6323 goto waitioc; 6324 6325 case M_COPYOUT: 6326 /* 6327 * Driver or module has ioctl data for a user. 6328 */ 6329 reqp = (struct copyreq *)bp->b_rptr; 6330 ASSERT(bp->b_cont != NULL); 6331 6332 /* 6333 * Always (transparent or non-transparent ) 6334 * use the address specified in the request 6335 */ 6336 taddr = reqp->cq_addr; 6337 if (!transparent) 6338 len = (int)reqp->cq_size; 6339 6340 /* copyout data to the provided address */ 6341 error = getiocd(bp, taddr, copyflag); 6342 6343 freemsg(bp->b_cont); 6344 bp->b_cont = NULL; 6345 6346 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp); 6347 bp->b_datap->db_type = M_IOCDATA; 6348 6349 mblk_setcred(bp, crp, curproc->p_pid); 6350 resp = (struct copyresp *)bp->b_rptr; 6351 resp->cp_rval = (caddr_t)(uintptr_t)error; 6352 resp->cp_flag = (fflags & FMODELS); 6353 6354 stream_willservice(stp); 6355 putnext(stp->sd_wrq, bp); 6356 stream_runservice(stp); 6357 6358 if (error) { 6359 mutex_enter(&stp->sd_lock); 6360 stp->sd_flag &= ~waitflags; 6361 cv_broadcast(&stp->sd_iocmonitor); 6362 mutex_exit(&stp->sd_lock); 6363 crfree(crp); 6364 return (error); 6365 } 6366 goto 
waitioc; 6367 6368 default: 6369 ASSERT(0); 6370 mutex_enter(&stp->sd_lock); 6371 stp->sd_flag &= ~waitflags; 6372 cv_broadcast(&stp->sd_iocmonitor); 6373 mutex_exit(&stp->sd_lock); 6374 break; 6375 } 6376 6377 freemsg(bp); 6378 crfree(crp); 6379 return (error); 6380 } 6381 6382 /* 6383 * Send an M_CMD message downstream and wait for a reply. This is a ptools 6384 * special used to retrieve information from modules/drivers a stream without 6385 * being subjected to flow control or interfering with pending messages on the 6386 * stream (e.g. an ioctl in flight). 6387 */ 6388 int 6389 strdocmd(struct stdata *stp, struct strcmd *scp, cred_t *crp) 6390 { 6391 mblk_t *mp; 6392 struct cmdblk *cmdp; 6393 int error = 0; 6394 int errs = STRHUP|STRDERR|STWRERR|STPLEX; 6395 clock_t rval, timeout = STRTIMOUT; 6396 6397 if (scp->sc_len < 0 || scp->sc_len > sizeof (scp->sc_buf) || 6398 scp->sc_timeout < -1) 6399 return (EINVAL); 6400 6401 if (scp->sc_timeout > 0) 6402 timeout = scp->sc_timeout * MILLISEC; 6403 6404 if ((mp = allocb_cred(sizeof (struct cmdblk), crp, 6405 curproc->p_pid)) == NULL) 6406 return (ENOMEM); 6407 6408 crhold(crp); 6409 6410 cmdp = (struct cmdblk *)mp->b_wptr; 6411 cmdp->cb_cr = crp; 6412 cmdp->cb_cmd = scp->sc_cmd; 6413 cmdp->cb_len = scp->sc_len; 6414 cmdp->cb_error = 0; 6415 mp->b_wptr += sizeof (struct cmdblk); 6416 6417 DB_TYPE(mp) = M_CMD; 6418 DB_CPID(mp) = curproc->p_pid; 6419 6420 /* 6421 * Copy in the payload. 
6422 */ 6423 if (cmdp->cb_len > 0) { 6424 mp->b_cont = allocb_cred(sizeof (scp->sc_buf), crp, 6425 curproc->p_pid); 6426 if (mp->b_cont == NULL) { 6427 error = ENOMEM; 6428 goto out; 6429 } 6430 6431 /* cb_len comes from sc_len, which has already been checked */ 6432 ASSERT(cmdp->cb_len <= sizeof (scp->sc_buf)); 6433 (void) bcopy(scp->sc_buf, mp->b_cont->b_wptr, cmdp->cb_len); 6434 mp->b_cont->b_wptr += cmdp->cb_len; 6435 DB_CPID(mp->b_cont) = curproc->p_pid; 6436 } 6437 6438 /* 6439 * Since this mechanism is strictly for ptools, and since only one 6440 * process can be grabbed at a time, we simply fail if there's 6441 * currently an operation pending. 6442 */ 6443 mutex_enter(&stp->sd_lock); 6444 if (stp->sd_flag & STRCMDWAIT) { 6445 mutex_exit(&stp->sd_lock); 6446 error = EBUSY; 6447 goto out; 6448 } 6449 stp->sd_flag |= STRCMDWAIT; 6450 ASSERT(stp->sd_cmdblk == NULL); 6451 mutex_exit(&stp->sd_lock); 6452 6453 putnext(stp->sd_wrq, mp); 6454 mp = NULL; 6455 6456 /* 6457 * Timed wait for acknowledgment. If the reply has already arrived, 6458 * don't sleep. If awakened from the sleep, fail only if the reply 6459 * has not arrived by then. Otherwise, process the reply. 6460 */ 6461 mutex_enter(&stp->sd_lock); 6462 while (stp->sd_cmdblk == NULL) { 6463 if (stp->sd_flag & errs) { 6464 if ((error = strgeterr(stp, errs, 0)) != 0) 6465 goto waitout; 6466 } 6467 6468 rval = str_cv_wait(&stp->sd_monitor, &stp->sd_lock, timeout, 0); 6469 if (stp->sd_cmdblk != NULL) 6470 break; 6471 6472 if (rval <= 0) { 6473 error = (rval == 0) ? EINTR : ETIME; 6474 goto waitout; 6475 } 6476 } 6477 6478 /* 6479 * We received a reply. 
6480 */ 6481 mp = stp->sd_cmdblk; 6482 stp->sd_cmdblk = NULL; 6483 ASSERT(mp != NULL && DB_TYPE(mp) == M_CMD); 6484 ASSERT(stp->sd_flag & STRCMDWAIT); 6485 stp->sd_flag &= ~STRCMDWAIT; 6486 mutex_exit(&stp->sd_lock); 6487 6488 cmdp = (struct cmdblk *)mp->b_rptr; 6489 if ((error = cmdp->cb_error) != 0) 6490 goto out; 6491 6492 /* 6493 * Data may have been returned in the reply (cb_len > 0). 6494 * If so, copy it out to the user's buffer. 6495 */ 6496 if (cmdp->cb_len > 0) { 6497 if (mp->b_cont == NULL || MBLKL(mp->b_cont) < cmdp->cb_len) { 6498 error = EPROTO; 6499 goto out; 6500 } 6501 6502 cmdp->cb_len = MIN(cmdp->cb_len, sizeof (scp->sc_buf)); 6503 (void) bcopy(mp->b_cont->b_rptr, scp->sc_buf, cmdp->cb_len); 6504 } 6505 scp->sc_len = cmdp->cb_len; 6506 out: 6507 freemsg(mp); 6508 crfree(crp); 6509 return (error); 6510 waitout: 6511 ASSERT(stp->sd_cmdblk == NULL); 6512 stp->sd_flag &= ~STRCMDWAIT; 6513 mutex_exit(&stp->sd_lock); 6514 crfree(crp); 6515 return (error); 6516 } 6517 6518 /* 6519 * For the SunOS keyboard driver. 6520 * Return the next available "ioctl" sequence number. 6521 * Exported, so that streams modules can send "ioctl" messages 6522 * downstream from their open routine. 6523 */ 6524 int 6525 getiocseqno(void) 6526 { 6527 int i; 6528 6529 mutex_enter(&strresources); 6530 i = ++ioc_id; 6531 mutex_exit(&strresources); 6532 return (i); 6533 } 6534 6535 /* 6536 * Get the next message from the read queue. If the message is 6537 * priority, STRPRI will have been set by strrput(). This flag 6538 * should be reset only when the entire message at the front of the 6539 * queue as been consumed. 6540 * 6541 * NOTE: strgetmsg and kstrgetmsg have much of the logic in common. 
6542 */ 6543 int 6544 strgetmsg( 6545 struct vnode *vp, 6546 struct strbuf *mctl, 6547 struct strbuf *mdata, 6548 unsigned char *prip, 6549 int *flagsp, 6550 int fmode, 6551 rval_t *rvp) 6552 { 6553 struct stdata *stp; 6554 mblk_t *bp, *nbp; 6555 mblk_t *savemp = NULL; 6556 mblk_t *savemptail = NULL; 6557 uint_t old_sd_flag; 6558 int flg; 6559 int more = 0; 6560 int error = 0; 6561 char first = 1; 6562 uint_t mark; /* Contains MSG*MARK and _LASTMARK */ 6563 #define _LASTMARK 0x8000 /* Distinct from MSG*MARK */ 6564 unsigned char pri = 0; 6565 queue_t *q; 6566 int pr = 0; /* Partial read successful */ 6567 struct uio uios; 6568 struct uio *uiop = &uios; 6569 struct iovec iovs; 6570 unsigned char type; 6571 6572 TRACE_1(TR_FAC_STREAMS_FR, TR_STRGETMSG_ENTER, 6573 "strgetmsg:%p", vp); 6574 6575 ASSERT(vp->v_stream); 6576 stp = vp->v_stream; 6577 rvp->r_val1 = 0; 6578 6579 mutex_enter(&stp->sd_lock); 6580 6581 if ((error = i_straccess(stp, JCREAD)) != 0) { 6582 mutex_exit(&stp->sd_lock); 6583 return (error); 6584 } 6585 6586 if (stp->sd_flag & (STRDERR|STPLEX)) { 6587 error = strgeterr(stp, STRDERR|STPLEX, 0); 6588 if (error != 0) { 6589 mutex_exit(&stp->sd_lock); 6590 return (error); 6591 } 6592 } 6593 mutex_exit(&stp->sd_lock); 6594 6595 switch (*flagsp) { 6596 case MSG_HIPRI: 6597 if (*prip != 0) 6598 return (EINVAL); 6599 break; 6600 6601 case MSG_ANY: 6602 case MSG_BAND: 6603 break; 6604 6605 default: 6606 return (EINVAL); 6607 } 6608 /* 6609 * Setup uio and iov for data part 6610 */ 6611 iovs.iov_base = mdata->buf; 6612 iovs.iov_len = mdata->maxlen; 6613 uios.uio_iov = &iovs; 6614 uios.uio_iovcnt = 1; 6615 uios.uio_loffset = 0; 6616 uios.uio_segflg = UIO_USERSPACE; 6617 uios.uio_fmode = 0; 6618 uios.uio_extflg = UIO_COPY_CACHED; 6619 uios.uio_resid = mdata->maxlen; 6620 uios.uio_offset = 0; 6621 6622 q = _RD(stp->sd_wrq); 6623 mutex_enter(&stp->sd_lock); 6624 old_sd_flag = stp->sd_flag; 6625 mark = 0; 6626 for (;;) { 6627 int done = 0; 6628 mblk_t *q_first = 
q->q_first; 6629 6630 /* 6631 * Get the next message of appropriate priority 6632 * from the stream head. If the caller is interested 6633 * in band or hipri messages, then they should already 6634 * be enqueued at the stream head. On the other hand 6635 * if the caller wants normal (band 0) messages, they 6636 * might be deferred in a synchronous stream and they 6637 * will need to be pulled up. 6638 * 6639 * After we have dequeued a message, we might find that 6640 * it was a deferred M_SIG that was enqueued at the 6641 * stream head. It must now be posted as part of the 6642 * read by calling strsignal_nolock(). 6643 * 6644 * Also note that strrput does not enqueue an M_PCSIG, 6645 * and there cannot be more than one hipri message, 6646 * so there was no need to have the M_PCSIG case. 6647 * 6648 * At some time it might be nice to try and wrap the 6649 * functionality of kstrgetmsg() and strgetmsg() into 6650 * a common routine so to reduce the amount of replicated 6651 * code (since they are extremely similar). 6652 */ 6653 if (!(*flagsp & (MSG_HIPRI|MSG_BAND))) { 6654 /* Asking for normal, band0 data */ 6655 bp = strget(stp, q, uiop, first, &error); 6656 ASSERT(MUTEX_HELD(&stp->sd_lock)); 6657 if (bp != NULL) { 6658 ASSERT(!(bp->b_datap->db_flags & DBLK_UIOA)); 6659 if (DB_TYPE(bp) == M_SIG) { 6660 strsignal_nolock(stp, *bp->b_rptr, 6661 bp->b_band); 6662 freemsg(bp); 6663 continue; 6664 } else { 6665 break; 6666 } 6667 } 6668 if (error != 0) 6669 goto getmout; 6670 6671 /* 6672 * We can't depend on the value of STRPRI here because 6673 * the stream head may be in transit. 
Therefore, we 6674 * must look at the type of the first message to 6675 * determine if a high priority messages is waiting 6676 */ 6677 } else if ((*flagsp & MSG_HIPRI) && q_first != NULL && 6678 DB_TYPE(q_first) >= QPCTL && 6679 (bp = getq_noenab(q, 0)) != NULL) { 6680 /* Asked for HIPRI and got one */ 6681 ASSERT(DB_TYPE(bp) >= QPCTL); 6682 break; 6683 } else if ((*flagsp & MSG_BAND) && q_first != NULL && 6684 ((q_first->b_band >= *prip) || DB_TYPE(q_first) >= QPCTL) && 6685 (bp = getq_noenab(q, 0)) != NULL) { 6686 /* 6687 * Asked for at least band "prip" and got either at 6688 * least that band or a hipri message. 6689 */ 6690 ASSERT(bp->b_band >= *prip || DB_TYPE(bp) >= QPCTL); 6691 if (DB_TYPE(bp) == M_SIG) { 6692 strsignal_nolock(stp, *bp->b_rptr, bp->b_band); 6693 freemsg(bp); 6694 continue; 6695 } else { 6696 break; 6697 } 6698 } 6699 6700 /* No data. Time to sleep? */ 6701 qbackenable(q, 0); 6702 6703 /* 6704 * If STRHUP or STREOF, return 0 length control and data. 6705 * If resid is 0, then a read(fd,buf,0) was done. Do not 6706 * sleep to satisfy this request because by default we have 6707 * zero bytes to return. 
6708 */ 6709 if ((stp->sd_flag & (STRHUP|STREOF)) || (mctl->maxlen == 0 && 6710 mdata->maxlen == 0)) { 6711 mctl->len = mdata->len = 0; 6712 *flagsp = 0; 6713 mutex_exit(&stp->sd_lock); 6714 return (0); 6715 } 6716 TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_WAIT, 6717 "strgetmsg calls strwaitq:%p, %p", 6718 vp, uiop); 6719 if (((error = strwaitq(stp, GETWAIT, (ssize_t)0, fmode, -1, 6720 &done)) != 0) || done) { 6721 TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_DONE, 6722 "strgetmsg error or done:%p, %p", 6723 vp, uiop); 6724 mutex_exit(&stp->sd_lock); 6725 return (error); 6726 } 6727 TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_AWAKE, 6728 "strgetmsg awakes:%p, %p", vp, uiop); 6729 if ((error = i_straccess(stp, JCREAD)) != 0) { 6730 mutex_exit(&stp->sd_lock); 6731 return (error); 6732 } 6733 first = 0; 6734 } 6735 ASSERT(bp != NULL); 6736 /* 6737 * Extract any mark information. If the message is not completely 6738 * consumed this information will be put in the mblk 6739 * that is putback. 6740 * If MSGMARKNEXT is set and the message is completely consumed 6741 * the STRATMARK flag will be set below. Likewise, if 6742 * MSGNOTMARKNEXT is set and the message is 6743 * completely consumed STRNOTATMARK will be set. 6744 */ 6745 mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT); 6746 ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) != 6747 (MSGMARKNEXT|MSGNOTMARKNEXT)); 6748 if (mark != 0 && bp == stp->sd_mark) { 6749 mark |= _LASTMARK; 6750 stp->sd_mark = NULL; 6751 } 6752 /* 6753 * keep track of the original message type and priority 6754 */ 6755 pri = bp->b_band; 6756 type = bp->b_datap->db_type; 6757 if (type == M_PASSFP) { 6758 if ((mark & _LASTMARK) && (stp->sd_mark == NULL)) 6759 stp->sd_mark = bp; 6760 bp->b_flag |= mark & ~_LASTMARK; 6761 putback(stp, q, bp, pri); 6762 qbackenable(q, pri); 6763 mutex_exit(&stp->sd_lock); 6764 return (EBADMSG); 6765 } 6766 ASSERT(type != M_SIG); 6767 6768 /* 6769 * Set this flag so strrput will not generate signals. 
Need to 6770 * make sure this flag is cleared before leaving this routine 6771 * else signals will stop being sent. 6772 */ 6773 stp->sd_flag |= STRGETINPROG; 6774 mutex_exit(&stp->sd_lock); 6775 6776 if (STREAM_NEEDSERVICE(stp)) 6777 stream_runservice(stp); 6778 6779 /* 6780 * Set HIPRI flag if message is priority. 6781 */ 6782 if (type >= QPCTL) 6783 flg = MSG_HIPRI; 6784 else 6785 flg = MSG_BAND; 6786 6787 /* 6788 * First process PROTO or PCPROTO blocks, if any. 6789 */ 6790 if (mctl->maxlen >= 0 && type != M_DATA) { 6791 size_t n, bcnt; 6792 char *ubuf; 6793 6794 bcnt = mctl->maxlen; 6795 ubuf = mctl->buf; 6796 while (bp != NULL && bp->b_datap->db_type != M_DATA) { 6797 if ((n = MIN(bcnt, bp->b_wptr - bp->b_rptr)) != 0 && 6798 copyout(bp->b_rptr, ubuf, n)) { 6799 error = EFAULT; 6800 mutex_enter(&stp->sd_lock); 6801 /* 6802 * clear stream head pri flag based on 6803 * first message type 6804 */ 6805 if (type >= QPCTL) { 6806 ASSERT(type == M_PCPROTO); 6807 stp->sd_flag &= ~STRPRI; 6808 } 6809 more = 0; 6810 freemsg(bp); 6811 goto getmout; 6812 } 6813 ubuf += n; 6814 bp->b_rptr += n; 6815 if (bp->b_rptr >= bp->b_wptr) { 6816 nbp = bp; 6817 bp = bp->b_cont; 6818 freeb(nbp); 6819 } 6820 ASSERT(n <= bcnt); 6821 bcnt -= n; 6822 if (bcnt == 0) 6823 break; 6824 } 6825 mctl->len = mctl->maxlen - bcnt; 6826 } else 6827 mctl->len = -1; 6828 6829 if (bp && bp->b_datap->db_type != M_DATA) { 6830 /* 6831 * More PROTO blocks in msg. 6832 */ 6833 more |= MORECTL; 6834 savemp = bp; 6835 while (bp && bp->b_datap->db_type != M_DATA) { 6836 savemptail = bp; 6837 bp = bp->b_cont; 6838 } 6839 savemptail->b_cont = NULL; 6840 } 6841 6842 /* 6843 * Now process DATA blocks, if any. 6844 */ 6845 if (mdata->maxlen >= 0 && bp) { 6846 /* 6847 * struiocopyout will consume a potential zero-length 6848 * M_DATA even if uio_resid is zero. 
6849 */ 6850 size_t oldresid = uiop->uio_resid; 6851 6852 bp = struiocopyout(bp, uiop, &error); 6853 if (error != 0) { 6854 mutex_enter(&stp->sd_lock); 6855 /* 6856 * clear stream head hi pri flag based on 6857 * first message 6858 */ 6859 if (type >= QPCTL) { 6860 ASSERT(type == M_PCPROTO); 6861 stp->sd_flag &= ~STRPRI; 6862 } 6863 more = 0; 6864 freemsg(savemp); 6865 goto getmout; 6866 } 6867 /* 6868 * (pr == 1) indicates a partial read. 6869 */ 6870 if (oldresid > uiop->uio_resid) 6871 pr = 1; 6872 mdata->len = mdata->maxlen - uiop->uio_resid; 6873 } else 6874 mdata->len = -1; 6875 6876 if (bp) { /* more data blocks in msg */ 6877 more |= MOREDATA; 6878 if (savemp) 6879 savemptail->b_cont = bp; 6880 else 6881 savemp = bp; 6882 } 6883 6884 mutex_enter(&stp->sd_lock); 6885 if (savemp) { 6886 if (pr && (savemp->b_datap->db_type == M_DATA) && 6887 msgnodata(savemp)) { 6888 /* 6889 * Avoid queuing a zero-length tail part of 6890 * a message. pr=1 indicates that we read some of 6891 * the message. 6892 */ 6893 freemsg(savemp); 6894 more &= ~MOREDATA; 6895 /* 6896 * clear stream head hi pri flag based on 6897 * first message 6898 */ 6899 if (type >= QPCTL) { 6900 ASSERT(type == M_PCPROTO); 6901 stp->sd_flag &= ~STRPRI; 6902 } 6903 } else { 6904 savemp->b_band = pri; 6905 /* 6906 * If the first message was HIPRI and the one we're 6907 * putting back isn't, then clear STRPRI, otherwise 6908 * set STRPRI again. Note that we must set STRPRI 6909 * again since the flush logic in strrput_nondata() 6910 * may have cleared it while we had sd_lock dropped. 6911 */ 6912 if (type >= QPCTL) { 6913 ASSERT(type == M_PCPROTO); 6914 if (queclass(savemp) < QPCTL) 6915 stp->sd_flag &= ~STRPRI; 6916 else 6917 stp->sd_flag |= STRPRI; 6918 } else if (queclass(savemp) >= QPCTL) { 6919 /* 6920 * The first message was not a HIPRI message, 6921 * but the one we are about to putback is. 
6922 * For simplicitly, we do not allow for HIPRI 6923 * messages to be embedded in the message 6924 * body, so just force it to same type as 6925 * first message. 6926 */ 6927 ASSERT(type == M_DATA || type == M_PROTO); 6928 ASSERT(savemp->b_datap->db_type == M_PCPROTO); 6929 savemp->b_datap->db_type = type; 6930 } 6931 if (mark != 0) { 6932 savemp->b_flag |= mark & ~_LASTMARK; 6933 if ((mark & _LASTMARK) && 6934 (stp->sd_mark == NULL)) { 6935 /* 6936 * If another marked message arrived 6937 * while sd_lock was not held sd_mark 6938 * would be non-NULL. 6939 */ 6940 stp->sd_mark = savemp; 6941 } 6942 } 6943 putback(stp, q, savemp, pri); 6944 } 6945 } else { 6946 /* 6947 * The complete message was consumed. 6948 * 6949 * If another M_PCPROTO arrived while sd_lock was not held 6950 * it would have been discarded since STRPRI was still set. 6951 * 6952 * Move the MSG*MARKNEXT information 6953 * to the stream head just in case 6954 * the read queue becomes empty. 6955 * clear stream head hi pri flag based on 6956 * first message 6957 * 6958 * If the stream head was at the mark 6959 * (STRATMARK) before we dropped sd_lock above 6960 * and some data was consumed then we have 6961 * moved past the mark thus STRATMARK is 6962 * cleared. However, if a message arrived in 6963 * strrput during the copyout above causing 6964 * STRATMARK to be set we can not clear that 6965 * flag. 
6966 */ 6967 if (type >= QPCTL) { 6968 ASSERT(type == M_PCPROTO); 6969 stp->sd_flag &= ~STRPRI; 6970 } 6971 if (mark & (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) { 6972 if (mark & MSGMARKNEXT) { 6973 stp->sd_flag &= ~STRNOTATMARK; 6974 stp->sd_flag |= STRATMARK; 6975 } else if (mark & MSGNOTMARKNEXT) { 6976 stp->sd_flag &= ~STRATMARK; 6977 stp->sd_flag |= STRNOTATMARK; 6978 } else { 6979 stp->sd_flag &= ~(STRATMARK|STRNOTATMARK); 6980 } 6981 } else if (pr && (old_sd_flag & STRATMARK)) { 6982 stp->sd_flag &= ~STRATMARK; 6983 } 6984 } 6985 6986 *flagsp = flg; 6987 *prip = pri; 6988 6989 /* 6990 * Getmsg cleanup processing - if the state of the queue has changed 6991 * some signals may need to be sent and/or poll awakened. 6992 */ 6993 getmout: 6994 qbackenable(q, pri); 6995 6996 /* 6997 * We dropped the stream head lock above. Send all M_SIG messages 6998 * before processing stream head for SIGPOLL messages. 6999 */ 7000 ASSERT(MUTEX_HELD(&stp->sd_lock)); 7001 while ((bp = q->q_first) != NULL && 7002 (bp->b_datap->db_type == M_SIG)) { 7003 /* 7004 * sd_lock is held so the content of the read queue can not 7005 * change. 7006 */ 7007 bp = getq(q); 7008 ASSERT(bp != NULL && bp->b_datap->db_type == M_SIG); 7009 7010 strsignal_nolock(stp, *bp->b_rptr, bp->b_band); 7011 mutex_exit(&stp->sd_lock); 7012 freemsg(bp); 7013 if (STREAM_NEEDSERVICE(stp)) 7014 stream_runservice(stp); 7015 mutex_enter(&stp->sd_lock); 7016 } 7017 7018 /* 7019 * stream head cannot change while we make the determination 7020 * whether or not to send a signal. Drop the flag to allow strrput 7021 * to send firstmsgsigs again. 7022 */ 7023 stp->sd_flag &= ~STRGETINPROG; 7024 7025 /* 7026 * If the type of message at the front of the queue changed 7027 * due to the receive the appropriate signals and pollwakeup events 7028 * are generated. The type of changes are: 7029 * Processed a hipri message, q_first is not hipri. 7030 * Processed a band X message, and q_first is band Y. 
7031 * The generated signals and pollwakeups are identical to what 7032 * strrput() generates should the message that is now on q_first 7033 * arrive to an empty read queue. 7034 * 7035 * Note: only strrput will send a signal for a hipri message. 7036 */ 7037 if ((bp = q->q_first) != NULL && !(stp->sd_flag & STRPRI)) { 7038 strsigset_t signals = 0; 7039 strpollset_t pollwakeups = 0; 7040 7041 if (flg & MSG_HIPRI) { 7042 /* 7043 * Removed a hipri message. Regular data at 7044 * the front of the queue. 7045 */ 7046 if (bp->b_band == 0) { 7047 signals = S_INPUT | S_RDNORM; 7048 pollwakeups = POLLIN | POLLRDNORM; 7049 } else { 7050 signals = S_INPUT | S_RDBAND; 7051 pollwakeups = POLLIN | POLLRDBAND; 7052 } 7053 } else if (pri != bp->b_band) { 7054 /* 7055 * The band is different for the new q_first. 7056 */ 7057 if (bp->b_band == 0) { 7058 signals = S_RDNORM; 7059 pollwakeups = POLLIN | POLLRDNORM; 7060 } else { 7061 signals = S_RDBAND; 7062 pollwakeups = POLLIN | POLLRDBAND; 7063 } 7064 } 7065 7066 if (pollwakeups != 0) { 7067 if (pollwakeups == (POLLIN | POLLRDNORM)) { 7068 if (!(stp->sd_rput_opt & SR_POLLIN)) 7069 goto no_pollwake; 7070 stp->sd_rput_opt &= ~SR_POLLIN; 7071 } 7072 mutex_exit(&stp->sd_lock); 7073 pollwakeup(&stp->sd_pollist, pollwakeups); 7074 mutex_enter(&stp->sd_lock); 7075 } 7076 no_pollwake: 7077 7078 if (stp->sd_sigflags & signals) 7079 strsendsig(stp->sd_siglist, signals, bp->b_band, 0); 7080 } 7081 mutex_exit(&stp->sd_lock); 7082 7083 rvp->r_val1 = more; 7084 return (error); 7085 #undef _LASTMARK 7086 } 7087 7088 /* 7089 * Get the next message from the read queue. If the message is 7090 * priority, STRPRI will have been set by strrput(). This flag 7091 * should be reset only when the entire message at the front of the 7092 * queue as been consumed. 7093 * 7094 * If uiop is NULL all data is returned in mctlp. 7095 * Note that a NULL uiop implies that FNDELAY and FNONBLOCK are assumed 7096 * not enabled. 
7097 * The timeout parameter is in milliseconds; -1 for infinity. 7098 * This routine handles the consolidation private flags: 7099 * MSG_IGNERROR Ignore any stream head error except STPLEX. 7100 * MSG_DELAYERROR Defer the error check until the queue is empty. 7101 * MSG_HOLDSIG Hold signals while waiting for data. 7102 * MSG_IPEEK Only peek at messages. 7103 * MSG_DISCARDTAIL Discard the tail M_DATA part of the message 7104 * that doesn't fit. 7105 * MSG_NOMARK If the message is marked leave it on the queue. 7106 * 7107 * NOTE: strgetmsg and kstrgetmsg have much of the logic in common. 7108 */ 7109 int 7110 kstrgetmsg( 7111 struct vnode *vp, 7112 mblk_t **mctlp, 7113 struct uio *uiop, 7114 unsigned char *prip, 7115 int *flagsp, 7116 clock_t timout, 7117 rval_t *rvp) 7118 { 7119 struct stdata *stp; 7120 mblk_t *bp, *nbp; 7121 mblk_t *savemp = NULL; 7122 mblk_t *savemptail = NULL; 7123 int flags; 7124 uint_t old_sd_flag; 7125 int flg; 7126 int more = 0; 7127 int error = 0; 7128 char first = 1; 7129 uint_t mark; /* Contains MSG*MARK and _LASTMARK */ 7130 #define _LASTMARK 0x8000 /* Distinct from MSG*MARK */ 7131 unsigned char pri = 0; 7132 queue_t *q; 7133 int pr = 0; /* Partial read successful */ 7134 unsigned char type; 7135 7136 TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_ENTER, 7137 "kstrgetmsg:%p", vp); 7138 7139 ASSERT(vp->v_stream); 7140 stp = vp->v_stream; 7141 rvp->r_val1 = 0; 7142 7143 mutex_enter(&stp->sd_lock); 7144 7145 if ((error = i_straccess(stp, JCREAD)) != 0) { 7146 mutex_exit(&stp->sd_lock); 7147 return (error); 7148 } 7149 7150 flags = *flagsp; 7151 if (stp->sd_flag & (STRDERR|STPLEX)) { 7152 if ((stp->sd_flag & STPLEX) || 7153 (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == 0) { 7154 error = strgeterr(stp, STRDERR|STPLEX, 7155 (flags & MSG_IPEEK)); 7156 if (error != 0) { 7157 mutex_exit(&stp->sd_lock); 7158 return (error); 7159 } 7160 } 7161 } 7162 mutex_exit(&stp->sd_lock); 7163 7164 switch (flags & (MSG_HIPRI|MSG_ANY|MSG_BAND)) { 7165 case MSG_HIPRI: 
7166 if (*prip != 0) 7167 return (EINVAL); 7168 break; 7169 7170 case MSG_ANY: 7171 case MSG_BAND: 7172 break; 7173 7174 default: 7175 return (EINVAL); 7176 } 7177 7178 retry: 7179 q = _RD(stp->sd_wrq); 7180 mutex_enter(&stp->sd_lock); 7181 old_sd_flag = stp->sd_flag; 7182 mark = 0; 7183 for (;;) { 7184 int done = 0; 7185 int waitflag; 7186 int fmode; 7187 mblk_t *q_first = q->q_first; 7188 7189 /* 7190 * This section of the code operates just like the code 7191 * in strgetmsg(). There is a comment there about what 7192 * is going on here. 7193 */ 7194 if (!(flags & (MSG_HIPRI|MSG_BAND))) { 7195 /* Asking for normal, band0 data */ 7196 bp = strget(stp, q, uiop, first, &error); 7197 ASSERT(MUTEX_HELD(&stp->sd_lock)); 7198 if (bp != NULL) { 7199 if (DB_TYPE(bp) == M_SIG) { 7200 strsignal_nolock(stp, *bp->b_rptr, 7201 bp->b_band); 7202 freemsg(bp); 7203 continue; 7204 } else { 7205 break; 7206 } 7207 } 7208 if (error != 0) { 7209 goto getmout; 7210 } 7211 /* 7212 * We can't depend on the value of STRPRI here because 7213 * the stream head may be in transit. Therefore, we 7214 * must look at the type of the first message to 7215 * determine if a high priority messages is waiting 7216 */ 7217 } else if ((flags & MSG_HIPRI) && q_first != NULL && 7218 DB_TYPE(q_first) >= QPCTL && 7219 (bp = getq_noenab(q, 0)) != NULL) { 7220 ASSERT(DB_TYPE(bp) >= QPCTL); 7221 break; 7222 } else if ((flags & MSG_BAND) && q_first != NULL && 7223 ((q_first->b_band >= *prip) || DB_TYPE(q_first) >= QPCTL) && 7224 (bp = getq_noenab(q, 0)) != NULL) { 7225 /* 7226 * Asked for at least band "prip" and got either at 7227 * least that band or a hipri message. 7228 */ 7229 ASSERT(bp->b_band >= *prip || DB_TYPE(bp) >= QPCTL); 7230 if (DB_TYPE(bp) == M_SIG) { 7231 strsignal_nolock(stp, *bp->b_rptr, bp->b_band); 7232 freemsg(bp); 7233 continue; 7234 } else { 7235 break; 7236 } 7237 } 7238 7239 /* No data. Time to sleep? */ 7240 qbackenable(q, 0); 7241 7242 /* 7243 * Delayed error notification? 
7244 */ 7245 if ((stp->sd_flag & (STRDERR|STPLEX)) && 7246 (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == MSG_DELAYERROR) { 7247 error = strgeterr(stp, STRDERR|STPLEX, 7248 (flags & MSG_IPEEK)); 7249 if (error != 0) { 7250 mutex_exit(&stp->sd_lock); 7251 return (error); 7252 } 7253 } 7254 7255 /* 7256 * If STRHUP or STREOF, return 0 length control and data. 7257 * If a read(fd,buf,0) has been done, do not sleep, just 7258 * return. 7259 * 7260 * If mctlp == NULL and uiop == NULL, then the code will 7261 * do the strwaitq. This is an understood way of saying 7262 * sleep "polling" until a message is received. 7263 */ 7264 if ((stp->sd_flag & (STRHUP|STREOF)) || 7265 (uiop != NULL && uiop->uio_resid == 0)) { 7266 if (mctlp != NULL) 7267 *mctlp = NULL; 7268 *flagsp = 0; 7269 mutex_exit(&stp->sd_lock); 7270 return (0); 7271 } 7272 7273 waitflag = GETWAIT; 7274 if (flags & 7275 (MSG_HOLDSIG|MSG_IGNERROR|MSG_IPEEK|MSG_DELAYERROR)) { 7276 if (flags & MSG_HOLDSIG) 7277 waitflag |= STR_NOSIG; 7278 if (flags & MSG_IGNERROR) 7279 waitflag |= STR_NOERROR; 7280 if (flags & MSG_IPEEK) 7281 waitflag |= STR_PEEK; 7282 if (flags & MSG_DELAYERROR) 7283 waitflag |= STR_DELAYERR; 7284 } 7285 if (uiop != NULL) 7286 fmode = uiop->uio_fmode; 7287 else 7288 fmode = 0; 7289 7290 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_WAIT, 7291 "kstrgetmsg calls strwaitq:%p, %p", 7292 vp, uiop); 7293 if (((error = strwaitq(stp, waitflag, (ssize_t)0, 7294 fmode, timout, &done))) != 0 || done) { 7295 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_DONE, 7296 "kstrgetmsg error or done:%p, %p", 7297 vp, uiop); 7298 mutex_exit(&stp->sd_lock); 7299 return (error); 7300 } 7301 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_AWAKE, 7302 "kstrgetmsg awakes:%p, %p", vp, uiop); 7303 if ((error = i_straccess(stp, JCREAD)) != 0) { 7304 mutex_exit(&stp->sd_lock); 7305 return (error); 7306 } 7307 first = 0; 7308 } 7309 ASSERT(bp != NULL); 7310 /* 7311 * Extract any mark information. 
If the message is not completely 7312 * consumed this information will be put in the mblk 7313 * that is putback. 7314 * If MSGMARKNEXT is set and the message is completely consumed 7315 * the STRATMARK flag will be set below. Likewise, if 7316 * MSGNOTMARKNEXT is set and the message is 7317 * completely consumed STRNOTATMARK will be set. 7318 */ 7319 mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT); 7320 ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) != 7321 (MSGMARKNEXT|MSGNOTMARKNEXT)); 7322 pri = bp->b_band; 7323 if (mark != 0) { 7324 /* 7325 * If the caller doesn't want the mark return. 7326 * Used to implement MSG_WAITALL in sockets. 7327 */ 7328 if (flags & MSG_NOMARK) { 7329 putback(stp, q, bp, pri); 7330 qbackenable(q, pri); 7331 mutex_exit(&stp->sd_lock); 7332 return (EWOULDBLOCK); 7333 } 7334 if (bp == stp->sd_mark) { 7335 mark |= _LASTMARK; 7336 stp->sd_mark = NULL; 7337 } 7338 } 7339 7340 /* 7341 * keep track of the first message type 7342 */ 7343 type = bp->b_datap->db_type; 7344 7345 if (bp->b_datap->db_type == M_PASSFP) { 7346 if ((mark & _LASTMARK) && (stp->sd_mark == NULL)) 7347 stp->sd_mark = bp; 7348 bp->b_flag |= mark & ~_LASTMARK; 7349 putback(stp, q, bp, pri); 7350 qbackenable(q, pri); 7351 mutex_exit(&stp->sd_lock); 7352 return (EBADMSG); 7353 } 7354 ASSERT(type != M_SIG); 7355 7356 if (flags & MSG_IPEEK) { 7357 /* 7358 * Clear any struioflag - we do the uiomove over again 7359 * when peeking since it simplifies the code. 7360 * 7361 * Dup the message and put the original back on the queue. 7362 * If dupmsg() fails, try again with copymsg() to see if 7363 * there is indeed a shortage of memory. dupmsg() may fail 7364 * if db_ref in any of the messages reaches its limit. 7365 */ 7366 7367 ASSERT(!(bp->b_datap->db_flags & DBLK_UIOA)); 7368 if ((nbp = dupmsg(bp)) == NULL && (nbp = copymsg(bp)) == NULL) { 7369 /* 7370 * Restore the state of the stream head since we 7371 * need to drop sd_lock (strwaitbuf is sleeping). 
7372 */ 7373 size_t size = msgdsize(bp); 7374 7375 if ((mark & _LASTMARK) && (stp->sd_mark == NULL)) 7376 stp->sd_mark = bp; 7377 bp->b_flag |= mark & ~_LASTMARK; 7378 putback(stp, q, bp, pri); 7379 mutex_exit(&stp->sd_lock); 7380 error = strwaitbuf(size, BPRI_HI); 7381 if (error) { 7382 /* 7383 * There is no net change to the queue thus 7384 * no need to qbackenable. 7385 */ 7386 return (error); 7387 } 7388 goto retry; 7389 } 7390 7391 if ((mark & _LASTMARK) && (stp->sd_mark == NULL)) 7392 stp->sd_mark = bp; 7393 bp->b_flag |= mark & ~_LASTMARK; 7394 putback(stp, q, bp, pri); 7395 bp = nbp; 7396 } 7397 7398 /* 7399 * Set this flag so strrput will not generate signals. Need to 7400 * make sure this flag is cleared before leaving this routine 7401 * else signals will stop being sent. 7402 */ 7403 stp->sd_flag |= STRGETINPROG; 7404 mutex_exit(&stp->sd_lock); 7405 7406 if ((stp->sd_rputdatafunc != NULL) && (DB_TYPE(bp) == M_DATA)) { 7407 mblk_t *tmp, *prevmp; 7408 7409 /* 7410 * Put first non-data mblk back to stream head and 7411 * cut the mblk chain so sd_rputdatafunc only sees 7412 * M_DATA mblks. We can skip the first mblk since it 7413 * is M_DATA according to the condition above. 7414 */ 7415 for (prevmp = bp, tmp = bp->b_cont; tmp != NULL; 7416 prevmp = tmp, tmp = tmp->b_cont) { 7417 if (DB_TYPE(tmp) != M_DATA) { 7418 prevmp->b_cont = NULL; 7419 mutex_enter(&stp->sd_lock); 7420 putback(stp, q, tmp, tmp->b_band); 7421 mutex_exit(&stp->sd_lock); 7422 break; 7423 } 7424 } 7425 7426 ASSERT(!(bp->b_datap->db_flags & DBLK_UIOA)); 7427 bp = (stp->sd_rputdatafunc)(stp->sd_vnode, bp, 7428 NULL, NULL, NULL, NULL); 7429 7430 if (bp == NULL) 7431 goto retry; 7432 } 7433 7434 if (STREAM_NEEDSERVICE(stp)) 7435 stream_runservice(stp); 7436 7437 /* 7438 * Set HIPRI flag if message is priority. 7439 */ 7440 if (type >= QPCTL) 7441 flg = MSG_HIPRI; 7442 else 7443 flg = MSG_BAND; 7444 7445 /* 7446 * First process PROTO or PCPROTO blocks, if any. 
7447 */ 7448 if (mctlp != NULL && type != M_DATA) { 7449 mblk_t *nbp; 7450 7451 *mctlp = bp; 7452 while (bp->b_cont && bp->b_cont->b_datap->db_type != M_DATA) 7453 bp = bp->b_cont; 7454 nbp = bp->b_cont; 7455 bp->b_cont = NULL; 7456 bp = nbp; 7457 } 7458 7459 if (bp && bp->b_datap->db_type != M_DATA) { 7460 /* 7461 * More PROTO blocks in msg. Will only happen if mctlp is NULL. 7462 */ 7463 more |= MORECTL; 7464 savemp = bp; 7465 while (bp && bp->b_datap->db_type != M_DATA) { 7466 savemptail = bp; 7467 bp = bp->b_cont; 7468 } 7469 savemptail->b_cont = NULL; 7470 } 7471 7472 /* 7473 * Now process DATA blocks, if any. 7474 */ 7475 if (uiop == NULL) { 7476 /* Append data to tail of mctlp */ 7477 7478 ASSERT(bp == NULL || !(bp->b_datap->db_flags & DBLK_UIOA)); 7479 if (mctlp != NULL) { 7480 mblk_t **mpp = mctlp; 7481 7482 while (*mpp != NULL) 7483 mpp = &((*mpp)->b_cont); 7484 *mpp = bp; 7485 bp = NULL; 7486 } 7487 } else if (bp && (bp->b_datap->db_flags & DBLK_UIOA)) { 7488 /* 7489 * A uioa mblk_t chain, as uio processing has already 7490 * been done we simple skip over processing. 7491 */ 7492 bp = NULL; 7493 pr = 0; 7494 7495 } else if (uiop->uio_resid >= 0 && bp) { 7496 size_t oldresid = uiop->uio_resid; 7497 7498 /* 7499 * If a streams message is likely to consist 7500 * of many small mblks, it is pulled up into 7501 * one continuous chunk of memory. 7502 * see longer comment at top of page 7503 * by mblk_pull_len declaration. 
7504 */ 7505 7506 if (MBLKL(bp) < mblk_pull_len) { 7507 (void) pullupmsg(bp, -1); 7508 } 7509 7510 bp = struiocopyout(bp, uiop, &error); 7511 if (error != 0) { 7512 if (mctlp != NULL) { 7513 freemsg(*mctlp); 7514 *mctlp = NULL; 7515 } else 7516 freemsg(savemp); 7517 mutex_enter(&stp->sd_lock); 7518 /* 7519 * clear stream head hi pri flag based on 7520 * first message 7521 */ 7522 if (!(flags & MSG_IPEEK) && (type >= QPCTL)) { 7523 ASSERT(type == M_PCPROTO); 7524 stp->sd_flag &= ~STRPRI; 7525 } 7526 more = 0; 7527 goto getmout; 7528 } 7529 /* 7530 * (pr == 1) indicates a partial read. 7531 */ 7532 if (oldresid > uiop->uio_resid) 7533 pr = 1; 7534 } 7535 7536 if (bp) { /* more data blocks in msg */ 7537 more |= MOREDATA; 7538 if (savemp) 7539 savemptail->b_cont = bp; 7540 else 7541 savemp = bp; 7542 } 7543 7544 mutex_enter(&stp->sd_lock); 7545 if (savemp) { 7546 if (flags & (MSG_IPEEK|MSG_DISCARDTAIL)) { 7547 /* 7548 * When MSG_DISCARDTAIL is set or 7549 * when peeking discard any tail. When peeking this 7550 * is the tail of the dup that was copied out - the 7551 * message has already been putback on the queue. 7552 * Return MOREDATA to the caller even though the data 7553 * is discarded. This is used by sockets (to 7554 * set MSG_TRUNC). 7555 */ 7556 freemsg(savemp); 7557 if (!(flags & MSG_IPEEK) && (type >= QPCTL)) { 7558 ASSERT(type == M_PCPROTO); 7559 stp->sd_flag &= ~STRPRI; 7560 } 7561 } else if (pr && (savemp->b_datap->db_type == M_DATA) && 7562 msgnodata(savemp)) { 7563 /* 7564 * Avoid queuing a zero-length tail part of 7565 * a message. pr=1 indicates that we read some of 7566 * the message. 7567 */ 7568 freemsg(savemp); 7569 more &= ~MOREDATA; 7570 if (type >= QPCTL) { 7571 ASSERT(type == M_PCPROTO); 7572 stp->sd_flag &= ~STRPRI; 7573 } 7574 } else { 7575 savemp->b_band = pri; 7576 /* 7577 * If the first message was HIPRI and the one we're 7578 * putting back isn't, then clear STRPRI, otherwise 7579 * set STRPRI again. 
Note that we must set STRPRI 7580 * again since the flush logic in strrput_nondata() 7581 * may have cleared it while we had sd_lock dropped. 7582 */ 7583 7584 ASSERT(!(savemp->b_datap->db_flags & DBLK_UIOA)); 7585 if (type >= QPCTL) { 7586 ASSERT(type == M_PCPROTO); 7587 if (queclass(savemp) < QPCTL) 7588 stp->sd_flag &= ~STRPRI; 7589 else 7590 stp->sd_flag |= STRPRI; 7591 } else if (queclass(savemp) >= QPCTL) { 7592 /* 7593 * The first message was not a HIPRI message, 7594 * but the one we are about to putback is. 7595 * For simplicitly, we do not allow for HIPRI 7596 * messages to be embedded in the message 7597 * body, so just force it to same type as 7598 * first message. 7599 */ 7600 ASSERT(type == M_DATA || type == M_PROTO); 7601 ASSERT(savemp->b_datap->db_type == M_PCPROTO); 7602 savemp->b_datap->db_type = type; 7603 } 7604 if (mark != 0) { 7605 if ((mark & _LASTMARK) && 7606 (stp->sd_mark == NULL)) { 7607 /* 7608 * If another marked message arrived 7609 * while sd_lock was not held sd_mark 7610 * would be non-NULL. 7611 */ 7612 stp->sd_mark = savemp; 7613 } 7614 savemp->b_flag |= mark & ~_LASTMARK; 7615 } 7616 putback(stp, q, savemp, pri); 7617 } 7618 } else if (!(flags & MSG_IPEEK)) { 7619 /* 7620 * The complete message was consumed. 7621 * 7622 * If another M_PCPROTO arrived while sd_lock was not held 7623 * it would have been discarded since STRPRI was still set. 7624 * 7625 * Move the MSG*MARKNEXT information 7626 * to the stream head just in case 7627 * the read queue becomes empty. 7628 * clear stream head hi pri flag based on 7629 * first message 7630 * 7631 * If the stream head was at the mark 7632 * (STRATMARK) before we dropped sd_lock above 7633 * and some data was consumed then we have 7634 * moved past the mark thus STRATMARK is 7635 * cleared. However, if a message arrived in 7636 * strrput during the copyout above causing 7637 * STRATMARK to be set we can not clear that 7638 * flag. 
7639 * XXX A "perimeter" would help by single-threading strrput, 7640 * strread, strgetmsg and kstrgetmsg. 7641 */ 7642 if (type >= QPCTL) { 7643 ASSERT(type == M_PCPROTO); 7644 stp->sd_flag &= ~STRPRI; 7645 } 7646 if (mark & (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) { 7647 if (mark & MSGMARKNEXT) { 7648 stp->sd_flag &= ~STRNOTATMARK; 7649 stp->sd_flag |= STRATMARK; 7650 } else if (mark & MSGNOTMARKNEXT) { 7651 stp->sd_flag &= ~STRATMARK; 7652 stp->sd_flag |= STRNOTATMARK; 7653 } else { 7654 stp->sd_flag &= ~(STRATMARK|STRNOTATMARK); 7655 } 7656 } else if (pr && (old_sd_flag & STRATMARK)) { 7657 stp->sd_flag &= ~STRATMARK; 7658 } 7659 } 7660 7661 *flagsp = flg; 7662 *prip = pri; 7663 7664 /* 7665 * Getmsg cleanup processing - if the state of the queue has changed 7666 * some signals may need to be sent and/or poll awakened. 7667 */ 7668 getmout: 7669 qbackenable(q, pri); 7670 7671 /* 7672 * We dropped the stream head lock above. Send all M_SIG messages 7673 * before processing stream head for SIGPOLL messages. 7674 */ 7675 ASSERT(MUTEX_HELD(&stp->sd_lock)); 7676 while ((bp = q->q_first) != NULL && 7677 (bp->b_datap->db_type == M_SIG)) { 7678 /* 7679 * sd_lock is held so the content of the read queue can not 7680 * change. 7681 */ 7682 bp = getq(q); 7683 ASSERT(bp != NULL && bp->b_datap->db_type == M_SIG); 7684 7685 strsignal_nolock(stp, *bp->b_rptr, bp->b_band); 7686 mutex_exit(&stp->sd_lock); 7687 freemsg(bp); 7688 if (STREAM_NEEDSERVICE(stp)) 7689 stream_runservice(stp); 7690 mutex_enter(&stp->sd_lock); 7691 } 7692 7693 /* 7694 * stream head cannot change while we make the determination 7695 * whether or not to send a signal. Drop the flag to allow strrput 7696 * to send firstmsgsigs again. 7697 */ 7698 stp->sd_flag &= ~STRGETINPROG; 7699 7700 /* 7701 * If the type of message at the front of the queue changed 7702 * due to the receive the appropriate signals and pollwakeup events 7703 * are generated. 
The type of changes are: 7704 * Processed a hipri message, q_first is not hipri. 7705 * Processed a band X message, and q_first is band Y. 7706 * The generated signals and pollwakeups are identical to what 7707 * strrput() generates should the message that is now on q_first 7708 * arrive to an empty read queue. 7709 * 7710 * Note: only strrput will send a signal for a hipri message. 7711 */ 7712 if ((bp = q->q_first) != NULL && !(stp->sd_flag & STRPRI)) { 7713 strsigset_t signals = 0; 7714 strpollset_t pollwakeups = 0; 7715 7716 if (flg & MSG_HIPRI) { 7717 /* 7718 * Removed a hipri message. Regular data at 7719 * the front of the queue. 7720 */ 7721 if (bp->b_band == 0) { 7722 signals = S_INPUT | S_RDNORM; 7723 pollwakeups = POLLIN | POLLRDNORM; 7724 } else { 7725 signals = S_INPUT | S_RDBAND; 7726 pollwakeups = POLLIN | POLLRDBAND; 7727 } 7728 } else if (pri != bp->b_band) { 7729 /* 7730 * The band is different for the new q_first. 7731 */ 7732 if (bp->b_band == 0) { 7733 signals = S_RDNORM; 7734 pollwakeups = POLLIN | POLLRDNORM; 7735 } else { 7736 signals = S_RDBAND; 7737 pollwakeups = POLLIN | POLLRDBAND; 7738 } 7739 } 7740 7741 if (pollwakeups != 0) { 7742 if (pollwakeups == (POLLIN | POLLRDNORM)) { 7743 if (!(stp->sd_rput_opt & SR_POLLIN)) 7744 goto no_pollwake; 7745 stp->sd_rput_opt &= ~SR_POLLIN; 7746 } 7747 mutex_exit(&stp->sd_lock); 7748 pollwakeup(&stp->sd_pollist, pollwakeups); 7749 mutex_enter(&stp->sd_lock); 7750 } 7751 no_pollwake: 7752 7753 if (stp->sd_sigflags & signals) 7754 strsendsig(stp->sd_siglist, signals, bp->b_band, 0); 7755 } 7756 mutex_exit(&stp->sd_lock); 7757 7758 rvp->r_val1 = more; 7759 return (error); 7760 #undef _LASTMARK 7761 } 7762 7763 /* 7764 * Put a message downstream. 7765 * 7766 * NOTE: strputmsg and kstrputmsg have much of the logic in common. 
7767 */ 7768 int 7769 strputmsg( 7770 struct vnode *vp, 7771 struct strbuf *mctl, 7772 struct strbuf *mdata, 7773 unsigned char pri, 7774 int flag, 7775 int fmode) 7776 { 7777 struct stdata *stp; 7778 queue_t *wqp; 7779 mblk_t *mp; 7780 ssize_t msgsize; 7781 ssize_t rmin, rmax; 7782 int error; 7783 struct uio uios; 7784 struct uio *uiop = &uios; 7785 struct iovec iovs; 7786 int xpg4 = 0; 7787 7788 ASSERT(vp->v_stream); 7789 stp = vp->v_stream; 7790 wqp = stp->sd_wrq; 7791 7792 /* 7793 * If it is an XPG4 application, we need to send 7794 * SIGPIPE below 7795 */ 7796 7797 xpg4 = (flag & MSG_XPG4) ? 1 : 0; 7798 flag &= ~MSG_XPG4; 7799 7800 if (audit_active) 7801 audit_strputmsg(vp, mctl, mdata, pri, flag, fmode); 7802 7803 mutex_enter(&stp->sd_lock); 7804 7805 if ((error = i_straccess(stp, JCWRITE)) != 0) { 7806 mutex_exit(&stp->sd_lock); 7807 return (error); 7808 } 7809 7810 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { 7811 error = strwriteable(stp, B_FALSE, xpg4); 7812 if (error != 0) { 7813 mutex_exit(&stp->sd_lock); 7814 return (error); 7815 } 7816 } 7817 7818 mutex_exit(&stp->sd_lock); 7819 7820 /* 7821 * Check for legal flag value. 7822 */ 7823 switch (flag) { 7824 case MSG_HIPRI: 7825 if ((mctl->len < 0) || (pri != 0)) 7826 return (EINVAL); 7827 break; 7828 case MSG_BAND: 7829 break; 7830 7831 default: 7832 return (EINVAL); 7833 } 7834 7835 TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_IN, 7836 "strputmsg in:stp %p", stp); 7837 7838 /* get these values from those cached in the stream head */ 7839 rmin = stp->sd_qn_minpsz; 7840 rmax = stp->sd_qn_maxpsz; 7841 7842 /* 7843 * Make sure ctl and data sizes together fall within the 7844 * limits of the max and min receive packet sizes and do 7845 * not exceed system limit. 7846 */ 7847 ASSERT((rmax >= 0) || (rmax == INFPSZ)); 7848 if (rmax == 0) { 7849 return (ERANGE); 7850 } 7851 /* 7852 * Use the MAXIMUM of sd_maxblk and q_maxpsz. 7853 * Needed to prevent partial failures in the strmakedata loop. 
7854 */ 7855 if (stp->sd_maxblk != INFPSZ && rmax != INFPSZ && rmax < stp->sd_maxblk) 7856 rmax = stp->sd_maxblk; 7857 7858 if ((msgsize = mdata->len) < 0) { 7859 msgsize = 0; 7860 rmin = 0; /* no range check for NULL data part */ 7861 } 7862 if ((msgsize < rmin) || 7863 ((msgsize > rmax) && (rmax != INFPSZ)) || 7864 (mctl->len > strctlsz)) { 7865 return (ERANGE); 7866 } 7867 7868 /* 7869 * Setup uio and iov for data part 7870 */ 7871 iovs.iov_base = mdata->buf; 7872 iovs.iov_len = msgsize; 7873 uios.uio_iov = &iovs; 7874 uios.uio_iovcnt = 1; 7875 uios.uio_loffset = 0; 7876 uios.uio_segflg = UIO_USERSPACE; 7877 uios.uio_fmode = fmode; 7878 uios.uio_extflg = UIO_COPY_DEFAULT; 7879 uios.uio_resid = msgsize; 7880 uios.uio_offset = 0; 7881 7882 /* Ignore flow control in strput for HIPRI */ 7883 if (flag & MSG_HIPRI) 7884 flag |= MSG_IGNFLOW; 7885 7886 for (;;) { 7887 int done = 0; 7888 7889 /* 7890 * strput will always free the ctl mblk - even when strput 7891 * fails. 7892 */ 7893 if ((error = strmakectl(mctl, flag, fmode, &mp)) != 0) { 7894 TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT, 7895 "strputmsg out:stp %p out %d error %d", 7896 stp, 1, error); 7897 return (error); 7898 } 7899 /* 7900 * Verify that the whole message can be transferred by 7901 * strput. 7902 */ 7903 ASSERT(stp->sd_maxblk == INFPSZ || 7904 stp->sd_maxblk >= mdata->len); 7905 7906 msgsize = mdata->len; 7907 error = strput(stp, mp, uiop, &msgsize, 0, pri, flag); 7908 mdata->len = msgsize; 7909 7910 if (error == 0) 7911 break; 7912 7913 if (error != EWOULDBLOCK) 7914 goto out; 7915 7916 mutex_enter(&stp->sd_lock); 7917 /* 7918 * Check for a missed wakeup. 7919 * Needed since strput did not hold sd_lock across 7920 * the canputnext. 
7921 */ 7922 if (bcanputnext(wqp, pri)) { 7923 /* Try again */ 7924 mutex_exit(&stp->sd_lock); 7925 continue; 7926 } 7927 TRACE_2(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAIT, 7928 "strputmsg wait:stp %p waits pri %d", stp, pri); 7929 if (((error = strwaitq(stp, WRITEWAIT, (ssize_t)0, fmode, -1, 7930 &done)) != 0) || done) { 7931 mutex_exit(&stp->sd_lock); 7932 TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT, 7933 "strputmsg out:q %p out %d error %d", 7934 stp, 0, error); 7935 return (error); 7936 } 7937 TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAKE, 7938 "strputmsg wake:stp %p wakes", stp); 7939 if ((error = i_straccess(stp, JCWRITE)) != 0) { 7940 mutex_exit(&stp->sd_lock); 7941 return (error); 7942 } 7943 mutex_exit(&stp->sd_lock); 7944 } 7945 out: 7946 /* 7947 * For historic reasons, applications expect EAGAIN 7948 * when data mblk could not be allocated. so change 7949 * ENOMEM back to EAGAIN 7950 */ 7951 if (error == ENOMEM) 7952 error = EAGAIN; 7953 TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT, 7954 "strputmsg out:stp %p out %d error %d", stp, 2, error); 7955 return (error); 7956 } 7957 7958 /* 7959 * Put a message downstream. 7960 * Can send only an M_PROTO/M_PCPROTO by passing in a NULL uiop. 7961 * The fmode flag (NDELAY, NONBLOCK) is the or of the flags in the uio 7962 * and the fmode parameter. 7963 * 7964 * This routine handles the consolidation private flags: 7965 * MSG_IGNERROR Ignore any stream head error except STPLEX. 7966 * MSG_HOLDSIG Hold signals while waiting for data. 7967 * MSG_IGNFLOW Don't check streams flow control. 7968 * 7969 * NOTE: strputmsg and kstrputmsg have much of the logic in common. 
7970 */ 7971 int 7972 kstrputmsg( 7973 struct vnode *vp, 7974 mblk_t *mctl, 7975 struct uio *uiop, 7976 ssize_t msgsize, 7977 unsigned char pri, 7978 int flag, 7979 int fmode) 7980 { 7981 struct stdata *stp; 7982 queue_t *wqp; 7983 ssize_t rmin, rmax; 7984 int error; 7985 7986 ASSERT(vp->v_stream); 7987 stp = vp->v_stream; 7988 wqp = stp->sd_wrq; 7989 if (audit_active) 7990 audit_strputmsg(vp, NULL, NULL, pri, flag, fmode); 7991 if (mctl == NULL) 7992 return (EINVAL); 7993 7994 mutex_enter(&stp->sd_lock); 7995 7996 if ((error = i_straccess(stp, JCWRITE)) != 0) { 7997 mutex_exit(&stp->sd_lock); 7998 freemsg(mctl); 7999 return (error); 8000 } 8001 8002 if ((stp->sd_flag & STPLEX) || !(flag & MSG_IGNERROR)) { 8003 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { 8004 error = strwriteable(stp, B_FALSE, B_TRUE); 8005 if (error != 0) { 8006 mutex_exit(&stp->sd_lock); 8007 freemsg(mctl); 8008 return (error); 8009 } 8010 } 8011 } 8012 8013 mutex_exit(&stp->sd_lock); 8014 8015 /* 8016 * Check for legal flag value. 8017 */ 8018 switch (flag & (MSG_HIPRI|MSG_BAND|MSG_ANY)) { 8019 case MSG_HIPRI: 8020 if (pri != 0) { 8021 freemsg(mctl); 8022 return (EINVAL); 8023 } 8024 break; 8025 case MSG_BAND: 8026 break; 8027 default: 8028 freemsg(mctl); 8029 return (EINVAL); 8030 } 8031 8032 TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_IN, 8033 "kstrputmsg in:stp %p", stp); 8034 8035 /* get these values from those cached in the stream head */ 8036 rmin = stp->sd_qn_minpsz; 8037 rmax = stp->sd_qn_maxpsz; 8038 8039 /* 8040 * Make sure ctl and data sizes together fall within the 8041 * limits of the max and min receive packet sizes and do 8042 * not exceed system limit. 8043 */ 8044 ASSERT((rmax >= 0) || (rmax == INFPSZ)); 8045 if (rmax == 0) { 8046 freemsg(mctl); 8047 return (ERANGE); 8048 } 8049 /* 8050 * Use the MAXIMUM of sd_maxblk and q_maxpsz. 8051 * Needed to prevent partial failures in the strmakedata loop. 
8052 */ 8053 if (stp->sd_maxblk != INFPSZ && rmax != INFPSZ && rmax < stp->sd_maxblk) 8054 rmax = stp->sd_maxblk; 8055 8056 if (uiop == NULL) { 8057 msgsize = -1; 8058 rmin = -1; /* no range check for NULL data part */ 8059 } else { 8060 /* Use uio flags as well as the fmode parameter flags */ 8061 fmode |= uiop->uio_fmode; 8062 8063 if ((msgsize < rmin) || 8064 ((msgsize > rmax) && (rmax != INFPSZ))) { 8065 freemsg(mctl); 8066 return (ERANGE); 8067 } 8068 } 8069 8070 /* Ignore flow control in strput for HIPRI */ 8071 if (flag & MSG_HIPRI) 8072 flag |= MSG_IGNFLOW; 8073 8074 for (;;) { 8075 int done = 0; 8076 int waitflag; 8077 mblk_t *mp; 8078 8079 /* 8080 * strput will always free the ctl mblk - even when strput 8081 * fails. If MSG_IGNFLOW is set then any error returned 8082 * will cause us to break the loop, so we don't need a copy 8083 * of the message. If MSG_IGNFLOW is not set, then we can 8084 * get hit by flow control and be forced to try again. In 8085 * this case we need to have a copy of the message. We 8086 * do this using copymsg since the message may get modified 8087 * by something below us. 8088 * 8089 * We've observed that many TPI providers do not check db_ref 8090 * on the control messages but blindly reuse them for the 8091 * T_OK_ACK/T_ERROR_ACK. Thus using copymsg is more 8092 * friendly to such providers than using dupmsg. Also, note 8093 * that sockfs uses MSG_IGNFLOW for all TPI control messages. 8094 * Only data messages are subject to flow control, hence 8095 * subject to this copymsg. 8096 */ 8097 if (flag & MSG_IGNFLOW) { 8098 mp = mctl; 8099 mctl = NULL; 8100 } else { 8101 do { 8102 /* 8103 * If a message has a free pointer, the message 8104 * must be dupmsg to maintain this pointer. 8105 * Code using this facility must be sure 8106 * that modules below will not change the 8107 * contents of the dblk without checking db_ref 8108 * first. If db_ref is > 1, then the module 8109 * needs to do a copymsg first. 
Otherwise, 8110 * the contents of the dblk may become 8111 * inconsistent because the freesmg/freeb below 8112 * may end up calling atomic_add_32_nv. 8113 * The atomic_add_32_nv in freeb (accessing 8114 * all of db_ref, db_type, db_flags, and 8115 * db_struioflag) does not prevent other threads 8116 * from concurrently trying to modify e.g. 8117 * db_type. 8118 */ 8119 if (mctl->b_datap->db_frtnp != NULL) 8120 mp = dupmsg(mctl); 8121 else 8122 mp = copymsg(mctl); 8123 8124 if (mp != NULL) 8125 break; 8126 8127 error = strwaitbuf(msgdsize(mctl), BPRI_MED); 8128 if (error) { 8129 freemsg(mctl); 8130 return (error); 8131 } 8132 } while (mp == NULL); 8133 } 8134 /* 8135 * Verify that all of msgsize can be transferred by 8136 * strput. 8137 */ 8138 ASSERT(stp->sd_maxblk == INFPSZ || stp->sd_maxblk >= msgsize); 8139 error = strput(stp, mp, uiop, &msgsize, 0, pri, flag); 8140 if (error == 0) 8141 break; 8142 8143 if (error != EWOULDBLOCK) 8144 goto out; 8145 8146 /* 8147 * IF MSG_IGNFLOW is set we should have broken out of loop 8148 * above. 8149 */ 8150 ASSERT(!(flag & MSG_IGNFLOW)); 8151 mutex_enter(&stp->sd_lock); 8152 /* 8153 * Check for a missed wakeup. 8154 * Needed since strput did not hold sd_lock across 8155 * the canputnext. 
8156 */ 8157 if (bcanputnext(wqp, pri)) { 8158 /* Try again */ 8159 mutex_exit(&stp->sd_lock); 8160 continue; 8161 } 8162 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAIT, 8163 "kstrputmsg wait:stp %p waits pri %d", stp, pri); 8164 8165 waitflag = WRITEWAIT; 8166 if (flag & (MSG_HOLDSIG|MSG_IGNERROR)) { 8167 if (flag & MSG_HOLDSIG) 8168 waitflag |= STR_NOSIG; 8169 if (flag & MSG_IGNERROR) 8170 waitflag |= STR_NOERROR; 8171 } 8172 if (((error = strwaitq(stp, waitflag, 8173 (ssize_t)0, fmode, -1, &done)) != 0) || done) { 8174 mutex_exit(&stp->sd_lock); 8175 TRACE_3(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_OUT, 8176 "kstrputmsg out:stp %p out %d error %d", 8177 stp, 0, error); 8178 freemsg(mctl); 8179 return (error); 8180 } 8181 TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAKE, 8182 "kstrputmsg wake:stp %p wakes", stp); 8183 if ((error = i_straccess(stp, JCWRITE)) != 0) { 8184 mutex_exit(&stp->sd_lock); 8185 freemsg(mctl); 8186 return (error); 8187 } 8188 mutex_exit(&stp->sd_lock); 8189 } 8190 out: 8191 freemsg(mctl); 8192 /* 8193 * For historic reasons, applications expect EAGAIN 8194 * when data mblk could not be allocated. so change 8195 * ENOMEM back to EAGAIN 8196 */ 8197 if (error == ENOMEM) 8198 error = EAGAIN; 8199 TRACE_3(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_OUT, 8200 "kstrputmsg out:stp %p out %d error %d", stp, 2, error); 8201 return (error); 8202 } 8203 8204 /* 8205 * Determines whether the necessary conditions are set on a stream 8206 * for it to be readable, writeable, or have exceptions. 8207 * 8208 * strpoll handles the consolidation private events: 8209 * POLLNOERR Do not return POLLERR even if there are stream 8210 * head errors. 8211 * Used by sockfs. 8212 * POLLRDDATA Do not return POLLIN unless at least one message on 8213 * the queue contains one or more M_DATA mblks. Thus 8214 * when this flag is set a queue with only 8215 * M_PROTO/M_PCPROTO mblks does not return POLLIN. 8216 * Used by sockfs to ignore T_EXDATA_IND messages. 
8217 * 8218 * Note: POLLRDDATA assumes that synch streams only return messages with 8219 * an M_DATA attached (i.e. not messages consisting of only 8220 * an M_PROTO/M_PCPROTO part). 8221 */ 8222 int 8223 strpoll( 8224 struct stdata *stp, 8225 short events_arg, 8226 int anyyet, 8227 short *reventsp, 8228 struct pollhead **phpp) 8229 { 8230 int events = (ushort_t)events_arg; 8231 int retevents = 0; 8232 mblk_t *mp; 8233 qband_t *qbp; 8234 long sd_flags = stp->sd_flag; 8235 int headlocked = 0; 8236 8237 /* 8238 * For performance, a single 'if' tests for most possible edge 8239 * conditions in one shot 8240 */ 8241 if (sd_flags & (STPLEX | STRDERR | STWRERR)) { 8242 if (sd_flags & STPLEX) { 8243 *reventsp = POLLNVAL; 8244 return (EINVAL); 8245 } 8246 if (((events & (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) && 8247 (sd_flags & STRDERR)) || 8248 ((events & (POLLOUT | POLLWRNORM | POLLWRBAND)) && 8249 (sd_flags & STWRERR))) { 8250 if (!(events & POLLNOERR)) { 8251 *reventsp = POLLERR; 8252 return (0); 8253 } 8254 } 8255 } 8256 if (sd_flags & STRHUP) { 8257 retevents |= POLLHUP; 8258 } else if (events & (POLLWRNORM | POLLWRBAND)) { 8259 queue_t *tq; 8260 queue_t *qp = stp->sd_wrq; 8261 8262 claimstr(qp); 8263 /* Find next module forward that has a service procedure */ 8264 tq = qp->q_next->q_nfsrv; 8265 ASSERT(tq != NULL); 8266 8267 polllock(&stp->sd_pollist, QLOCK(tq)); 8268 if (events & POLLWRNORM) { 8269 queue_t *sqp; 8270 8271 if (tq->q_flag & QFULL) 8272 /* ensure backq svc procedure runs */ 8273 tq->q_flag |= QWANTW; 8274 else if ((sqp = stp->sd_struiowrq) != NULL) { 8275 /* Check sync stream barrier write q */ 8276 mutex_exit(QLOCK(tq)); 8277 polllock(&stp->sd_pollist, QLOCK(sqp)); 8278 if (sqp->q_flag & QFULL) 8279 /* ensure pollwakeup() is done */ 8280 sqp->q_flag |= QWANTWSYNC; 8281 else 8282 retevents |= POLLOUT; 8283 /* More write events to process ??? */ 8284 if (! 
(events & POLLWRBAND)) { 8285 mutex_exit(QLOCK(sqp)); 8286 releasestr(qp); 8287 goto chkrd; 8288 } 8289 mutex_exit(QLOCK(sqp)); 8290 polllock(&stp->sd_pollist, QLOCK(tq)); 8291 } else 8292 retevents |= POLLOUT; 8293 } 8294 if (events & POLLWRBAND) { 8295 qbp = tq->q_bandp; 8296 if (qbp) { 8297 while (qbp) { 8298 if (qbp->qb_flag & QB_FULL) 8299 qbp->qb_flag |= QB_WANTW; 8300 else 8301 retevents |= POLLWRBAND; 8302 qbp = qbp->qb_next; 8303 } 8304 } else { 8305 retevents |= POLLWRBAND; 8306 } 8307 } 8308 mutex_exit(QLOCK(tq)); 8309 releasestr(qp); 8310 } 8311 chkrd: 8312 if (sd_flags & STRPRI) { 8313 retevents |= (events & POLLPRI); 8314 } else if (events & (POLLRDNORM | POLLRDBAND | POLLIN)) { 8315 queue_t *qp = _RD(stp->sd_wrq); 8316 int normevents = (events & (POLLIN | POLLRDNORM)); 8317 8318 /* 8319 * Note: Need to do polllock() here since ps_lock may be 8320 * held. See bug 4191544. 8321 */ 8322 polllock(&stp->sd_pollist, &stp->sd_lock); 8323 headlocked = 1; 8324 mp = qp->q_first; 8325 while (mp) { 8326 /* 8327 * For POLLRDDATA we scan b_cont and b_next until we 8328 * find an M_DATA. 8329 */ 8330 if ((events & POLLRDDATA) && 8331 mp->b_datap->db_type != M_DATA) { 8332 mblk_t *nmp = mp->b_cont; 8333 8334 while (nmp != NULL && 8335 nmp->b_datap->db_type != M_DATA) 8336 nmp = nmp->b_cont; 8337 if (nmp == NULL) { 8338 mp = mp->b_next; 8339 continue; 8340 } 8341 } 8342 if (mp->b_band == 0) 8343 retevents |= normevents; 8344 else 8345 retevents |= (events & (POLLIN | POLLRDBAND)); 8346 break; 8347 } 8348 if (! (retevents & normevents) && 8349 (stp->sd_wakeq & RSLEEP)) { 8350 /* 8351 * Sync stream barrier read queue has data. 
8352 */ 8353 retevents |= normevents; 8354 } 8355 /* Treat eof as normal data */ 8356 if (sd_flags & STREOF) 8357 retevents |= normevents; 8358 } 8359 8360 *reventsp = (short)retevents; 8361 if (retevents) { 8362 if (headlocked) 8363 mutex_exit(&stp->sd_lock); 8364 return (0); 8365 } 8366 8367 /* 8368 * If poll() has not found any events yet, set up event cell 8369 * to wake up the poll if a requested event occurs on this 8370 * stream. Check for collisions with outstanding poll requests. 8371 */ 8372 if (!anyyet) { 8373 *phpp = &stp->sd_pollist; 8374 if (headlocked == 0) { 8375 polllock(&stp->sd_pollist, &stp->sd_lock); 8376 headlocked = 1; 8377 } 8378 stp->sd_rput_opt |= SR_POLLIN; 8379 } 8380 if (headlocked) 8381 mutex_exit(&stp->sd_lock); 8382 return (0); 8383 } 8384 8385 /* 8386 * The purpose of putback() is to assure sleeping polls/reads 8387 * are awakened when there are no new messages arriving at the, 8388 * stream head, and a message is placed back on the read queue. 8389 * 8390 * sd_lock must be held when messages are placed back on stream 8391 * head. (getq() holds sd_lock when it removes messages from 8392 * the queue) 8393 */ 8394 8395 static void 8396 putback(struct stdata *stp, queue_t *q, mblk_t *bp, int band) 8397 { 8398 mblk_t *qfirst; 8399 ASSERT(MUTEX_HELD(&stp->sd_lock)); 8400 8401 /* 8402 * As a result of lock-step ordering around q_lock and sd_lock, 8403 * it's possible for function calls like putnext() and 8404 * canputnext() to get an inaccurate picture of how much 8405 * data is really being processed at the stream head. 8406 * We only consolidate with existing messages on the queue 8407 * if the length of the message we want to put back is smaller 8408 * than the queue hiwater mark. 
8409 */ 8410 if ((stp->sd_rput_opt & SR_CONSOL_DATA) && 8411 (DB_TYPE(bp) == M_DATA) && ((qfirst = q->q_first) != NULL) && 8412 (DB_TYPE(qfirst) == M_DATA) && 8413 ((qfirst->b_flag & (MSGMARK|MSGDELIM)) == 0) && 8414 ((bp->b_flag & (MSGMARK|MSGDELIM|MSGMARKNEXT)) == 0) && 8415 (mp_cont_len(bp, NULL) < q->q_hiwat)) { 8416 /* 8417 * We use the same logic as defined in strrput() 8418 * but in reverse as we are putting back onto the 8419 * queue and want to retain byte ordering. 8420 * Consolidate M_DATA messages with M_DATA ONLY. 8421 * strrput() allows the consolidation of M_DATA onto 8422 * M_PROTO | M_PCPROTO but not the other way round. 8423 * 8424 * The consolidation does not take place if the message 8425 * we are returning to the queue is marked with either 8426 * of the marks or the delim flag or if q_first 8427 * is marked with MSGMARK. The MSGMARK check is needed to 8428 * handle the odd semantics of MSGMARK where essentially 8429 * the whole message is to be treated as marked. 8430 * Carry any MSGMARKNEXT and MSGNOTMARKNEXT from q_first 8431 * to the front of the b_cont chain. 8432 */ 8433 rmvq_noenab(q, qfirst); 8434 8435 /* 8436 * The first message in the b_cont list 8437 * tracks MSGMARKNEXT and MSGNOTMARKNEXT. 8438 * We need to handle the case where we 8439 * are appending: 8440 * 8441 * 1) a MSGMARKNEXT to a MSGNOTMARKNEXT. 8442 * 2) a MSGMARKNEXT to a plain message. 8443 * 3) a MSGNOTMARKNEXT to a plain message 8444 * 4) a MSGNOTMARKNEXT to a MSGNOTMARKNEXT 8445 * message. 8446 * 8447 * Thus we never append a MSGMARKNEXT or 8448 * MSGNOTMARKNEXT to a MSGMARKNEXT message. 
8449 */ 8450 if (qfirst->b_flag & MSGMARKNEXT) { 8451 bp->b_flag |= MSGMARKNEXT; 8452 bp->b_flag &= ~MSGNOTMARKNEXT; 8453 qfirst->b_flag &= ~MSGMARKNEXT; 8454 } else if (qfirst->b_flag & MSGNOTMARKNEXT) { 8455 bp->b_flag |= MSGNOTMARKNEXT; 8456 qfirst->b_flag &= ~MSGNOTMARKNEXT; 8457 } 8458 8459 linkb(bp, qfirst); 8460 } 8461 (void) putbq(q, bp); 8462 8463 /* 8464 * A message may have come in when the sd_lock was dropped in the 8465 * calling routine. If this is the case and STR*ATMARK info was 8466 * received, need to move that from the stream head to the q_last 8467 * so that SIOCATMARK can return the proper value. 8468 */ 8469 if (stp->sd_flag & (STRATMARK | STRNOTATMARK)) { 8470 unsigned short *flagp = &q->q_last->b_flag; 8471 uint_t b_flag = (uint_t)*flagp; 8472 8473 if (stp->sd_flag & STRATMARK) { 8474 b_flag &= ~MSGNOTMARKNEXT; 8475 b_flag |= MSGMARKNEXT; 8476 stp->sd_flag &= ~STRATMARK; 8477 } else { 8478 b_flag &= ~MSGMARKNEXT; 8479 b_flag |= MSGNOTMARKNEXT; 8480 stp->sd_flag &= ~STRNOTATMARK; 8481 } 8482 *flagp = (unsigned short) b_flag; 8483 } 8484 8485 #ifdef DEBUG 8486 /* 8487 * Make sure that the flags are not messed up. 
8488 */ 8489 { 8490 mblk_t *mp; 8491 mp = q->q_last; 8492 while (mp != NULL) { 8493 ASSERT((mp->b_flag & (MSGMARKNEXT|MSGNOTMARKNEXT)) != 8494 (MSGMARKNEXT|MSGNOTMARKNEXT)); 8495 mp = mp->b_cont; 8496 } 8497 } 8498 #endif 8499 if (q->q_first == bp) { 8500 short pollevents; 8501 8502 if (stp->sd_flag & RSLEEP) { 8503 stp->sd_flag &= ~RSLEEP; 8504 cv_broadcast(&q->q_wait); 8505 } 8506 if (stp->sd_flag & STRPRI) { 8507 pollevents = POLLPRI; 8508 } else { 8509 if (band == 0) { 8510 if (!(stp->sd_rput_opt & SR_POLLIN)) 8511 return; 8512 stp->sd_rput_opt &= ~SR_POLLIN; 8513 pollevents = POLLIN | POLLRDNORM; 8514 } else { 8515 pollevents = POLLIN | POLLRDBAND; 8516 } 8517 } 8518 mutex_exit(&stp->sd_lock); 8519 pollwakeup(&stp->sd_pollist, pollevents); 8520 mutex_enter(&stp->sd_lock); 8521 } 8522 } 8523 8524 /* 8525 * Return the held vnode attached to the stream head of a 8526 * given queue 8527 * It is the responsibility of the calling routine to ensure 8528 * that the queue does not go away (e.g. pop). 8529 */ 8530 vnode_t * 8531 strq2vp(queue_t *qp) 8532 { 8533 vnode_t *vp; 8534 vp = STREAM(qp)->sd_vnode; 8535 ASSERT(vp != NULL); 8536 VN_HOLD(vp); 8537 return (vp); 8538 } 8539 8540 /* 8541 * return the stream head write queue for the given vp 8542 * It is the responsibility of the calling routine to ensure 8543 * that the stream or vnode do not close. 8544 */ 8545 queue_t * 8546 strvp2wq(vnode_t *vp) 8547 { 8548 ASSERT(vp->v_stream != NULL); 8549 return (vp->v_stream->sd_wrq); 8550 } 8551 8552 /* 8553 * pollwakeup stream head 8554 * It is the responsibility of the calling routine to ensure 8555 * that the stream or vnode do not close. 8556 */ 8557 void 8558 strpollwakeup(vnode_t *vp, short event) 8559 { 8560 ASSERT(vp->v_stream); 8561 pollwakeup(&vp->v_stream->sd_pollist, event); 8562 } 8563 8564 /* 8565 * Mate the stream heads of two vnodes together. 
If the two vnodes are the 8566 * same, we just make the write-side point at the read-side -- otherwise, 8567 * we do a full mate. Only works on vnodes associated with streams that are 8568 * still being built and thus have only a stream head. 8569 */ 8570 void 8571 strmate(vnode_t *vp1, vnode_t *vp2) 8572 { 8573 queue_t *wrq1 = strvp2wq(vp1); 8574 queue_t *wrq2 = strvp2wq(vp2); 8575 8576 /* 8577 * Verify that there are no modules on the stream yet. We also 8578 * rely on the stream head always having a service procedure to 8579 * avoid tweaking q_nfsrv. 8580 */ 8581 ASSERT(wrq1->q_next == NULL && wrq2->q_next == NULL); 8582 ASSERT(wrq1->q_qinfo->qi_srvp != NULL); 8583 ASSERT(wrq2->q_qinfo->qi_srvp != NULL); 8584 8585 /* 8586 * If the queues are the same, just twist; otherwise do a full mate. 8587 */ 8588 if (wrq1 == wrq2) { 8589 wrq1->q_next = _RD(wrq1); 8590 } else { 8591 wrq1->q_next = _RD(wrq2); 8592 wrq2->q_next = _RD(wrq1); 8593 STREAM(wrq1)->sd_mate = STREAM(wrq2); 8594 STREAM(wrq1)->sd_flag |= STRMATE; 8595 STREAM(wrq2)->sd_mate = STREAM(wrq1); 8596 STREAM(wrq2)->sd_flag |= STRMATE; 8597 } 8598 } 8599 8600 /* 8601 * XXX will go away when console is correctly fixed. 8602 * Clean up the console PIDS, from previous I_SETSIG, 8603 * called only for cnopen which never calls strclean(). 8604 */ 8605 void 8606 str_cn_clean(struct vnode *vp) 8607 { 8608 strsig_t *ssp, *pssp, *tssp; 8609 struct stdata *stp; 8610 struct pid *pidp; 8611 int update = 0; 8612 8613 ASSERT(vp->v_stream); 8614 stp = vp->v_stream; 8615 pssp = NULL; 8616 mutex_enter(&stp->sd_lock); 8617 ssp = stp->sd_siglist; 8618 while (ssp) { 8619 mutex_enter(&pidlock); 8620 pidp = ssp->ss_pidp; 8621 /* 8622 * Get rid of PID if the proc is gone. 
8623 */ 8624 if (pidp->pid_prinactive) { 8625 tssp = ssp->ss_next; 8626 if (pssp) 8627 pssp->ss_next = tssp; 8628 else 8629 stp->sd_siglist = tssp; 8630 ASSERT(pidp->pid_ref <= 1); 8631 PID_RELE(ssp->ss_pidp); 8632 mutex_exit(&pidlock); 8633 kmem_free(ssp, sizeof (strsig_t)); 8634 update = 1; 8635 ssp = tssp; 8636 continue; 8637 } else 8638 mutex_exit(&pidlock); 8639 pssp = ssp; 8640 ssp = ssp->ss_next; 8641 } 8642 if (update) { 8643 stp->sd_sigflags = 0; 8644 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 8645 stp->sd_sigflags |= ssp->ss_events; 8646 } 8647 mutex_exit(&stp->sd_lock); 8648 } 8649 8650 /* 8651 * Return B_TRUE if there is data in the message, B_FALSE otherwise. 8652 */ 8653 static boolean_t 8654 msghasdata(mblk_t *bp) 8655 { 8656 for (; bp; bp = bp->b_cont) 8657 if (bp->b_datap->db_type == M_DATA) { 8658 ASSERT(bp->b_wptr >= bp->b_rptr); 8659 if (bp->b_wptr > bp->b_rptr) 8660 return (B_TRUE); 8661 } 8662 return (B_FALSE); 8663 } 8664 8665 /* 8666 * Called on the first strget() of a sodirect/uioa enabled streamhead, 8667 * if any mblk_t(s) enqueued they must first be uioamove()d before uioa 8668 * can be enabled for the underlying transport's use. 8669 */ 8670 void 8671 struioainit(queue_t *q, sodirect_t *sodp, uio_t *uiop) 8672 { 8673 uioa_t *uioap = (uioa_t *)uiop; 8674 mblk_t *bp; 8675 mblk_t *lbp = NULL; 8676 mblk_t *wbp; 8677 int len; 8678 int error; 8679 8680 ASSERT(MUTEX_HELD(sodp->sod_lockp)); 8681 ASSERT(&sodp->sod_uioa == uioap); 8682 8683 /* 8684 * Walk first b_cont chain in sod_q 8685 * and schedule any M_DATA mblk_t's for uio asynchronous move. 
8686 */ 8687 mutex_enter(QLOCK(q)); 8688 if ((bp = q->q_first) == NULL) { 8689 mutex_exit(QLOCK(q)); 8690 return; 8691 } 8692 /* Walk the chain */ 8693 wbp = bp; 8694 do { 8695 if (wbp->b_datap->db_type != M_DATA) { 8696 /* Not M_DATA, no more uioa */ 8697 goto nouioa; 8698 } 8699 if ((len = wbp->b_wptr - wbp->b_rptr) > 0) { 8700 /* Have a M_DATA mblk_t with data */ 8701 if (len > uioap->uio_resid) { 8702 /* Not enough uio sapce */ 8703 goto nouioa; 8704 } 8705 ASSERT(!(wbp->b_datap->db_flags & DBLK_UIOA)); 8706 error = uioamove(wbp->b_rptr, len, 8707 UIO_READ, uioap); 8708 if (!error) { 8709 /* Scheduled, mark dblk_t as such */ 8710 wbp->b_datap->db_flags |= DBLK_UIOA; 8711 } else { 8712 /* Break the mblk chain */ 8713 goto nouioa; 8714 } 8715 } 8716 /* Save last wbp processed */ 8717 lbp = wbp; 8718 } while ((wbp = wbp->b_cont) != NULL); 8719 8720 mutex_exit(QLOCK(q)); 8721 return; 8722 8723 nouioa: 8724 /* No more uioa */ 8725 uioap->uioa_state &= UIOA_CLR; 8726 uioap->uioa_state |= UIOA_FINI; 8727 8728 /* 8729 * If we processed 1 or more mblk_t(s) then we need to split the 8730 * current mblk_t chain in 2 so that all the uioamove()ed mblk_t(s) 8731 * are in the current chain and the rest are in the following new 8732 * chain. 8733 */ 8734 if (lbp != NULL) { 8735 /* New end of current chain */ 8736 lbp->b_cont = NULL; 8737 8738 /* Insert new chain wbp after bp */ 8739 if ((wbp->b_next = bp->b_next) != NULL) 8740 bp->b_next->b_prev = wbp; 8741 else 8742 q->q_last = wbp; 8743 wbp->b_prev = bp; 8744 bp->b_next = wbp; 8745 } 8746 mutex_exit(QLOCK(q)); 8747 } 8748