1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 22 /* All Rights Reserved */ 23 24 25 /* 26 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 27 * Use is subject to license terms. 
28 */ 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 #include <sys/types.h> 33 #include <sys/sysmacros.h> 34 #include <sys/param.h> 35 #include <sys/errno.h> 36 #include <sys/signal.h> 37 #include <sys/stat.h> 38 #include <sys/proc.h> 39 #include <sys/cred.h> 40 #include <sys/user.h> 41 #include <sys/vnode.h> 42 #include <sys/file.h> 43 #include <sys/stream.h> 44 #include <sys/strsubr.h> 45 #include <sys/stropts.h> 46 #include <sys/tihdr.h> 47 #include <sys/var.h> 48 #include <sys/poll.h> 49 #include <sys/termio.h> 50 #include <sys/ttold.h> 51 #include <sys/systm.h> 52 #include <sys/uio.h> 53 #include <sys/cmn_err.h> 54 #include <sys/sad.h> 55 #include <sys/netstack.h> 56 #include <sys/priocntl.h> 57 #include <sys/jioctl.h> 58 #include <sys/procset.h> 59 #include <sys/session.h> 60 #include <sys/kmem.h> 61 #include <sys/filio.h> 62 #include <sys/vtrace.h> 63 #include <sys/debug.h> 64 #include <sys/strredir.h> 65 #include <sys/fs/fifonode.h> 66 #include <sys/fs/snode.h> 67 #include <sys/strlog.h> 68 #include <sys/strsun.h> 69 #include <sys/project.h> 70 #include <sys/kbio.h> 71 #include <sys/msio.h> 72 #include <sys/tty.h> 73 #include <sys/ptyvar.h> 74 #include <sys/vuid_event.h> 75 #include <sys/modctl.h> 76 #include <sys/sunddi.h> 77 #include <sys/sunldi_impl.h> 78 #include <sys/autoconf.h> 79 #include <sys/policy.h> 80 #include <sys/dld.h> 81 #include <sys/zone.h> 82 83 /* 84 * This define helps improve the readability of streams code while 85 * still maintaining a very old streams performance enhancement. The 86 * performance enhancement basically involved having all callers 87 * of straccess() perform the first check that straccess() will do 88 * locally before actually calling straccess(). (There by reducing 89 * the number of unnecessary calls to straccess().) 90 */ 91 #define i_straccess(x, y) ((stp->sd_sidp == NULL) ? 0 : \ 92 (stp->sd_vnode->v_type == VFIFO) ? 0 : \ 93 straccess((x), (y))) 94 95 /* 96 * what is mblk_pull_len? 
97 * 98 * If a streams message consists of many short messages, 99 * a performance degradation occurs from copyout overhead. 100 * To decrease the per mblk overhead, messages that are 101 * likely to consist of many small mblks are pulled up into 102 * one continuous chunk of memory. 103 * 104 * To avoid the processing overhead of examining every 105 * mblk, a quick heuristic is used. If the first mblk in 106 * the message is shorter than mblk_pull_len, it is likely 107 * that the rest of the mblk will be short. 108 * 109 * This heuristic was decided upon after performance tests 110 * indicated that anything more complex slowed down the main 111 * code path. 112 */ 113 #define MBLK_PULL_LEN 64 114 uint32_t mblk_pull_len = MBLK_PULL_LEN; 115 116 /* 117 * The sgttyb_handling flag controls the handling of the old BSD 118 * TIOCGETP, TIOCSETP, and TIOCSETN ioctls as follows: 119 * 120 * 0 - Emit no warnings at all and retain old, broken behavior. 121 * 1 - Emit no warnings and silently handle new semantics. 122 * 2 - Send cmn_err(CE_NOTE) when either TIOCSETP or TIOCSETN is used 123 * (once per system invocation). Handle with new semantics. 124 * 3 - Send SIGSYS when any TIOCGETP, TIOCSETP, or TIOCSETN call is 125 * made (so that offenders drop core and are easy to debug). 126 * 127 * The "new semantics" are that TIOCGETP returns B38400 for 128 * sg_[io]speed if the corresponding value is over B38400, and that 129 * TIOCSET[PN] accept B38400 in these cases to mean "retain current 130 * bit rate." 
131 */ 132 int sgttyb_handling = 1; 133 static boolean_t sgttyb_complaint; 134 135 /* don't push drcompat module by default on Style-2 streams */ 136 static int push_drcompat = 0; 137 138 /* 139 * id value used to distinguish between different ioctl messages 140 */ 141 static uint32_t ioc_id; 142 143 static void putback(struct stdata *, queue_t *, mblk_t *, int); 144 static void strcleanall(struct vnode *); 145 static int strwsrv(queue_t *); 146 static int strdocmd(struct stdata *, struct strcmd *, cred_t *); 147 static void struioainit(queue_t *, sodirect_t *, uio_t *); 148 149 /* 150 * qinit and module_info structures for stream head read and write queues 151 */ 152 struct module_info strm_info = { 0, "strrhead", 0, INFPSZ, STRHIGH, STRLOW }; 153 struct module_info stwm_info = { 0, "strwhead", 0, 0, 0, 0 }; 154 struct qinit strdata = { strrput, NULL, NULL, NULL, NULL, &strm_info }; 155 struct qinit stwdata = { NULL, strwsrv, NULL, NULL, NULL, &stwm_info }; 156 struct module_info fiform_info = { 0, "fifostrrhead", 0, PIPE_BUF, FIFOHIWAT, 157 FIFOLOWAT }; 158 struct module_info fifowm_info = { 0, "fifostrwhead", 0, 0, 0, 0 }; 159 struct qinit fifo_strdata = { strrput, NULL, NULL, NULL, NULL, &fiform_info }; 160 struct qinit fifo_stwdata = { NULL, strwsrv, NULL, NULL, NULL, &fifowm_info }; 161 162 extern kmutex_t strresources; /* protects global resources */ 163 extern kmutex_t muxifier; /* single-threads multiplexor creation */ 164 165 static boolean_t msghasdata(mblk_t *bp); 166 #define msgnodata(bp) (!msghasdata(bp)) 167 168 /* 169 * Stream head locking notes: 170 * There are four monitors associated with the stream head: 171 * 1. v_stream monitor: in stropen() and strclose() v_lock 172 * is held while the association of vnode and stream 173 * head is established or tested for. 174 * 2. open/close/push/pop monitor: sd_lock is held while each 175 * thread bids for exclusive access to this monitor 176 * for opening or closing a stream. 
In addition, this 177 * monitor is entered during pushes and pops. This 178 * guarantees that during plumbing operations there 179 * is only one thread trying to change the plumbing. 180 * Any other threads present in the stream are only 181 * using the plumbing. 182 * 3. read/write monitor: in the case of read, a thread holds 183 * sd_lock while trying to get data from the stream 184 * head queue. if there is none to fulfill a read 185 * request, it sets RSLEEP and calls cv_wait_sig() down 186 * in strwaitq() to await the arrival of new data. 187 * when new data arrives in strrput(), sd_lock is acquired 188 * before testing for RSLEEP and calling cv_broadcast(). 189 * the behavior of strwrite(), strwsrv(), and WSLEEP 190 * mirror this. 191 * 4. ioctl monitor: sd_lock is gotten to ensure that only one 192 * thread is doing an ioctl at a time. 193 * 194 * Note, for sodirect case 3. is extended to (*sodirect_t.sod_enqueue)() 195 * call-back from below, further the sodirect support is for code paths 196 * called via kstgetmsg(), all other code paths ASSERT() that sodirect 197 * uioa generated mblk_t's (i.e. DBLK_UIOA) aren't processed. 198 */ 199 200 static int 201 push_mod(queue_t *qp, dev_t *devp, struct stdata *stp, const char *name, 202 int anchor, cred_t *crp, uint_t anchor_zoneid) 203 { 204 int error; 205 fmodsw_impl_t *fp; 206 207 if (stp->sd_flag & (STRHUP|STRDERR|STWRERR)) { 208 error = (stp->sd_flag & STRHUP) ? ENXIO : EIO; 209 return (error); 210 } 211 if (stp->sd_pushcnt >= nstrpush) { 212 return (EINVAL); 213 } 214 215 if ((fp = fmodsw_find(name, FMODSW_HOLD | FMODSW_LOAD)) == NULL) { 216 stp->sd_flag |= STREOPENFAIL; 217 return (EINVAL); 218 } 219 220 /* 221 * push new module and call its open routine via qattach 222 */ 223 if ((error = qattach(qp, devp, 0, crp, fp, B_FALSE)) != 0) 224 return (error); 225 226 /* 227 * Check to see if caller wants a STREAMS anchor 228 * put at this place in the stream, and add if so. 
229 */ 230 mutex_enter(&stp->sd_lock); 231 if (anchor == stp->sd_pushcnt) { 232 stp->sd_anchor = stp->sd_pushcnt; 233 stp->sd_anchorzone = anchor_zoneid; 234 } 235 mutex_exit(&stp->sd_lock); 236 237 return (0); 238 } 239 240 /* 241 * Open a stream device. 242 */ 243 int 244 stropen(vnode_t *vp, dev_t *devp, int flag, cred_t *crp) 245 { 246 struct stdata *stp; 247 queue_t *qp; 248 int s; 249 dev_t dummydev, savedev; 250 struct autopush *ap; 251 struct dlautopush dlap; 252 int error = 0; 253 ssize_t rmin, rmax; 254 int cloneopen; 255 queue_t *brq; 256 major_t major; 257 str_stack_t *ss; 258 zoneid_t zoneid; 259 uint_t anchor; 260 261 if (audit_active) 262 audit_stropen(vp, devp, flag, crp); 263 264 /* 265 * If the stream already exists, wait for any open in progress 266 * to complete, then call the open function of each module and 267 * driver in the stream. Otherwise create the stream. 268 */ 269 TRACE_1(TR_FAC_STREAMS_FR, TR_STROPEN, "stropen:%p", vp); 270 retry: 271 mutex_enter(&vp->v_lock); 272 if ((stp = vp->v_stream) != NULL) { 273 274 /* 275 * Waiting for stream to be created to device 276 * due to another open. 
277 */ 278 mutex_exit(&vp->v_lock); 279 280 if (STRMATED(stp)) { 281 struct stdata *strmatep = stp->sd_mate; 282 283 STRLOCKMATES(stp); 284 if (strmatep->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) { 285 if (flag & (FNDELAY|FNONBLOCK)) { 286 error = EAGAIN; 287 mutex_exit(&strmatep->sd_lock); 288 goto ckreturn; 289 } 290 mutex_exit(&stp->sd_lock); 291 if (!cv_wait_sig(&strmatep->sd_monitor, 292 &strmatep->sd_lock)) { 293 error = EINTR; 294 mutex_exit(&strmatep->sd_lock); 295 mutex_enter(&stp->sd_lock); 296 goto ckreturn; 297 } 298 mutex_exit(&strmatep->sd_lock); 299 goto retry; 300 } 301 if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) { 302 if (flag & (FNDELAY|FNONBLOCK)) { 303 error = EAGAIN; 304 mutex_exit(&strmatep->sd_lock); 305 goto ckreturn; 306 } 307 mutex_exit(&strmatep->sd_lock); 308 if (!cv_wait_sig(&stp->sd_monitor, 309 &stp->sd_lock)) { 310 error = EINTR; 311 goto ckreturn; 312 } 313 mutex_exit(&stp->sd_lock); 314 goto retry; 315 } 316 317 if (stp->sd_flag & (STRDERR|STWRERR)) { 318 error = EIO; 319 mutex_exit(&strmatep->sd_lock); 320 goto ckreturn; 321 } 322 323 stp->sd_flag |= STWOPEN; 324 STRUNLOCKMATES(stp); 325 } else { 326 mutex_enter(&stp->sd_lock); 327 if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) { 328 if (flag & (FNDELAY|FNONBLOCK)) { 329 error = EAGAIN; 330 goto ckreturn; 331 } 332 if (!cv_wait_sig(&stp->sd_monitor, 333 &stp->sd_lock)) { 334 error = EINTR; 335 goto ckreturn; 336 } 337 mutex_exit(&stp->sd_lock); 338 goto retry; /* could be clone! */ 339 } 340 341 if (stp->sd_flag & (STRDERR|STWRERR)) { 342 error = EIO; 343 goto ckreturn; 344 } 345 346 stp->sd_flag |= STWOPEN; 347 mutex_exit(&stp->sd_lock); 348 } 349 350 /* 351 * Open all modules and devices down stream to notify 352 * that another user is streaming. For modules, set the 353 * last argument to MODOPEN and do not pass any open flags. 354 * Ignore dummydev since this is not the first open. 
355 */ 356 claimstr(stp->sd_wrq); 357 qp = stp->sd_wrq; 358 while (_SAMESTR(qp)) { 359 qp = qp->q_next; 360 if ((error = qreopen(_RD(qp), devp, flag, crp)) != 0) 361 break; 362 } 363 releasestr(stp->sd_wrq); 364 mutex_enter(&stp->sd_lock); 365 stp->sd_flag &= ~(STRHUP|STWOPEN|STRDERR|STWRERR); 366 stp->sd_rerror = 0; 367 stp->sd_werror = 0; 368 ckreturn: 369 cv_broadcast(&stp->sd_monitor); 370 mutex_exit(&stp->sd_lock); 371 return (error); 372 } 373 374 /* 375 * This vnode isn't streaming. SPECFS already 376 * checked for multiple vnodes pointing to the 377 * same stream, so create a stream to the driver. 378 */ 379 qp = allocq(); 380 stp = shalloc(qp); 381 382 /* 383 * Initialize stream head. shalloc() has given us 384 * exclusive access, and we have the vnode locked; 385 * we can do whatever we want with stp. 386 */ 387 stp->sd_flag = STWOPEN; 388 stp->sd_siglist = NULL; 389 stp->sd_pollist.ph_list = NULL; 390 stp->sd_sigflags = 0; 391 stp->sd_mark = NULL; 392 stp->sd_closetime = STRTIMOUT; 393 stp->sd_sidp = NULL; 394 stp->sd_pgidp = NULL; 395 stp->sd_vnode = vp; 396 stp->sd_rerror = 0; 397 stp->sd_werror = 0; 398 stp->sd_wroff = 0; 399 stp->sd_tail = 0; 400 stp->sd_iocblk = NULL; 401 stp->sd_cmdblk = NULL; 402 stp->sd_pushcnt = 0; 403 stp->sd_qn_minpsz = 0; 404 stp->sd_qn_maxpsz = INFPSZ - 1; /* used to check for initialization */ 405 stp->sd_maxblk = INFPSZ; 406 stp->sd_sodirect = NULL; 407 qp->q_ptr = _WR(qp)->q_ptr = stp; 408 STREAM(qp) = STREAM(_WR(qp)) = stp; 409 vp->v_stream = stp; 410 mutex_exit(&vp->v_lock); 411 if (vp->v_type == VFIFO) { 412 stp->sd_flag |= OLDNDELAY; 413 /* 414 * This means, both for pipes and fifos 415 * strwrite will send SIGPIPE if the other 416 * end is closed. For putmsg it depends 417 * on whether it is a XPG4_2 application 418 * or not 419 */ 420 stp->sd_wput_opt = SW_SIGPIPE; 421 422 /* setq might sleep in kmem_alloc - avoid holding locks. 
*/ 423 setq(qp, &fifo_strdata, &fifo_stwdata, NULL, QMTSAFE, 424 SQ_CI|SQ_CO, B_FALSE); 425 426 set_qend(qp); 427 stp->sd_strtab = fifo_getinfo(); 428 _WR(qp)->q_nfsrv = _WR(qp); 429 qp->q_nfsrv = qp; 430 /* 431 * Wake up others that are waiting for stream to be created. 432 */ 433 mutex_enter(&stp->sd_lock); 434 /* 435 * nothing is be pushed on stream yet, so 436 * optimized stream head packetsizes are just that 437 * of the read queue 438 */ 439 stp->sd_qn_minpsz = qp->q_minpsz; 440 stp->sd_qn_maxpsz = qp->q_maxpsz; 441 stp->sd_flag &= ~STWOPEN; 442 goto fifo_opendone; 443 } 444 /* setq might sleep in kmem_alloc - avoid holding locks. */ 445 setq(qp, &strdata, &stwdata, NULL, QMTSAFE, SQ_CI|SQ_CO, B_FALSE); 446 447 set_qend(qp); 448 449 /* 450 * Open driver and create stream to it (via qattach). 451 */ 452 savedev = *devp; 453 cloneopen = (getmajor(*devp) == clone_major); 454 if ((error = qattach(qp, devp, flag, crp, NULL, B_FALSE)) != 0) { 455 mutex_enter(&vp->v_lock); 456 vp->v_stream = NULL; 457 mutex_exit(&vp->v_lock); 458 mutex_enter(&stp->sd_lock); 459 cv_broadcast(&stp->sd_monitor); 460 mutex_exit(&stp->sd_lock); 461 freeq(_RD(qp)); 462 shfree(stp); 463 return (error); 464 } 465 /* 466 * Set sd_strtab after open in order to handle clonable drivers 467 */ 468 stp->sd_strtab = STREAMSTAB(getmajor(*devp)); 469 470 /* 471 * Historical note: dummydev used to be be prior to the initial 472 * open (via qattach above), which made the value seen 473 * inconsistent between an I_PUSH and an autopush of a module. 
474 */ 475 dummydev = *devp; 476 477 /* 478 * For clone open of old style (Q not associated) network driver, 479 * push DRMODNAME module to handle DL_ATTACH/DL_DETACH 480 */ 481 brq = _RD(_WR(qp)->q_next); 482 major = getmajor(*devp); 483 if (push_drcompat && cloneopen && NETWORK_DRV(major) && 484 ((brq->q_flag & _QASSOCIATED) == 0)) { 485 if (push_mod(qp, &dummydev, stp, DRMODNAME, 0, crp, 0) != 0) 486 cmn_err(CE_WARN, "cannot push " DRMODNAME 487 " streams module"); 488 } 489 490 if (!NETWORK_DRV(major)) { 491 savedev = *devp; 492 } else { 493 /* 494 * For network devices, process differently based on the 495 * return value from dld_autopush(): 496 * 497 * 0: the passed-in device points to a GLDv3 datalink with 498 * per-link autopush configuration; use that configuration 499 * and ignore any per-driver autopush configuration. 500 * 501 * 1: the passed-in device points to a physical GLDv3 502 * datalink without per-link autopush configuration. The 503 * passed in device was changed to refer to the actual 504 * physical device (if it's not already); we use that new 505 * device to look up any per-driver autopush configuration. 506 * 507 * -1: neither of the above cases applied; use the initial 508 * device to look up any per-driver autopush configuration. 509 */ 510 switch (dld_autopush(&savedev, &dlap)) { 511 case 0: 512 zoneid = crgetzoneid(crp); 513 for (s = 0; s < dlap.dap_npush; s++) { 514 error = push_mod(qp, &dummydev, stp, 515 dlap.dap_aplist[s], dlap.dap_anchor, crp, 516 zoneid); 517 if (error != 0) 518 break; 519 } 520 goto opendone; 521 case 1: 522 break; 523 case -1: 524 savedev = *devp; 525 break; 526 } 527 } 528 /* 529 * Find the autopush configuration based on "savedev". Start with the 530 * global zone. If not found check in the local zone. 
531 */ 532 zoneid = GLOBAL_ZONEID; 533 retryap: 534 ss = netstack_find_by_stackid(zoneid_to_netstackid(zoneid))-> 535 netstack_str; 536 if ((ap = sad_ap_find_by_dev(savedev, ss)) == NULL) { 537 netstack_rele(ss->ss_netstack); 538 if (zoneid == GLOBAL_ZONEID) { 539 /* 540 * None found. Also look in the zone's autopush table. 541 */ 542 zoneid = crgetzoneid(crp); 543 if (zoneid != GLOBAL_ZONEID) 544 goto retryap; 545 } 546 goto opendone; 547 } 548 anchor = ap->ap_anchor; 549 zoneid = crgetzoneid(crp); 550 for (s = 0; s < ap->ap_npush; s++) { 551 error = push_mod(qp, &dummydev, stp, ap->ap_list[s], 552 anchor, crp, zoneid); 553 if (error != 0) 554 break; 555 } 556 sad_ap_rele(ap, ss); 557 netstack_rele(ss->ss_netstack); 558 559 opendone: 560 561 /* 562 * let specfs know that open failed part way through 563 */ 564 if (error) { 565 mutex_enter(&stp->sd_lock); 566 stp->sd_flag |= STREOPENFAIL; 567 mutex_exit(&stp->sd_lock); 568 } 569 570 /* 571 * Wake up others that are waiting for stream to be created. 572 */ 573 mutex_enter(&stp->sd_lock); 574 stp->sd_flag &= ~STWOPEN; 575 576 /* 577 * As a performance concern we are caching the values of 578 * q_minpsz and q_maxpsz of the module below the stream 579 * head in the stream head. 
580 */ 581 mutex_enter(QLOCK(stp->sd_wrq->q_next)); 582 rmin = stp->sd_wrq->q_next->q_minpsz; 583 rmax = stp->sd_wrq->q_next->q_maxpsz; 584 mutex_exit(QLOCK(stp->sd_wrq->q_next)); 585 586 /* do this processing here as a performance concern */ 587 if (strmsgsz != 0) { 588 if (rmax == INFPSZ) 589 rmax = strmsgsz; 590 else 591 rmax = MIN(strmsgsz, rmax); 592 } 593 594 mutex_enter(QLOCK(stp->sd_wrq)); 595 stp->sd_qn_minpsz = rmin; 596 stp->sd_qn_maxpsz = rmax; 597 mutex_exit(QLOCK(stp->sd_wrq)); 598 599 fifo_opendone: 600 cv_broadcast(&stp->sd_monitor); 601 mutex_exit(&stp->sd_lock); 602 return (error); 603 } 604 605 static int strsink(queue_t *, mblk_t *); 606 static struct qinit deadrend = { 607 strsink, NULL, NULL, NULL, NULL, &strm_info, NULL 608 }; 609 static struct qinit deadwend = { 610 NULL, NULL, NULL, NULL, NULL, &stwm_info, NULL 611 }; 612 613 /* 614 * Close a stream. 615 * This is called from closef() on the last close of an open stream. 616 * Strclean() will already have removed the siglist and pollist 617 * information, so all that remains is to remove all multiplexor links 618 * for the stream, pop all the modules (and the driver), and free the 619 * stream structure. 620 */ 621 622 int 623 strclose(struct vnode *vp, int flag, cred_t *crp) 624 { 625 struct stdata *stp; 626 queue_t *qp; 627 int rval; 628 int freestp = 1; 629 queue_t *rmq; 630 631 if (audit_active) 632 audit_strclose(vp, flag, crp); 633 634 TRACE_1(TR_FAC_STREAMS_FR, 635 TR_STRCLOSE, "strclose:%p", vp); 636 ASSERT(vp->v_stream); 637 638 stp = vp->v_stream; 639 ASSERT(!(stp->sd_flag & STPLEX)); 640 qp = stp->sd_wrq; 641 642 /* 643 * Needed so that strpoll will return non-zero for this fd. 644 * Note that with POLLNOERR STRHUP does still cause POLLHUP. 
645 */ 646 mutex_enter(&stp->sd_lock); 647 stp->sd_flag |= STRHUP; 648 mutex_exit(&stp->sd_lock); 649 650 /* 651 * If the registered process or process group did not have an 652 * open instance of this stream then strclean would not be 653 * called. Thus at the time of closing all remaining siglist entries 654 * are removed. 655 */ 656 if (stp->sd_siglist != NULL) 657 strcleanall(vp); 658 659 ASSERT(stp->sd_siglist == NULL); 660 ASSERT(stp->sd_sigflags == 0); 661 662 if (STRMATED(stp)) { 663 struct stdata *strmatep = stp->sd_mate; 664 int waited = 1; 665 666 STRLOCKMATES(stp); 667 while (waited) { 668 waited = 0; 669 while (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) { 670 mutex_exit(&strmatep->sd_lock); 671 cv_wait(&stp->sd_monitor, &stp->sd_lock); 672 mutex_exit(&stp->sd_lock); 673 STRLOCKMATES(stp); 674 waited = 1; 675 } 676 while (strmatep->sd_flag & 677 (STWOPEN|STRCLOSE|STRPLUMB)) { 678 mutex_exit(&stp->sd_lock); 679 cv_wait(&strmatep->sd_monitor, 680 &strmatep->sd_lock); 681 mutex_exit(&strmatep->sd_lock); 682 STRLOCKMATES(stp); 683 waited = 1; 684 } 685 } 686 stp->sd_flag |= STRCLOSE; 687 STRUNLOCKMATES(stp); 688 } else { 689 mutex_enter(&stp->sd_lock); 690 stp->sd_flag |= STRCLOSE; 691 mutex_exit(&stp->sd_lock); 692 } 693 694 ASSERT(qp->q_first == NULL); /* No more delayed write */ 695 696 /* Check if an I_LINK was ever done on this stream */ 697 if (stp->sd_flag & STRHASLINKS) { 698 netstack_t *ns; 699 str_stack_t *ss; 700 701 ns = netstack_find_by_cred(crp); 702 ASSERT(ns != NULL); 703 ss = ns->netstack_str; 704 ASSERT(ss != NULL); 705 706 (void) munlinkall(stp, LINKCLOSE|LINKNORMAL, crp, &rval, ss); 707 netstack_rele(ss->ss_netstack); 708 } 709 710 while (_SAMESTR(qp)) { 711 /* 712 * Holding sd_lock prevents q_next from changing in 713 * this stream. 
714 */ 715 mutex_enter(&stp->sd_lock); 716 if (!(flag & (FNDELAY|FNONBLOCK)) && (stp->sd_closetime > 0)) { 717 718 /* 719 * sleep until awakened by strwsrv() or timeout 720 */ 721 for (;;) { 722 mutex_enter(QLOCK(qp->q_next)); 723 if (!(qp->q_next->q_mblkcnt)) { 724 mutex_exit(QLOCK(qp->q_next)); 725 break; 726 } 727 stp->sd_flag |= WSLEEP; 728 729 /* ensure strwsrv gets enabled */ 730 qp->q_next->q_flag |= QWANTW; 731 mutex_exit(QLOCK(qp->q_next)); 732 /* get out if we timed out or recv'd a signal */ 733 if (str_cv_wait(&qp->q_wait, &stp->sd_lock, 734 stp->sd_closetime, 0) <= 0) { 735 break; 736 } 737 } 738 stp->sd_flag &= ~WSLEEP; 739 } 740 mutex_exit(&stp->sd_lock); 741 742 rmq = qp->q_next; 743 if (rmq->q_flag & QISDRV) { 744 ASSERT(!_SAMESTR(rmq)); 745 wait_sq_svc(_RD(qp)->q_syncq); 746 } 747 748 qdetach(_RD(rmq), 1, flag, crp, B_FALSE); 749 } 750 751 /* 752 * Since we call pollwakeup in close() now, the poll list should 753 * be empty in most cases. The only exception is the layered devices 754 * (e.g. the console drivers with redirection modules pushed on top 755 * of it). We have to do this after calling qdetach() because 756 * the redirection module won't have torn down the console 757 * redirection until after qdetach() has been invoked. 758 */ 759 if (stp->sd_pollist.ph_list != NULL) { 760 pollwakeup(&stp->sd_pollist, POLLERR); 761 pollhead_clean(&stp->sd_pollist); 762 } 763 ASSERT(stp->sd_pollist.ph_list == NULL); 764 ASSERT(stp->sd_sidp == NULL); 765 ASSERT(stp->sd_pgidp == NULL); 766 767 /* Prevent qenable from re-enabling the stream head queue */ 768 disable_svc(_RD(qp)); 769 770 /* 771 * Wait until service procedure of each queue is 772 * run, if QINSERVICE is set. 773 */ 774 wait_svc(_RD(qp)); 775 776 /* 777 * Now, flush both queues. 778 */ 779 flushq(_RD(qp), FLUSHALL); 780 flushq(qp, FLUSHALL); 781 782 /* 783 * If the write queue of the stream head is pointing to a 784 * read queue, we have a twisted stream. 
If the read queue 785 * is alive, convert the stream head queues into a dead end. 786 * If the read queue is dead, free the dead pair. 787 */ 788 if (qp->q_next && !_SAMESTR(qp)) { 789 if (qp->q_next->q_qinfo == &deadrend) { /* half-closed pipe */ 790 flushq(qp->q_next, FLUSHALL); /* ensure no message */ 791 shfree(qp->q_next->q_stream); 792 freeq(qp->q_next); 793 freeq(_RD(qp)); 794 } else if (qp->q_next == _RD(qp)) { /* fifo */ 795 freeq(_RD(qp)); 796 } else { /* pipe */ 797 freestp = 0; 798 /* 799 * The q_info pointers are never accessed when 800 * SQLOCK is held. 801 */ 802 ASSERT(qp->q_syncq == _RD(qp)->q_syncq); 803 mutex_enter(SQLOCK(qp->q_syncq)); 804 qp->q_qinfo = &deadwend; 805 _RD(qp)->q_qinfo = &deadrend; 806 mutex_exit(SQLOCK(qp->q_syncq)); 807 } 808 } else { 809 freeq(_RD(qp)); /* free stream head queue pair */ 810 } 811 812 mutex_enter(&vp->v_lock); 813 if (stp->sd_iocblk) { 814 if (stp->sd_iocblk != (mblk_t *)-1) { 815 freemsg(stp->sd_iocblk); 816 } 817 stp->sd_iocblk = NULL; 818 } 819 stp->sd_vnode = NULL; 820 vp->v_stream = NULL; 821 mutex_exit(&vp->v_lock); 822 mutex_enter(&stp->sd_lock); 823 freemsg(stp->sd_cmdblk); 824 stp->sd_cmdblk = NULL; 825 stp->sd_flag &= ~STRCLOSE; 826 cv_broadcast(&stp->sd_monitor); 827 mutex_exit(&stp->sd_lock); 828 829 if (freestp) 830 shfree(stp); 831 return (0); 832 } 833 834 static int 835 strsink(queue_t *q, mblk_t *bp) 836 { 837 struct copyresp *resp; 838 839 switch (bp->b_datap->db_type) { 840 case M_FLUSH: 841 if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) { 842 *bp->b_rptr &= ~FLUSHR; 843 bp->b_flag |= MSGNOLOOP; 844 /* 845 * Protect against the driver passing up 846 * messages after it has done a qprocsoff. 
847 */ 848 if (_OTHERQ(q)->q_next == NULL) 849 freemsg(bp); 850 else 851 qreply(q, bp); 852 } else { 853 freemsg(bp); 854 } 855 break; 856 857 case M_COPYIN: 858 case M_COPYOUT: 859 if (bp->b_cont) { 860 freemsg(bp->b_cont); 861 bp->b_cont = NULL; 862 } 863 bp->b_datap->db_type = M_IOCDATA; 864 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp); 865 resp = (struct copyresp *)bp->b_rptr; 866 resp->cp_rval = (caddr_t)1; /* failure */ 867 /* 868 * Protect against the driver passing up 869 * messages after it has done a qprocsoff. 870 */ 871 if (_OTHERQ(q)->q_next == NULL) 872 freemsg(bp); 873 else 874 qreply(q, bp); 875 break; 876 877 case M_IOCTL: 878 if (bp->b_cont) { 879 freemsg(bp->b_cont); 880 bp->b_cont = NULL; 881 } 882 bp->b_datap->db_type = M_IOCNAK; 883 /* 884 * Protect against the driver passing up 885 * messages after it has done a qprocsoff. 886 */ 887 if (_OTHERQ(q)->q_next == NULL) 888 freemsg(bp); 889 else 890 qreply(q, bp); 891 break; 892 893 default: 894 freemsg(bp); 895 break; 896 } 897 898 return (0); 899 } 900 901 /* 902 * Clean up after a process when it closes a stream. This is called 903 * from closef for all closes, whereas strclose is called only for the 904 * last close on a stream. The siglist is scanned for entries for the 905 * current process, and these are removed. 
906 */ 907 void 908 strclean(struct vnode *vp) 909 { 910 strsig_t *ssp, *pssp, *tssp; 911 stdata_t *stp; 912 int update = 0; 913 914 TRACE_1(TR_FAC_STREAMS_FR, 915 TR_STRCLEAN, "strclean:%p", vp); 916 stp = vp->v_stream; 917 pssp = NULL; 918 mutex_enter(&stp->sd_lock); 919 ssp = stp->sd_siglist; 920 while (ssp) { 921 if (ssp->ss_pidp == curproc->p_pidp) { 922 tssp = ssp->ss_next; 923 if (pssp) 924 pssp->ss_next = tssp; 925 else 926 stp->sd_siglist = tssp; 927 mutex_enter(&pidlock); 928 PID_RELE(ssp->ss_pidp); 929 mutex_exit(&pidlock); 930 kmem_free(ssp, sizeof (strsig_t)); 931 update = 1; 932 ssp = tssp; 933 } else { 934 pssp = ssp; 935 ssp = ssp->ss_next; 936 } 937 } 938 if (update) { 939 stp->sd_sigflags = 0; 940 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 941 stp->sd_sigflags |= ssp->ss_events; 942 } 943 mutex_exit(&stp->sd_lock); 944 } 945 946 /* 947 * Used on the last close to remove any remaining items on the siglist. 948 * These could be present on the siglist due to I_ESETSIG calls that 949 * use process groups or processed that do not have an open file descriptor 950 * for this stream (Such entries would not be removed by strclean). 951 */ 952 static void 953 strcleanall(struct vnode *vp) 954 { 955 strsig_t *ssp, *nssp; 956 stdata_t *stp; 957 958 stp = vp->v_stream; 959 mutex_enter(&stp->sd_lock); 960 ssp = stp->sd_siglist; 961 stp->sd_siglist = NULL; 962 while (ssp) { 963 nssp = ssp->ss_next; 964 mutex_enter(&pidlock); 965 PID_RELE(ssp->ss_pidp); 966 mutex_exit(&pidlock); 967 kmem_free(ssp, sizeof (strsig_t)); 968 ssp = nssp; 969 } 970 stp->sd_sigflags = 0; 971 mutex_exit(&stp->sd_lock); 972 } 973 974 /* 975 * Retrieve the next message from the logical stream head read queue 976 * using either rwnext (if sync stream) or getq_noenab. 977 * It is the callers responsibility to call qbackenable after 978 * it is finished with the message. The caller should not call 979 * qbackenable until after any putback calls to avoid spurious backenabling. 
980 * 981 * Also, handle uioa initialization and process any DBLK_UIOA flaged messages. 982 */ 983 mblk_t * 984 strget(struct stdata *stp, queue_t *q, struct uio *uiop, int first, 985 int *errorp) 986 { 987 sodirect_t *sodp = stp->sd_sodirect; 988 mblk_t *bp; 989 int error; 990 ssize_t rbytes = 0; 991 992 /* Holding sd_lock prevents the read queue from changing */ 993 ASSERT(MUTEX_HELD(&stp->sd_lock)); 994 995 if (uiop != NULL && stp->sd_struiordq != NULL && 996 q->q_first == NULL && 997 (!first || (stp->sd_wakeq & RSLEEP))) { 998 /* 999 * Stream supports rwnext() for the read side. 1000 * If this is the first time we're called by e.g. strread 1001 * only do the downcall if there is a deferred wakeup 1002 * (registered in sd_wakeq). 1003 */ 1004 struiod_t uiod; 1005 1006 if (first) 1007 stp->sd_wakeq &= ~RSLEEP; 1008 1009 (void) uiodup(uiop, &uiod.d_uio, uiod.d_iov, 1010 sizeof (uiod.d_iov) / sizeof (*uiod.d_iov)); 1011 uiod.d_mp = 0; 1012 /* 1013 * Mark that a thread is in rwnext on the read side 1014 * to prevent strrput from nacking ioctls immediately. 1015 * When the last concurrent rwnext returns 1016 * the ioctls are nack'ed. 1017 */ 1018 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1019 stp->sd_struiodnak++; 1020 /* 1021 * Note: rwnext will drop sd_lock. 1022 */ 1023 error = rwnext(q, &uiod); 1024 ASSERT(MUTEX_NOT_HELD(&stp->sd_lock)); 1025 mutex_enter(&stp->sd_lock); 1026 stp->sd_struiodnak--; 1027 while (stp->sd_struiodnak == 0 && 1028 ((bp = stp->sd_struionak) != NULL)) { 1029 stp->sd_struionak = bp->b_next; 1030 bp->b_next = NULL; 1031 bp->b_datap->db_type = M_IOCNAK; 1032 /* 1033 * Protect against the driver passing up 1034 * messages after it has done a qprocsoff. 
1035 */ 1036 if (_OTHERQ(q)->q_next == NULL) 1037 freemsg(bp); 1038 else { 1039 mutex_exit(&stp->sd_lock); 1040 qreply(q, bp); 1041 mutex_enter(&stp->sd_lock); 1042 } 1043 } 1044 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1045 if (error == 0 || error == EWOULDBLOCK) { 1046 if ((bp = uiod.d_mp) != NULL) { 1047 *errorp = 0; 1048 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1049 return (bp); 1050 } 1051 error = 0; 1052 } else if (error == EINVAL) { 1053 /* 1054 * The stream plumbing must have 1055 * changed while we were away, so 1056 * just turn off rwnext()s. 1057 */ 1058 error = 0; 1059 } else if (error == EBUSY) { 1060 /* 1061 * The module might have data in transit using putnext 1062 * Fall back on waiting + getq. 1063 */ 1064 error = 0; 1065 } else { 1066 *errorp = error; 1067 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1068 return (NULL); 1069 } 1070 /* 1071 * Try a getq in case a rwnext() generated mblk 1072 * has bubbled up via strrput(). 1073 */ 1074 } 1075 *errorp = 0; 1076 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1077 1078 if (sodp != NULL && (sodp->sod_state & SOD_ENABLED) && 1079 (sodp->sod_uioa.uioa_state & UIOA_INIT)) { 1080 /* 1081 * First kstrgetmsg() call for an uioa_t so if any 1082 * queued mblk_t's need to consume them before uioa 1083 * from below can occur. 1084 */ 1085 sodp->sod_uioa.uioa_state &= UIOA_CLR; 1086 sodp->sod_uioa.uioa_state |= UIOA_ENABLED; 1087 if (q->q_first != NULL) { 1088 struioainit(q, sodp, uiop); 1089 } 1090 } else { 1091 /* 1092 * If we have a valid uio, try and use this as a guide for how 1093 * many bytes to retrieve from the queue via getq_noenab(). 1094 * Doing this can avoid unneccesary counting of overlong 1095 * messages in putback(). We currently only do this for sockets 1096 * and only if there is no sd_rputdatafunc hook. 1097 * 1098 * The sd_rputdatafunc hook transforms the entire message 1099 * before any bytes in it can be given to a client. So, rbytes 1100 * must be 0 if there is a hook. 
1101 */ 1102 if ((uiop != NULL) && (stp->sd_vnode->v_type == VSOCK) && 1103 (stp->sd_rputdatafunc == NULL)) 1104 rbytes = uiop->uio_resid; 1105 } 1106 1107 bp = getq_noenab(q, rbytes); 1108 if (bp != NULL && (bp->b_datap->db_flags & DBLK_UIOA)) { 1109 /* 1110 * A uioa flaged mblk_t chain, already uio processed, 1111 * add it to the sodirect uioa pending free list. 1112 * 1113 * Note, a b_cont chain headed by a DBLK_UIOA enable 1114 * mblk_t must have all mblk_t(s) DBLK_UIOA enabled. 1115 */ 1116 mblk_t *bpt = sodp->sod_uioaft; 1117 1118 ASSERT(sodp != NULL); 1119 1120 /* 1121 * Add first mblk_t of "bp" chain to current sodirect uioa 1122 * free list tail mblk_t, if any, else empty list so new head. 1123 */ 1124 if (bpt == NULL) 1125 sodp->sod_uioafh = bp; 1126 else 1127 bpt->b_cont = bp; 1128 1129 /* 1130 * Walk mblk_t "bp" chain to find tail and adjust rptr of 1131 * each to reflect that uioamove() has consumed all data. 1132 */ 1133 bpt = bp; 1134 for (;;) { 1135 bpt->b_rptr = bpt->b_wptr; 1136 if (bpt->b_cont == NULL) 1137 break; 1138 bpt = bpt->b_cont; 1139 1140 ASSERT(bpt->b_datap->db_flags & DBLK_UIOA); 1141 } 1142 /* New sodirect uioa free list tail */ 1143 sodp->sod_uioaft = bpt; 1144 1145 /* Only 1 strget() with data returned per uioa_t */ 1146 if (sodp->sod_uioa.uioa_state & UIOA_ENABLED) { 1147 sodp->sod_uioa.uioa_state &= UIOA_CLR; 1148 sodp->sod_uioa.uioa_state |= UIOA_FINI; 1149 } 1150 } 1151 1152 return (bp); 1153 } 1154 1155 /* 1156 * Copy out the message pointed to by `bp' into the uio pointed to by `uiop'. 1157 * If the message does not fit in the uio the remainder of it is returned; 1158 * otherwise NULL is returned. Any embedded zero-length mblk_t's are 1159 * consumed, even if uio_resid reaches zero. On error, `*errorp' is set to 1160 * the error code, the message is consumed, and NULL is returned. 
1161 */ 1162 static mblk_t * 1163 struiocopyout(mblk_t *bp, struct uio *uiop, int *errorp) 1164 { 1165 int error; 1166 ptrdiff_t n; 1167 mblk_t *nbp; 1168 1169 ASSERT(bp->b_wptr >= bp->b_rptr); 1170 1171 do { 1172 ASSERT(!(bp->b_datap->db_flags & DBLK_UIOA)); 1173 1174 if ((n = MIN(uiop->uio_resid, MBLKL(bp))) != 0) { 1175 ASSERT(n > 0); 1176 1177 error = uiomove(bp->b_rptr, n, UIO_READ, uiop); 1178 if (error != 0) { 1179 freemsg(bp); 1180 *errorp = error; 1181 return (NULL); 1182 } 1183 } 1184 1185 bp->b_rptr += n; 1186 while (bp != NULL && (bp->b_rptr >= bp->b_wptr)) { 1187 nbp = bp; 1188 bp = bp->b_cont; 1189 freeb(nbp); 1190 } 1191 } while (bp != NULL && uiop->uio_resid > 0); 1192 1193 *errorp = 0; 1194 return (bp); 1195 } 1196 1197 /* 1198 * Read a stream according to the mode flags in sd_flag: 1199 * 1200 * (default mode) - Byte stream, msg boundaries are ignored 1201 * RD_MSGDIS (msg discard) - Read on msg boundaries and throw away 1202 * any data remaining in msg 1203 * RD_MSGNODIS (msg non-discard) - Read on msg boundaries and put back 1204 * any remaining data on head of read queue 1205 * 1206 * Consume readable messages on the front of the queue until 1207 * ttolwp(curthread)->lwp_count 1208 * is satisfied, the readable messages are exhausted, or a message 1209 * boundary is reached in a message mode. If no data was read and 1210 * the stream was not opened with the NDELAY flag, block until data arrives. 1211 * Otherwise return the data read and update the count. 1212 * 1213 * In default mode a 0 length message signifies end-of-file and terminates 1214 * a read in progress. The 0 length message is removed from the queue 1215 * only if it is the only message read (no data is read). 1216 * 1217 * An attempt to read an M_PROTO or M_PCPROTO message results in an 1218 * EBADMSG error return, unless either RD_PROTDAT or RD_PROTDIS are set. 1219 * If RD_PROTDAT is set, M_PROTO and M_PCPROTO messages are read as data. 
1220 * If RD_PROTDIS is set, the M_PROTO and M_PCPROTO parts of the message 1221 * are unlinked from and M_DATA blocks in the message, the protos are 1222 * thrown away, and the data is read. 1223 */ 1224 /* ARGSUSED */ 1225 int 1226 strread(struct vnode *vp, struct uio *uiop, cred_t *crp) 1227 { 1228 struct stdata *stp; 1229 mblk_t *bp, *nbp; 1230 queue_t *q; 1231 int error = 0; 1232 uint_t old_sd_flag; 1233 int first; 1234 char rflg; 1235 uint_t mark; /* Contains MSG*MARK and _LASTMARK */ 1236 #define _LASTMARK 0x8000 /* Distinct from MSG*MARK */ 1237 short delim; 1238 unsigned char pri = 0; 1239 char waitflag; 1240 unsigned char type; 1241 1242 TRACE_1(TR_FAC_STREAMS_FR, 1243 TR_STRREAD_ENTER, "strread:%p", vp); 1244 ASSERT(vp->v_stream); 1245 stp = vp->v_stream; 1246 1247 mutex_enter(&stp->sd_lock); 1248 1249 if ((error = i_straccess(stp, JCREAD)) != 0) { 1250 mutex_exit(&stp->sd_lock); 1251 return (error); 1252 } 1253 1254 if (stp->sd_flag & (STRDERR|STPLEX)) { 1255 error = strgeterr(stp, STRDERR|STPLEX, 0); 1256 if (error != 0) { 1257 mutex_exit(&stp->sd_lock); 1258 return (error); 1259 } 1260 } 1261 1262 /* 1263 * Loop terminates when uiop->uio_resid == 0. 1264 */ 1265 rflg = 0; 1266 waitflag = READWAIT; 1267 q = _RD(stp->sd_wrq); 1268 for (;;) { 1269 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1270 old_sd_flag = stp->sd_flag; 1271 mark = 0; 1272 delim = 0; 1273 first = 1; 1274 while ((bp = strget(stp, q, uiop, first, &error)) == NULL) { 1275 int done = 0; 1276 1277 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1278 1279 if (error != 0) 1280 goto oops; 1281 1282 if (stp->sd_flag & (STRHUP|STREOF)) { 1283 goto oops; 1284 } 1285 if (rflg && !(stp->sd_flag & STRDELIM)) { 1286 goto oops; 1287 } 1288 /* 1289 * If a read(fd,buf,0) has been done, there is no 1290 * need to sleep. We always have zero bytes to 1291 * return. 
1292 */ 1293 if (uiop->uio_resid == 0) { 1294 goto oops; 1295 } 1296 1297 qbackenable(q, 0); 1298 1299 TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_WAIT, 1300 "strread calls strwaitq:%p, %p, %p", 1301 vp, uiop, crp); 1302 if ((error = strwaitq(stp, waitflag, uiop->uio_resid, 1303 uiop->uio_fmode, -1, &done)) != 0 || done) { 1304 TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_DONE, 1305 "strread error or done:%p, %p, %p", 1306 vp, uiop, crp); 1307 if ((uiop->uio_fmode & FNDELAY) && 1308 (stp->sd_flag & OLDNDELAY) && 1309 (error == EAGAIN)) 1310 error = 0; 1311 goto oops; 1312 } 1313 TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_AWAKE, 1314 "strread awakes:%p, %p, %p", vp, uiop, crp); 1315 if ((error = i_straccess(stp, JCREAD)) != 0) { 1316 goto oops; 1317 } 1318 first = 0; 1319 } 1320 1321 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1322 ASSERT(bp); 1323 ASSERT(!(bp->b_datap->db_flags & DBLK_UIOA)); 1324 pri = bp->b_band; 1325 /* 1326 * Extract any mark information. If the message is not 1327 * completely consumed this information will be put in the mblk 1328 * that is putback. 1329 * If MSGMARKNEXT is set and the message is completely consumed 1330 * the STRATMARK flag will be set below. Likewise, if 1331 * MSGNOTMARKNEXT is set and the message is 1332 * completely consumed STRNOTATMARK will be set. 1333 * 1334 * For some unknown reason strread only breaks the read at the 1335 * last mark. 
1336 */ 1337 mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT); 1338 ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) != 1339 (MSGMARKNEXT|MSGNOTMARKNEXT)); 1340 if (mark != 0 && bp == stp->sd_mark) { 1341 if (rflg) { 1342 putback(stp, q, bp, pri); 1343 goto oops; 1344 } 1345 mark |= _LASTMARK; 1346 stp->sd_mark = NULL; 1347 } 1348 if ((stp->sd_flag & STRDELIM) && (bp->b_flag & MSGDELIM)) 1349 delim = 1; 1350 mutex_exit(&stp->sd_lock); 1351 1352 if (STREAM_NEEDSERVICE(stp)) 1353 stream_runservice(stp); 1354 1355 type = bp->b_datap->db_type; 1356 1357 switch (type) { 1358 1359 case M_DATA: 1360 ismdata: 1361 if (msgnodata(bp)) { 1362 if (mark || delim) { 1363 freemsg(bp); 1364 } else if (rflg) { 1365 1366 /* 1367 * If already read data put zero 1368 * length message back on queue else 1369 * free msg and return 0. 1370 */ 1371 bp->b_band = pri; 1372 mutex_enter(&stp->sd_lock); 1373 putback(stp, q, bp, pri); 1374 mutex_exit(&stp->sd_lock); 1375 } else { 1376 freemsg(bp); 1377 } 1378 error = 0; 1379 goto oops1; 1380 } 1381 1382 rflg = 1; 1383 waitflag |= NOINTR; 1384 bp = struiocopyout(bp, uiop, &error); 1385 if (error != 0) 1386 goto oops1; 1387 1388 mutex_enter(&stp->sd_lock); 1389 if (bp) { 1390 /* 1391 * Have remaining data in message. 1392 * Free msg if in discard mode. 1393 */ 1394 if (stp->sd_read_opt & RD_MSGDIS) { 1395 freemsg(bp); 1396 } else { 1397 bp->b_band = pri; 1398 if ((mark & _LASTMARK) && 1399 (stp->sd_mark == NULL)) 1400 stp->sd_mark = bp; 1401 bp->b_flag |= mark & ~_LASTMARK; 1402 if (delim) 1403 bp->b_flag |= MSGDELIM; 1404 if (msgnodata(bp)) 1405 freemsg(bp); 1406 else 1407 putback(stp, q, bp, pri); 1408 } 1409 } else { 1410 /* 1411 * Consumed the complete message. 1412 * Move the MSG*MARKNEXT information 1413 * to the stream head just in case 1414 * the read queue becomes empty. 
1415 * 1416 * If the stream head was at the mark 1417 * (STRATMARK) before we dropped sd_lock above 1418 * and some data was consumed then we have 1419 * moved past the mark thus STRATMARK is 1420 * cleared. However, if a message arrived in 1421 * strrput during the copyout above causing 1422 * STRATMARK to be set we can not clear that 1423 * flag. 1424 */ 1425 if (mark & 1426 (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) { 1427 if (mark & MSGMARKNEXT) { 1428 stp->sd_flag &= ~STRNOTATMARK; 1429 stp->sd_flag |= STRATMARK; 1430 } else if (mark & MSGNOTMARKNEXT) { 1431 stp->sd_flag &= ~STRATMARK; 1432 stp->sd_flag |= STRNOTATMARK; 1433 } else { 1434 stp->sd_flag &= 1435 ~(STRATMARK|STRNOTATMARK); 1436 } 1437 } else if (rflg && (old_sd_flag & STRATMARK)) { 1438 stp->sd_flag &= ~STRATMARK; 1439 } 1440 } 1441 1442 /* 1443 * Check for signal messages at the front of the read 1444 * queue and generate the signal(s) if appropriate. 1445 * The only signal that can be on queue is M_SIG at 1446 * this point. 1447 */ 1448 while ((((bp = q->q_first)) != NULL) && 1449 (bp->b_datap->db_type == M_SIG)) { 1450 bp = getq_noenab(q, 0); 1451 /* 1452 * sd_lock is held so the content of the 1453 * read queue can not change. 1454 */ 1455 ASSERT(bp != NULL && 1456 bp->b_datap->db_type == M_SIG); 1457 strsignal_nolock(stp, *bp->b_rptr, 1458 (int32_t)bp->b_band); 1459 mutex_exit(&stp->sd_lock); 1460 freemsg(bp); 1461 if (STREAM_NEEDSERVICE(stp)) 1462 stream_runservice(stp); 1463 mutex_enter(&stp->sd_lock); 1464 } 1465 1466 if ((uiop->uio_resid == 0) || (mark & _LASTMARK) || 1467 delim || 1468 (stp->sd_read_opt & (RD_MSGDIS|RD_MSGNODIS))) { 1469 goto oops; 1470 } 1471 continue; 1472 1473 case M_SIG: 1474 strsignal(stp, *bp->b_rptr, (int32_t)bp->b_band); 1475 freemsg(bp); 1476 mutex_enter(&stp->sd_lock); 1477 continue; 1478 1479 case M_PROTO: 1480 case M_PCPROTO: 1481 /* 1482 * Only data messages are readable. 1483 * Any others generate an error, unless 1484 * RD_PROTDIS or RD_PROTDAT is set. 
1485 */ 1486 if (stp->sd_read_opt & RD_PROTDAT) { 1487 for (nbp = bp; nbp; nbp = nbp->b_next) { 1488 if ((nbp->b_datap->db_type == 1489 M_PROTO) || 1490 (nbp->b_datap->db_type == 1491 M_PCPROTO)) { 1492 nbp->b_datap->db_type = M_DATA; 1493 } else { 1494 break; 1495 } 1496 } 1497 /* 1498 * clear stream head hi pri flag based on 1499 * first message 1500 */ 1501 if (type == M_PCPROTO) { 1502 mutex_enter(&stp->sd_lock); 1503 stp->sd_flag &= ~STRPRI; 1504 mutex_exit(&stp->sd_lock); 1505 } 1506 goto ismdata; 1507 } else if (stp->sd_read_opt & RD_PROTDIS) { 1508 /* 1509 * discard non-data messages 1510 */ 1511 while (bp && 1512 ((bp->b_datap->db_type == M_PROTO) || 1513 (bp->b_datap->db_type == M_PCPROTO))) { 1514 nbp = unlinkb(bp); 1515 freeb(bp); 1516 bp = nbp; 1517 } 1518 /* 1519 * clear stream head hi pri flag based on 1520 * first message 1521 */ 1522 if (type == M_PCPROTO) { 1523 mutex_enter(&stp->sd_lock); 1524 stp->sd_flag &= ~STRPRI; 1525 mutex_exit(&stp->sd_lock); 1526 } 1527 if (bp) { 1528 bp->b_band = pri; 1529 goto ismdata; 1530 } else { 1531 break; 1532 } 1533 } 1534 /* FALLTHRU */ 1535 case M_PASSFP: 1536 if ((bp->b_datap->db_type == M_PASSFP) && 1537 (stp->sd_read_opt & RD_PROTDIS)) { 1538 freemsg(bp); 1539 break; 1540 } 1541 mutex_enter(&stp->sd_lock); 1542 putback(stp, q, bp, pri); 1543 mutex_exit(&stp->sd_lock); 1544 if (rflg == 0) 1545 error = EBADMSG; 1546 goto oops1; 1547 1548 default: 1549 /* 1550 * Garbage on stream head read queue. 1551 */ 1552 cmn_err(CE_WARN, "bad %x found at stream head\n", 1553 bp->b_datap->db_type); 1554 freemsg(bp); 1555 goto oops1; 1556 } 1557 mutex_enter(&stp->sd_lock); 1558 } 1559 oops: 1560 mutex_exit(&stp->sd_lock); 1561 oops1: 1562 qbackenable(q, pri); 1563 return (error); 1564 #undef _LASTMARK 1565 } 1566 1567 /* 1568 * Default processing of M_PROTO/M_PCPROTO messages. 1569 * Determine which wakeups and signals are needed. 1570 * This can be replaced by a user-specified procedure for kernel users 1571 * of STREAMS. 
1572 */ 1573 /* ARGSUSED */ 1574 mblk_t * 1575 strrput_proto(vnode_t *vp, mblk_t *mp, 1576 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 1577 strsigset_t *allmsgsigs, strpollset_t *pollwakeups) 1578 { 1579 *wakeups = RSLEEP; 1580 *allmsgsigs = 0; 1581 1582 switch (mp->b_datap->db_type) { 1583 case M_PROTO: 1584 if (mp->b_band == 0) { 1585 *firstmsgsigs = S_INPUT | S_RDNORM; 1586 *pollwakeups = POLLIN | POLLRDNORM; 1587 } else { 1588 *firstmsgsigs = S_INPUT | S_RDBAND; 1589 *pollwakeups = POLLIN | POLLRDBAND; 1590 } 1591 break; 1592 case M_PCPROTO: 1593 *firstmsgsigs = S_HIPRI; 1594 *pollwakeups = POLLPRI; 1595 break; 1596 } 1597 return (mp); 1598 } 1599 1600 /* 1601 * Default processing of everything but M_DATA, M_PROTO, M_PCPROTO and 1602 * M_PASSFP messages. 1603 * Determine which wakeups and signals are needed. 1604 * This can be replaced by a user-specified procedure for kernel users 1605 * of STREAMS. 1606 */ 1607 /* ARGSUSED */ 1608 mblk_t * 1609 strrput_misc(vnode_t *vp, mblk_t *mp, 1610 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 1611 strsigset_t *allmsgsigs, strpollset_t *pollwakeups) 1612 { 1613 *wakeups = 0; 1614 *firstmsgsigs = 0; 1615 *allmsgsigs = 0; 1616 *pollwakeups = 0; 1617 return (mp); 1618 } 1619 1620 /* 1621 * Stream read put procedure. Called from downstream driver/module 1622 * with messages for the stream head. Data, protocol, and in-stream 1623 * signal messages are placed on the queue, others are handled directly. 
1624 */ 1625 int 1626 strrput(queue_t *q, mblk_t *bp) 1627 { 1628 struct stdata *stp; 1629 ulong_t rput_opt; 1630 strwakeup_t wakeups; 1631 strsigset_t firstmsgsigs; /* Signals if first message on queue */ 1632 strsigset_t allmsgsigs; /* Signals for all messages */ 1633 strsigset_t signals; /* Signals events to generate */ 1634 strpollset_t pollwakeups; 1635 mblk_t *nextbp; 1636 uchar_t band = 0; 1637 int hipri_sig; 1638 1639 stp = (struct stdata *)q->q_ptr; 1640 /* 1641 * Use rput_opt for optimized access to the SR_ flags except 1642 * SR_POLLIN. That flag has to be checked under sd_lock since it 1643 * is modified by strpoll(). 1644 */ 1645 rput_opt = stp->sd_rput_opt; 1646 1647 ASSERT(qclaimed(q)); 1648 TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_ENTER, 1649 "strrput called with message type:q %p bp %p", q, bp); 1650 1651 /* 1652 * Perform initial processing and pass to the parameterized functions. 1653 */ 1654 ASSERT(bp->b_next == NULL); 1655 1656 switch (bp->b_datap->db_type) { 1657 case M_DATA: 1658 /* 1659 * sockfs is the only consumer of STREOF and when it is set, 1660 * it implies that the receiver is not interested in receiving 1661 * any more data, hence the mblk is freed to prevent unnecessary 1662 * message queueing at the stream head. 1663 */ 1664 if (stp->sd_flag == STREOF) { 1665 freemsg(bp); 1666 return (0); 1667 } 1668 if ((rput_opt & SR_IGN_ZEROLEN) && 1669 bp->b_rptr == bp->b_wptr && msgnodata(bp)) { 1670 /* 1671 * Ignore zero-length M_DATA messages. These might be 1672 * generated by some transports. 1673 * The zero-length M_DATA messages, even if they 1674 * are ignored, should effect the atmark tracking and 1675 * should wake up a thread sleeping in strwaitmark. 1676 */ 1677 mutex_enter(&stp->sd_lock); 1678 if (bp->b_flag & MSGMARKNEXT) { 1679 /* 1680 * Record the position of the mark either 1681 * in q_last or in STRATMARK. 
1682 */ 1683 if (q->q_last != NULL) { 1684 q->q_last->b_flag &= ~MSGNOTMARKNEXT; 1685 q->q_last->b_flag |= MSGMARKNEXT; 1686 } else { 1687 stp->sd_flag &= ~STRNOTATMARK; 1688 stp->sd_flag |= STRATMARK; 1689 } 1690 } else if (bp->b_flag & MSGNOTMARKNEXT) { 1691 /* 1692 * Record that this is not the position of 1693 * the mark either in q_last or in 1694 * STRNOTATMARK. 1695 */ 1696 if (q->q_last != NULL) { 1697 q->q_last->b_flag &= ~MSGMARKNEXT; 1698 q->q_last->b_flag |= MSGNOTMARKNEXT; 1699 } else { 1700 stp->sd_flag &= ~STRATMARK; 1701 stp->sd_flag |= STRNOTATMARK; 1702 } 1703 } 1704 if (stp->sd_flag & RSLEEP) { 1705 stp->sd_flag &= ~RSLEEP; 1706 cv_broadcast(&q->q_wait); 1707 } 1708 mutex_exit(&stp->sd_lock); 1709 freemsg(bp); 1710 return (0); 1711 } 1712 wakeups = RSLEEP; 1713 if (bp->b_band == 0) { 1714 firstmsgsigs = S_INPUT | S_RDNORM; 1715 pollwakeups = POLLIN | POLLRDNORM; 1716 } else { 1717 firstmsgsigs = S_INPUT | S_RDBAND; 1718 pollwakeups = POLLIN | POLLRDBAND; 1719 } 1720 if (rput_opt & SR_SIGALLDATA) 1721 allmsgsigs = firstmsgsigs; 1722 else 1723 allmsgsigs = 0; 1724 1725 mutex_enter(&stp->sd_lock); 1726 if ((rput_opt & SR_CONSOL_DATA) && 1727 (q->q_last != NULL) && 1728 (bp->b_flag & (MSGMARK|MSGDELIM)) == 0) { 1729 /* 1730 * Consolidate an M_DATA message onto an M_DATA, 1731 * M_PROTO, or M_PCPROTO by merging it with q_last. 1732 * The consolidation does not take place if 1733 * the old message is marked with either of the 1734 * marks or the delim flag or if the new 1735 * message is marked with MSGMARK. The MSGMARK 1736 * check is needed to handle the odd semantics of 1737 * MSGMARK where essentially the whole message 1738 * is to be treated as marked. 1739 * Carry any MSGMARKNEXT and MSGNOTMARKNEXT from the 1740 * new message to the front of the b_cont chain. 
1741 */ 1742 mblk_t *lbp = q->q_last; 1743 unsigned char db_type = lbp->b_datap->db_type; 1744 1745 if ((db_type == M_DATA || db_type == M_PROTO || 1746 db_type == M_PCPROTO) && 1747 !(lbp->b_flag & (MSGDELIM|MSGMARK|MSGMARKNEXT))) { 1748 rmvq_noenab(q, lbp); 1749 /* 1750 * The first message in the b_cont list 1751 * tracks MSGMARKNEXT and MSGNOTMARKNEXT. 1752 * We need to handle the case where we 1753 * are appending: 1754 * 1755 * 1) a MSGMARKNEXT to a MSGNOTMARKNEXT. 1756 * 2) a MSGMARKNEXT to a plain message. 1757 * 3) a MSGNOTMARKNEXT to a plain message 1758 * 4) a MSGNOTMARKNEXT to a MSGNOTMARKNEXT 1759 * message. 1760 * 1761 * Thus we never append a MSGMARKNEXT or 1762 * MSGNOTMARKNEXT to a MSGMARKNEXT message. 1763 */ 1764 if (bp->b_flag & MSGMARKNEXT) { 1765 lbp->b_flag |= MSGMARKNEXT; 1766 lbp->b_flag &= ~MSGNOTMARKNEXT; 1767 bp->b_flag &= ~MSGMARKNEXT; 1768 } else if (bp->b_flag & MSGNOTMARKNEXT) { 1769 lbp->b_flag |= MSGNOTMARKNEXT; 1770 bp->b_flag &= ~MSGNOTMARKNEXT; 1771 } 1772 1773 linkb(lbp, bp); 1774 bp = lbp; 1775 /* 1776 * The new message logically isn't the first 1777 * even though the q_first check below thinks 1778 * it is. Clear the firstmsgsigs to make it 1779 * not appear to be first. 
1780 */ 1781 firstmsgsigs = 0; 1782 } 1783 } 1784 break; 1785 1786 case M_PASSFP: 1787 wakeups = RSLEEP; 1788 allmsgsigs = 0; 1789 if (bp->b_band == 0) { 1790 firstmsgsigs = S_INPUT | S_RDNORM; 1791 pollwakeups = POLLIN | POLLRDNORM; 1792 } else { 1793 firstmsgsigs = S_INPUT | S_RDBAND; 1794 pollwakeups = POLLIN | POLLRDBAND; 1795 } 1796 mutex_enter(&stp->sd_lock); 1797 break; 1798 1799 case M_PROTO: 1800 case M_PCPROTO: 1801 ASSERT(stp->sd_rprotofunc != NULL); 1802 bp = (stp->sd_rprotofunc)(stp->sd_vnode, bp, 1803 &wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups); 1804 #define ALLSIG (S_INPUT|S_HIPRI|S_OUTPUT|S_MSG|S_ERROR|S_HANGUP|S_RDNORM|\ 1805 S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG) 1806 #define ALLPOLL (POLLIN|POLLPRI|POLLOUT|POLLRDNORM|POLLWRNORM|POLLRDBAND|\ 1807 POLLWRBAND) 1808 1809 ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0); 1810 ASSERT((firstmsgsigs & ~ALLSIG) == 0); 1811 ASSERT((allmsgsigs & ~ALLSIG) == 0); 1812 ASSERT((pollwakeups & ~ALLPOLL) == 0); 1813 1814 mutex_enter(&stp->sd_lock); 1815 break; 1816 1817 default: 1818 ASSERT(stp->sd_rmiscfunc != NULL); 1819 bp = (stp->sd_rmiscfunc)(stp->sd_vnode, bp, 1820 &wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups); 1821 ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0); 1822 ASSERT((firstmsgsigs & ~ALLSIG) == 0); 1823 ASSERT((allmsgsigs & ~ALLSIG) == 0); 1824 ASSERT((pollwakeups & ~ALLPOLL) == 0); 1825 #undef ALLSIG 1826 #undef ALLPOLL 1827 mutex_enter(&stp->sd_lock); 1828 break; 1829 } 1830 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1831 1832 /* By default generate superset of signals */ 1833 signals = (firstmsgsigs | allmsgsigs); 1834 1835 /* 1836 * The proto and misc functions can return multiple messages 1837 * as a b_next chain. Such messages are processed separately. 
1838 */ 1839 one_more: 1840 hipri_sig = 0; 1841 if (bp == NULL) { 1842 nextbp = NULL; 1843 } else { 1844 nextbp = bp->b_next; 1845 bp->b_next = NULL; 1846 1847 switch (bp->b_datap->db_type) { 1848 case M_PCPROTO: 1849 /* 1850 * Only one priority protocol message is allowed at the 1851 * stream head at a time. 1852 */ 1853 if (stp->sd_flag & STRPRI) { 1854 TRACE_0(TR_FAC_STREAMS_FR, TR_STRRPUT_PROTERR, 1855 "M_PCPROTO already at head"); 1856 freemsg(bp); 1857 mutex_exit(&stp->sd_lock); 1858 goto done; 1859 } 1860 stp->sd_flag |= STRPRI; 1861 hipri_sig = 1; 1862 /* FALLTHRU */ 1863 case M_DATA: 1864 case M_PROTO: 1865 case M_PASSFP: 1866 band = bp->b_band; 1867 /* 1868 * Marking doesn't work well when messages 1869 * are marked in more than one band. We only 1870 * remember the last message received, even if 1871 * it is placed on the queue ahead of other 1872 * marked messages. 1873 */ 1874 if (bp->b_flag & MSGMARK) 1875 stp->sd_mark = bp; 1876 (void) putq(q, bp); 1877 1878 /* 1879 * If message is a PCPROTO message, always use 1880 * firstmsgsigs to determine if a signal should be 1881 * sent as strrput is the only place to send 1882 * signals for PCPROTO. Other messages are based on 1883 * the STRGETINPROG flag. The flag determines if 1884 * strrput or (k)strgetmsg will be responsible for 1885 * sending the signals, in the firstmsgsigs case. 
1886 */ 1887 if ((hipri_sig == 1) || 1888 (((stp->sd_flag & STRGETINPROG) == 0) && 1889 (q->q_first == bp))) 1890 signals = (firstmsgsigs | allmsgsigs); 1891 else 1892 signals = allmsgsigs; 1893 break; 1894 1895 default: 1896 mutex_exit(&stp->sd_lock); 1897 (void) strrput_nondata(q, bp); 1898 mutex_enter(&stp->sd_lock); 1899 break; 1900 } 1901 } 1902 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1903 /* 1904 * Wake sleeping read/getmsg and cancel deferred wakeup 1905 */ 1906 if (wakeups & RSLEEP) 1907 stp->sd_wakeq &= ~RSLEEP; 1908 1909 wakeups &= stp->sd_flag; 1910 if (wakeups & RSLEEP) { 1911 stp->sd_flag &= ~RSLEEP; 1912 cv_broadcast(&q->q_wait); 1913 } 1914 if (wakeups & WSLEEP) { 1915 stp->sd_flag &= ~WSLEEP; 1916 cv_broadcast(&_WR(q)->q_wait); 1917 } 1918 1919 if (pollwakeups != 0) { 1920 if (pollwakeups == (POLLIN | POLLRDNORM)) { 1921 /* 1922 * Can't use rput_opt since it was not 1923 * read when sd_lock was held and SR_POLLIN is changed 1924 * by strpoll() under sd_lock. 1925 */ 1926 if (!(stp->sd_rput_opt & SR_POLLIN)) 1927 goto no_pollwake; 1928 stp->sd_rput_opt &= ~SR_POLLIN; 1929 } 1930 mutex_exit(&stp->sd_lock); 1931 pollwakeup(&stp->sd_pollist, pollwakeups); 1932 mutex_enter(&stp->sd_lock); 1933 } 1934 no_pollwake: 1935 1936 /* 1937 * strsendsig can handle multiple signals with a 1938 * single call. 1939 */ 1940 if (stp->sd_sigflags & signals) 1941 strsendsig(stp->sd_siglist, signals, band, 0); 1942 mutex_exit(&stp->sd_lock); 1943 1944 1945 done: 1946 if (nextbp == NULL) 1947 return (0); 1948 1949 /* 1950 * Any signals were handled the first time. 1951 * Wakeups and pollwakeups are redone to avoid any race 1952 * conditions - all the messages are not queued until the 1953 * last message has been processed by strrput. 
1954 */ 1955 bp = nextbp; 1956 signals = firstmsgsigs = allmsgsigs = 0; 1957 mutex_enter(&stp->sd_lock); 1958 goto one_more; 1959 } 1960 1961 static void 1962 log_dupioc(queue_t *rq, mblk_t *bp) 1963 { 1964 queue_t *wq, *qp; 1965 char *modnames, *mnp, *dname; 1966 size_t maxmodstr; 1967 boolean_t islast; 1968 1969 /* 1970 * Allocate a buffer large enough to hold the names of nstrpush modules 1971 * and one driver, with spaces between and NUL terminator. If we can't 1972 * get memory, then we'll just log the driver name. 1973 */ 1974 maxmodstr = nstrpush * (FMNAMESZ + 1); 1975 mnp = modnames = kmem_alloc(maxmodstr, KM_NOSLEEP); 1976 1977 /* march down write side to print log message down to the driver */ 1978 wq = WR(rq); 1979 1980 /* make sure q_next doesn't shift around while we're grabbing data */ 1981 claimstr(wq); 1982 qp = wq->q_next; 1983 do { 1984 if ((dname = qp->q_qinfo->qi_minfo->mi_idname) == NULL) 1985 dname = "?"; 1986 islast = !SAMESTR(qp) || qp->q_next == NULL; 1987 if (modnames == NULL) { 1988 /* 1989 * If we don't have memory, then get the driver name in 1990 * the log where we can see it. Note that memory 1991 * pressure is a possible cause of these sorts of bugs. 1992 */ 1993 if (islast) { 1994 modnames = dname; 1995 maxmodstr = 0; 1996 } 1997 } else { 1998 mnp += snprintf(mnp, FMNAMESZ + 1, "%s", dname); 1999 if (!islast) 2000 *mnp++ = ' '; 2001 } 2002 qp = qp->q_next; 2003 } while (!islast); 2004 releasestr(wq); 2005 /* Cannot happen unless stream head is corrupt. */ 2006 ASSERT(modnames != NULL); 2007 (void) strlog(rq->q_qinfo->qi_minfo->mi_idnum, 0, 1, 2008 SL_CONSOLE|SL_TRACE|SL_ERROR, 2009 "Warning: stream %p received duplicate %X M_IOC%s; module list: %s", 2010 rq->q_ptr, ((struct iocblk *)bp->b_rptr)->ioc_cmd, 2011 (DB_TYPE(bp) == M_IOCACK ? 
"ACK" : "NAK"), modnames); 2012 if (maxmodstr != 0) 2013 kmem_free(modnames, maxmodstr); 2014 } 2015 2016 int 2017 strrput_nondata(queue_t *q, mblk_t *bp) 2018 { 2019 struct stdata *stp; 2020 struct iocblk *iocbp; 2021 struct stroptions *sop; 2022 struct copyreq *reqp; 2023 struct copyresp *resp; 2024 unsigned char bpri; 2025 unsigned char flushed_already = 0; 2026 2027 stp = (struct stdata *)q->q_ptr; 2028 2029 ASSERT(!(stp->sd_flag & STPLEX)); 2030 ASSERT(qclaimed(q)); 2031 2032 switch (bp->b_datap->db_type) { 2033 case M_ERROR: 2034 /* 2035 * An error has occurred downstream, the errno is in the first 2036 * bytes of the message. 2037 */ 2038 if ((bp->b_wptr - bp->b_rptr) == 2) { /* New flavor */ 2039 unsigned char rw = 0; 2040 2041 mutex_enter(&stp->sd_lock); 2042 if (*bp->b_rptr != NOERROR) { /* read error */ 2043 if (*bp->b_rptr != 0) { 2044 if (stp->sd_flag & STRDERR) 2045 flushed_already |= FLUSHR; 2046 stp->sd_flag |= STRDERR; 2047 rw |= FLUSHR; 2048 } else { 2049 stp->sd_flag &= ~STRDERR; 2050 } 2051 stp->sd_rerror = *bp->b_rptr; 2052 } 2053 bp->b_rptr++; 2054 if (*bp->b_rptr != NOERROR) { /* write error */ 2055 if (*bp->b_rptr != 0) { 2056 if (stp->sd_flag & STWRERR) 2057 flushed_already |= FLUSHW; 2058 stp->sd_flag |= STWRERR; 2059 rw |= FLUSHW; 2060 } else { 2061 stp->sd_flag &= ~STWRERR; 2062 } 2063 stp->sd_werror = *bp->b_rptr; 2064 } 2065 if (rw) { 2066 TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_WAKE, 2067 "strrput cv_broadcast:q %p, bp %p", 2068 q, bp); 2069 cv_broadcast(&q->q_wait); /* readers */ 2070 cv_broadcast(&_WR(q)->q_wait); /* writers */ 2071 cv_broadcast(&stp->sd_monitor); /* ioctllers */ 2072 2073 mutex_exit(&stp->sd_lock); 2074 pollwakeup(&stp->sd_pollist, POLLERR); 2075 mutex_enter(&stp->sd_lock); 2076 2077 if (stp->sd_sigflags & S_ERROR) 2078 strsendsig(stp->sd_siglist, S_ERROR, 0, 2079 ((rw & FLUSHR) ? 
stp->sd_rerror : 2080 stp->sd_werror)); 2081 mutex_exit(&stp->sd_lock); 2082 /* 2083 * Send the M_FLUSH only 2084 * for the first M_ERROR 2085 * message on the stream 2086 */ 2087 if (flushed_already == rw) { 2088 freemsg(bp); 2089 return (0); 2090 } 2091 2092 bp->b_datap->db_type = M_FLUSH; 2093 *bp->b_rptr = rw; 2094 bp->b_wptr = bp->b_rptr + 1; 2095 /* 2096 * Protect against the driver 2097 * passing up messages after 2098 * it has done a qprocsoff 2099 */ 2100 if (_OTHERQ(q)->q_next == NULL) 2101 freemsg(bp); 2102 else 2103 qreply(q, bp); 2104 return (0); 2105 } else 2106 mutex_exit(&stp->sd_lock); 2107 } else if (*bp->b_rptr != 0) { /* Old flavor */ 2108 if (stp->sd_flag & (STRDERR|STWRERR)) 2109 flushed_already = FLUSHRW; 2110 mutex_enter(&stp->sd_lock); 2111 stp->sd_flag |= (STRDERR|STWRERR); 2112 stp->sd_rerror = *bp->b_rptr; 2113 stp->sd_werror = *bp->b_rptr; 2114 TRACE_2(TR_FAC_STREAMS_FR, 2115 TR_STRRPUT_WAKE2, 2116 "strrput wakeup #2:q %p, bp %p", q, bp); 2117 cv_broadcast(&q->q_wait); /* the readers */ 2118 cv_broadcast(&_WR(q)->q_wait); /* the writers */ 2119 cv_broadcast(&stp->sd_monitor); /* ioctllers */ 2120 2121 mutex_exit(&stp->sd_lock); 2122 pollwakeup(&stp->sd_pollist, POLLERR); 2123 mutex_enter(&stp->sd_lock); 2124 2125 if (stp->sd_sigflags & S_ERROR) 2126 strsendsig(stp->sd_siglist, S_ERROR, 0, 2127 (stp->sd_werror ? stp->sd_werror : 2128 stp->sd_rerror)); 2129 mutex_exit(&stp->sd_lock); 2130 2131 /* 2132 * Send the M_FLUSH only 2133 * for the first M_ERROR 2134 * message on the stream 2135 */ 2136 if (flushed_already != FLUSHRW) { 2137 bp->b_datap->db_type = M_FLUSH; 2138 *bp->b_rptr = FLUSHRW; 2139 /* 2140 * Protect against the driver passing up 2141 * messages after it has done a 2142 * qprocsoff. 
2143 */ 2144 if (_OTHERQ(q)->q_next == NULL) 2145 freemsg(bp); 2146 else 2147 qreply(q, bp); 2148 return (0); 2149 } 2150 } 2151 freemsg(bp); 2152 return (0); 2153 2154 case M_HANGUP: 2155 2156 freemsg(bp); 2157 mutex_enter(&stp->sd_lock); 2158 stp->sd_werror = ENXIO; 2159 stp->sd_flag |= STRHUP; 2160 stp->sd_flag &= ~(WSLEEP|RSLEEP); 2161 2162 /* 2163 * send signal if controlling tty 2164 */ 2165 2166 if (stp->sd_sidp) { 2167 prsignal(stp->sd_sidp, SIGHUP); 2168 if (stp->sd_sidp != stp->sd_pgidp) 2169 pgsignal(stp->sd_pgidp, SIGTSTP); 2170 } 2171 2172 /* 2173 * wake up read, write, and exception pollers and 2174 * reset wakeup mechanism. 2175 */ 2176 cv_broadcast(&q->q_wait); /* the readers */ 2177 cv_broadcast(&_WR(q)->q_wait); /* the writers */ 2178 cv_broadcast(&stp->sd_monitor); /* the ioctllers */ 2179 strhup(stp); 2180 mutex_exit(&stp->sd_lock); 2181 return (0); 2182 2183 case M_UNHANGUP: 2184 freemsg(bp); 2185 mutex_enter(&stp->sd_lock); 2186 stp->sd_werror = 0; 2187 stp->sd_flag &= ~STRHUP; 2188 mutex_exit(&stp->sd_lock); 2189 return (0); 2190 2191 case M_SIG: 2192 /* 2193 * Someone downstream wants to post a signal. The 2194 * signal to post is contained in the first byte of the 2195 * message. If the message would go on the front of 2196 * the queue, send a signal to the process group 2197 * (if not SIGPOLL) or to the siglist processes 2198 * (SIGPOLL). If something is already on the queue, 2199 * OR if we are delivering a delayed suspend (*sigh* 2200 * another "tty" hack) and there's no one sleeping already, 2201 * just enqueue the message. 2202 */ 2203 mutex_enter(&stp->sd_lock); 2204 if (q->q_first || (*bp->b_rptr == SIGTSTP && 2205 !(stp->sd_flag & RSLEEP))) { 2206 (void) putq(q, bp); 2207 mutex_exit(&stp->sd_lock); 2208 return (0); 2209 } 2210 mutex_exit(&stp->sd_lock); 2211 /* FALLTHRU */ 2212 2213 case M_PCSIG: 2214 /* 2215 * Don't enqueue, just post the signal. 
2216 */ 2217 strsignal(stp, *bp->b_rptr, 0L); 2218 freemsg(bp); 2219 return (0); 2220 2221 case M_CMD: 2222 if (MBLKL(bp) != sizeof (cmdblk_t)) { 2223 freemsg(bp); 2224 return (0); 2225 } 2226 2227 mutex_enter(&stp->sd_lock); 2228 if (stp->sd_flag & STRCMDWAIT) { 2229 ASSERT(stp->sd_cmdblk == NULL); 2230 stp->sd_cmdblk = bp; 2231 cv_broadcast(&stp->sd_monitor); 2232 mutex_exit(&stp->sd_lock); 2233 } else { 2234 mutex_exit(&stp->sd_lock); 2235 freemsg(bp); 2236 } 2237 return (0); 2238 2239 case M_FLUSH: 2240 /* 2241 * Flush queues. The indication of which queues to flush 2242 * is in the first byte of the message. If the read queue 2243 * is specified, then flush it. If FLUSHBAND is set, just 2244 * flush the band specified by the second byte of the message. 2245 * 2246 * If a module has issued a M_SETOPT to not flush hi 2247 * priority messages off of the stream head, then pass this 2248 * flag into the flushq code to preserve such messages. 2249 */ 2250 2251 if (*bp->b_rptr & FLUSHR) { 2252 mutex_enter(&stp->sd_lock); 2253 if (*bp->b_rptr & FLUSHBAND) { 2254 ASSERT((bp->b_wptr - bp->b_rptr) >= 2); 2255 flushband(q, *(bp->b_rptr + 1), FLUSHALL); 2256 } else 2257 flushq_common(q, FLUSHALL, 2258 stp->sd_read_opt & RFLUSHPCPROT); 2259 if ((q->q_first == NULL) || 2260 (q->q_first->b_datap->db_type < QPCTL)) 2261 stp->sd_flag &= ~STRPRI; 2262 else { 2263 ASSERT(stp->sd_flag & STRPRI); 2264 } 2265 mutex_exit(&stp->sd_lock); 2266 } 2267 if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) { 2268 *bp->b_rptr &= ~FLUSHR; 2269 bp->b_flag |= MSGNOLOOP; 2270 /* 2271 * Protect against the driver passing up 2272 * messages after it has done a qprocsoff. 2273 */ 2274 if (_OTHERQ(q)->q_next == NULL) 2275 freemsg(bp); 2276 else 2277 qreply(q, bp); 2278 return (0); 2279 } 2280 freemsg(bp); 2281 return (0); 2282 2283 case M_IOCACK: 2284 case M_IOCNAK: 2285 iocbp = (struct iocblk *)bp->b_rptr; 2286 /* 2287 * If not waiting for ACK or NAK then just free msg. 
2288 * If incorrect id sequence number then just free msg. 2289 * If already have ACK or NAK for user then this is a 2290 * duplicate, display a warning and free the msg. 2291 */ 2292 mutex_enter(&stp->sd_lock); 2293 if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk || 2294 (stp->sd_iocid != iocbp->ioc_id)) { 2295 /* 2296 * If the ACK/NAK is a dup, display a message 2297 * Dup is when sd_iocid == ioc_id, and 2298 * sd_iocblk == <valid ptr> or -1 (the former 2299 * is when an ioctl has been put on the stream 2300 * head, but has not yet been consumed, the 2301 * later is when it has been consumed). 2302 */ 2303 if ((stp->sd_iocid == iocbp->ioc_id) && 2304 (stp->sd_iocblk != NULL)) { 2305 log_dupioc(q, bp); 2306 } 2307 freemsg(bp); 2308 mutex_exit(&stp->sd_lock); 2309 return (0); 2310 } 2311 2312 /* 2313 * Assign ACK or NAK to user and wake up. 2314 */ 2315 stp->sd_iocblk = bp; 2316 cv_broadcast(&stp->sd_monitor); 2317 mutex_exit(&stp->sd_lock); 2318 return (0); 2319 2320 case M_COPYIN: 2321 case M_COPYOUT: 2322 reqp = (struct copyreq *)bp->b_rptr; 2323 2324 /* 2325 * If not waiting for ACK or NAK then just fail request. 2326 * If already have ACK, NAK, or copy request, then just 2327 * fail request. 2328 * If incorrect id sequence number then just fail request. 2329 */ 2330 mutex_enter(&stp->sd_lock); 2331 if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk || 2332 (stp->sd_iocid != reqp->cq_id)) { 2333 if (bp->b_cont) { 2334 freemsg(bp->b_cont); 2335 bp->b_cont = NULL; 2336 } 2337 bp->b_datap->db_type = M_IOCDATA; 2338 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp); 2339 resp = (struct copyresp *)bp->b_rptr; 2340 resp->cp_rval = (caddr_t)1; /* failure */ 2341 mutex_exit(&stp->sd_lock); 2342 putnext(stp->sd_wrq, bp); 2343 return (0); 2344 } 2345 2346 /* 2347 * Assign copy request to user and wake up. 
2348 */ 2349 stp->sd_iocblk = bp; 2350 cv_broadcast(&stp->sd_monitor); 2351 mutex_exit(&stp->sd_lock); 2352 return (0); 2353 2354 case M_SETOPTS: 2355 /* 2356 * Set stream head options (read option, write offset, 2357 * min/max packet size, and/or high/low water marks for 2358 * the read side only). 2359 */ 2360 2361 bpri = 0; 2362 sop = (struct stroptions *)bp->b_rptr; 2363 mutex_enter(&stp->sd_lock); 2364 if (sop->so_flags & SO_READOPT) { 2365 switch (sop->so_readopt & RMODEMASK) { 2366 case RNORM: 2367 stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS); 2368 break; 2369 2370 case RMSGD: 2371 stp->sd_read_opt = 2372 ((stp->sd_read_opt & ~RD_MSGNODIS) | 2373 RD_MSGDIS); 2374 break; 2375 2376 case RMSGN: 2377 stp->sd_read_opt = 2378 ((stp->sd_read_opt & ~RD_MSGDIS) | 2379 RD_MSGNODIS); 2380 break; 2381 } 2382 switch (sop->so_readopt & RPROTMASK) { 2383 case RPROTNORM: 2384 stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS); 2385 break; 2386 2387 case RPROTDAT: 2388 stp->sd_read_opt = 2389 ((stp->sd_read_opt & ~RD_PROTDIS) | 2390 RD_PROTDAT); 2391 break; 2392 2393 case RPROTDIS: 2394 stp->sd_read_opt = 2395 ((stp->sd_read_opt & ~RD_PROTDAT) | 2396 RD_PROTDIS); 2397 break; 2398 } 2399 switch (sop->so_readopt & RFLUSHMASK) { 2400 case RFLUSHPCPROT: 2401 /* 2402 * This sets the stream head to NOT flush 2403 * M_PCPROTO messages. 
2404 */ 2405 stp->sd_read_opt |= RFLUSHPCPROT; 2406 break; 2407 } 2408 } 2409 if (sop->so_flags & SO_ERROPT) { 2410 switch (sop->so_erropt & RERRMASK) { 2411 case RERRNORM: 2412 stp->sd_flag &= ~STRDERRNONPERSIST; 2413 break; 2414 case RERRNONPERSIST: 2415 stp->sd_flag |= STRDERRNONPERSIST; 2416 break; 2417 } 2418 switch (sop->so_erropt & WERRMASK) { 2419 case WERRNORM: 2420 stp->sd_flag &= ~STWRERRNONPERSIST; 2421 break; 2422 case WERRNONPERSIST: 2423 stp->sd_flag |= STWRERRNONPERSIST; 2424 break; 2425 } 2426 } 2427 if (sop->so_flags & SO_COPYOPT) { 2428 if (sop->so_copyopt & ZCVMSAFE) { 2429 stp->sd_copyflag |= STZCVMSAFE; 2430 stp->sd_copyflag &= ~STZCVMUNSAFE; 2431 } else if (sop->so_copyopt & ZCVMUNSAFE) { 2432 stp->sd_copyflag |= STZCVMUNSAFE; 2433 stp->sd_copyflag &= ~STZCVMSAFE; 2434 } 2435 2436 if (sop->so_copyopt & COPYCACHED) { 2437 stp->sd_copyflag |= STRCOPYCACHED; 2438 } 2439 } 2440 if (sop->so_flags & SO_WROFF) 2441 stp->sd_wroff = sop->so_wroff; 2442 if (sop->so_flags & SO_TAIL) 2443 stp->sd_tail = sop->so_tail; 2444 if (sop->so_flags & SO_MINPSZ) 2445 q->q_minpsz = sop->so_minpsz; 2446 if (sop->so_flags & SO_MAXPSZ) 2447 q->q_maxpsz = sop->so_maxpsz; 2448 if (sop->so_flags & SO_MAXBLK) 2449 stp->sd_maxblk = sop->so_maxblk; 2450 if (sop->so_flags & SO_HIWAT) { 2451 if (sop->so_flags & SO_BAND) { 2452 if (strqset(q, QHIWAT, 2453 sop->so_band, sop->so_hiwat)) { 2454 cmn_err(CE_WARN, "strrput: could not " 2455 "allocate qband\n"); 2456 } else { 2457 bpri = sop->so_band; 2458 } 2459 } else { 2460 q->q_hiwat = sop->so_hiwat; 2461 } 2462 } 2463 if (sop->so_flags & SO_LOWAT) { 2464 if (sop->so_flags & SO_BAND) { 2465 if (strqset(q, QLOWAT, 2466 sop->so_band, sop->so_lowat)) { 2467 cmn_err(CE_WARN, "strrput: could not " 2468 "allocate qband\n"); 2469 } else { 2470 bpri = sop->so_band; 2471 } 2472 } else { 2473 q->q_lowat = sop->so_lowat; 2474 } 2475 } 2476 if (sop->so_flags & SO_MREADON) 2477 stp->sd_flag |= SNDMREAD; 2478 if (sop->so_flags & SO_MREADOFF) 
2479 stp->sd_flag &= ~SNDMREAD; 2480 if (sop->so_flags & SO_NDELON) 2481 stp->sd_flag |= OLDNDELAY; 2482 if (sop->so_flags & SO_NDELOFF) 2483 stp->sd_flag &= ~OLDNDELAY; 2484 if (sop->so_flags & SO_ISTTY) 2485 stp->sd_flag |= STRISTTY; 2486 if (sop->so_flags & SO_ISNTTY) 2487 stp->sd_flag &= ~STRISTTY; 2488 if (sop->so_flags & SO_TOSTOP) 2489 stp->sd_flag |= STRTOSTOP; 2490 if (sop->so_flags & SO_TONSTOP) 2491 stp->sd_flag &= ~STRTOSTOP; 2492 if (sop->so_flags & SO_DELIM) 2493 stp->sd_flag |= STRDELIM; 2494 if (sop->so_flags & SO_NODELIM) 2495 stp->sd_flag &= ~STRDELIM; 2496 2497 mutex_exit(&stp->sd_lock); 2498 freemsg(bp); 2499 2500 /* Check backenable in case the water marks changed */ 2501 qbackenable(q, bpri); 2502 return (0); 2503 2504 /* 2505 * The following set of cases deal with situations where two stream 2506 * heads are connected to each other (twisted streams). These messages 2507 * have no meaning at the stream head. 2508 */ 2509 case M_BREAK: 2510 case M_CTL: 2511 case M_DELAY: 2512 case M_START: 2513 case M_STOP: 2514 case M_IOCDATA: 2515 case M_STARTI: 2516 case M_STOPI: 2517 freemsg(bp); 2518 return (0); 2519 2520 case M_IOCTL: 2521 /* 2522 * Always NAK this condition 2523 * (makes no sense) 2524 * If there is one or more threads in the read side 2525 * rwnext we have to defer the nacking until that thread 2526 * returns (in strget). 2527 */ 2528 mutex_enter(&stp->sd_lock); 2529 if (stp->sd_struiodnak != 0) { 2530 /* 2531 * Defer NAK to the streamhead. Queue at the end 2532 * the list. 2533 */ 2534 mblk_t *mp = stp->sd_struionak; 2535 2536 while (mp && mp->b_next) 2537 mp = mp->b_next; 2538 if (mp) 2539 mp->b_next = bp; 2540 else 2541 stp->sd_struionak = bp; 2542 bp->b_next = NULL; 2543 mutex_exit(&stp->sd_lock); 2544 return (0); 2545 } 2546 mutex_exit(&stp->sd_lock); 2547 2548 bp->b_datap->db_type = M_IOCNAK; 2549 /* 2550 * Protect against the driver passing up 2551 * messages after it has done a qprocsoff. 
2552 */ 2553 if (_OTHERQ(q)->q_next == NULL) 2554 freemsg(bp); 2555 else 2556 qreply(q, bp); 2557 return (0); 2558 2559 default: 2560 #ifdef DEBUG 2561 cmn_err(CE_WARN, 2562 "bad message type %x received at stream head\n", 2563 bp->b_datap->db_type); 2564 #endif 2565 freemsg(bp); 2566 return (0); 2567 } 2568 2569 /* NOTREACHED */ 2570 } 2571 2572 /* 2573 * Check if the stream pointed to by `stp' can be written to, and return an 2574 * error code if not. If `eiohup' is set, then return EIO if STRHUP is set. 2575 * If `sigpipeok' is set and the SW_SIGPIPE option is enabled on the stream, 2576 * then always return EPIPE and send a SIGPIPE to the invoking thread. 2577 */ 2578 static int 2579 strwriteable(struct stdata *stp, boolean_t eiohup, boolean_t sigpipeok) 2580 { 2581 int error; 2582 2583 ASSERT(MUTEX_HELD(&stp->sd_lock)); 2584 2585 /* 2586 * For modem support, POSIX states that on writes, EIO should 2587 * be returned if the stream has been hung up. 2588 */ 2589 if (eiohup && (stp->sd_flag & (STPLEX|STRHUP)) == STRHUP) 2590 error = EIO; 2591 else 2592 error = strgeterr(stp, STRHUP|STPLEX|STWRERR, 0); 2593 2594 if (error != 0) { 2595 if (!(stp->sd_flag & STPLEX) && 2596 (stp->sd_wput_opt & SW_SIGPIPE) && sigpipeok) { 2597 tsignal(curthread, SIGPIPE); 2598 error = EPIPE; 2599 } 2600 } 2601 2602 return (error); 2603 } 2604 2605 /* 2606 * Copyin and send data down a stream. 2607 * The caller will allocate and copyin any control part that precedes the 2608 * message and pass than in as mctl. 2609 * 2610 * Caller should *not* hold sd_lock. 2611 * When EWOULDBLOCK is returned the caller has to redo the canputnext 2612 * under sd_lock in order to avoid missing a backenabling wakeup. 2613 * 2614 * Use iosize = -1 to not send any M_DATA. iosize = 0 sends zero-length M_DATA. 2615 * 2616 * Set MSG_IGNFLOW in flags to ignore flow control for hipri messages. 2617 * For sync streams we can only ignore flow control by reverting to using 2618 * putnext. 
2619 * 2620 * If sd_maxblk is less than *iosize this routine might return without 2621 * transferring all of *iosize. In all cases, on return *iosize will contain 2622 * the amount of data that was transferred. 2623 */ 2624 static int 2625 strput(struct stdata *stp, mblk_t *mctl, struct uio *uiop, ssize_t *iosize, 2626 int b_flag, int pri, int flags) 2627 { 2628 struiod_t uiod; 2629 mblk_t *mp; 2630 queue_t *wqp = stp->sd_wrq; 2631 int error = 0; 2632 ssize_t count = *iosize; 2633 cred_t *cr; 2634 2635 ASSERT(MUTEX_NOT_HELD(&stp->sd_lock)); 2636 2637 if (uiop != NULL && count >= 0) 2638 flags |= stp->sd_struiowrq ? STRUIO_POSTPONE : 0; 2639 2640 if (!(flags & STRUIO_POSTPONE)) { 2641 /* 2642 * Use regular canputnext, strmakedata, putnext sequence. 2643 */ 2644 if (pri == 0) { 2645 if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) { 2646 freemsg(mctl); 2647 return (EWOULDBLOCK); 2648 } 2649 } else { 2650 if (!(flags & MSG_IGNFLOW) && !bcanputnext(wqp, pri)) { 2651 freemsg(mctl); 2652 return (EWOULDBLOCK); 2653 } 2654 } 2655 2656 if ((error = strmakedata(iosize, uiop, stp, flags, 2657 &mp)) != 0) { 2658 freemsg(mctl); 2659 /* 2660 * need to change return code to ENOMEM 2661 * so that this is not confused with 2662 * flow control, EAGAIN. 2663 */ 2664 2665 if (error == EAGAIN) 2666 return (ENOMEM); 2667 else 2668 return (error); 2669 } 2670 if (mctl != NULL) { 2671 if (mctl->b_cont == NULL) 2672 mctl->b_cont = mp; 2673 else if (mp != NULL) 2674 linkb(mctl, mp); 2675 mp = mctl; 2676 /* 2677 * Note that for interrupt thread, the CRED() is 2678 * NULL. Don't bother with the pid either. 2679 */ 2680 if ((cr = CRED()) != NULL) { 2681 mblk_setcred(mp, cr); 2682 DB_CPID(mp) = curproc->p_pid; 2683 } 2684 } else if (mp == NULL) 2685 return (0); 2686 2687 mp->b_flag |= b_flag; 2688 mp->b_band = (uchar_t)pri; 2689 2690 if (flags & MSG_IGNFLOW) { 2691 /* 2692 * XXX Hack: Don't get stuck running service 2693 * procedures. 
This is needed for sockfs when 2694 * sending the unbind message out of the rput 2695 * procedure - we don't want a put procedure 2696 * to run service procedures. 2697 */ 2698 putnext(wqp, mp); 2699 } else { 2700 stream_willservice(stp); 2701 putnext(wqp, mp); 2702 stream_runservice(stp); 2703 } 2704 return (0); 2705 } 2706 /* 2707 * Stream supports rwnext() for the write side. 2708 */ 2709 if ((error = strmakedata(iosize, uiop, stp, flags, &mp)) != 0) { 2710 freemsg(mctl); 2711 /* 2712 * map EAGAIN to ENOMEM since EAGAIN means "flow controlled". 2713 */ 2714 return (error == EAGAIN ? ENOMEM : error); 2715 } 2716 if (mctl != NULL) { 2717 if (mctl->b_cont == NULL) 2718 mctl->b_cont = mp; 2719 else if (mp != NULL) 2720 linkb(mctl, mp); 2721 mp = mctl; 2722 /* 2723 * Note that for interrupt thread, the CRED() is 2724 * NULL. Don't bother with the pid either. 2725 */ 2726 if ((cr = CRED()) != NULL) { 2727 mblk_setcred(mp, cr); 2728 DB_CPID(mp) = curproc->p_pid; 2729 } 2730 } else if (mp == NULL) { 2731 return (0); 2732 } 2733 2734 mp->b_flag |= b_flag; 2735 mp->b_band = (uchar_t)pri; 2736 2737 (void) uiodup(uiop, &uiod.d_uio, uiod.d_iov, 2738 sizeof (uiod.d_iov) / sizeof (*uiod.d_iov)); 2739 uiod.d_uio.uio_offset = 0; 2740 uiod.d_mp = mp; 2741 error = rwnext(wqp, &uiod); 2742 if (! uiod.d_mp) { 2743 uioskip(uiop, *iosize); 2744 return (error); 2745 } 2746 ASSERT(mp == uiod.d_mp); 2747 if (error == EINVAL) { 2748 /* 2749 * The stream plumbing must have changed while 2750 * we were away, so just turn off rwnext()s. 2751 */ 2752 error = 0; 2753 } else if (error == EBUSY || error == EWOULDBLOCK) { 2754 /* 2755 * Couldn't enter a perimeter or took a page fault, 2756 * so fall-back to putnext(). 
2757 */ 2758 error = 0; 2759 } else { 2760 freemsg(mp); 2761 return (error); 2762 } 2763 /* Have to check canput before consuming data from the uio */ 2764 if (pri == 0) { 2765 if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) { 2766 freemsg(mp); 2767 return (EWOULDBLOCK); 2768 } 2769 } else { 2770 if (!bcanputnext(wqp, pri) && !(flags & MSG_IGNFLOW)) { 2771 freemsg(mp); 2772 return (EWOULDBLOCK); 2773 } 2774 } 2775 ASSERT(mp == uiod.d_mp); 2776 /* Copyin data from the uio */ 2777 if ((error = struioget(wqp, mp, &uiod, 0)) != 0) { 2778 freemsg(mp); 2779 return (error); 2780 } 2781 uioskip(uiop, *iosize); 2782 if (flags & MSG_IGNFLOW) { 2783 /* 2784 * XXX Hack: Don't get stuck running service procedures. 2785 * This is needed for sockfs when sending the unbind message 2786 * out of the rput procedure - we don't want a put procedure 2787 * to run service procedures. 2788 */ 2789 putnext(wqp, mp); 2790 } else { 2791 stream_willservice(stp); 2792 putnext(wqp, mp); 2793 stream_runservice(stp); 2794 } 2795 return (0); 2796 } 2797 2798 /* 2799 * Write attempts to break the write request into messages conforming 2800 * with the minimum and maximum packet sizes set downstream. 2801 * 2802 * Write will not block if downstream queue is full and 2803 * O_NDELAY is set, otherwise it will block waiting for the queue to get room. 2804 * 2805 * A write of zero bytes gets packaged into a zero length message and sent 2806 * downstream like any other message. 2807 * 2808 * If buffers of the requested sizes are not available, the write will 2809 * sleep until the buffers become available. 2810 * 2811 * Write (if specified) will supply a write offset in a message if it 2812 * makes sense. This can be specified by downstream modules as part of 2813 * a M_SETOPTS message. Write will not supply the write offset if it 2814 * cannot supply any data in a buffer. In other words, write will never 2815 * send down an empty packet due to a write offset. 
2816 */ 2817 /* ARGSUSED2 */ 2818 int 2819 strwrite(struct vnode *vp, struct uio *uiop, cred_t *crp) 2820 { 2821 return (strwrite_common(vp, uiop, crp, 0)); 2822 } 2823 2824 /* ARGSUSED2 */ 2825 int 2826 strwrite_common(struct vnode *vp, struct uio *uiop, cred_t *crp, int wflag) 2827 { 2828 struct stdata *stp; 2829 struct queue *wqp; 2830 ssize_t rmin, rmax; 2831 ssize_t iosize; 2832 int waitflag; 2833 int tempmode; 2834 int error = 0; 2835 int b_flag; 2836 2837 ASSERT(vp->v_stream); 2838 stp = vp->v_stream; 2839 2840 mutex_enter(&stp->sd_lock); 2841 2842 if ((error = i_straccess(stp, JCWRITE)) != 0) { 2843 mutex_exit(&stp->sd_lock); 2844 return (error); 2845 } 2846 2847 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { 2848 error = strwriteable(stp, B_TRUE, B_TRUE); 2849 if (error != 0) { 2850 mutex_exit(&stp->sd_lock); 2851 return (error); 2852 } 2853 } 2854 2855 mutex_exit(&stp->sd_lock); 2856 2857 wqp = stp->sd_wrq; 2858 2859 /* get these values from them cached in the stream head */ 2860 rmin = stp->sd_qn_minpsz; 2861 rmax = stp->sd_qn_maxpsz; 2862 2863 /* 2864 * Check the min/max packet size constraints. If min packet size 2865 * is non-zero, the write cannot be split into multiple messages 2866 * and still guarantee the size constraints. 2867 */ 2868 TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_IN, "strwrite in:q %p", wqp); 2869 2870 ASSERT((rmax >= 0) || (rmax == INFPSZ)); 2871 if (rmax == 0) { 2872 return (0); 2873 } 2874 if (rmin > 0) { 2875 if (uiop->uio_resid < rmin) { 2876 TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT, 2877 "strwrite out:q %p out %d error %d", 2878 wqp, 0, ERANGE); 2879 return (ERANGE); 2880 } 2881 if ((rmax != INFPSZ) && (uiop->uio_resid > rmax)) { 2882 TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT, 2883 "strwrite out:q %p out %d error %d", 2884 wqp, 1, ERANGE); 2885 return (ERANGE); 2886 } 2887 } 2888 2889 /* 2890 * Do until count satisfied or error. 
2891 */ 2892 waitflag = WRITEWAIT | wflag; 2893 if (stp->sd_flag & OLDNDELAY) 2894 tempmode = uiop->uio_fmode & ~FNDELAY; 2895 else 2896 tempmode = uiop->uio_fmode; 2897 2898 if (rmax == INFPSZ) 2899 rmax = uiop->uio_resid; 2900 2901 /* 2902 * Note that tempmode does not get used in strput/strmakedata 2903 * but only in strwaitq. The other routines use uio_fmode 2904 * unmodified. 2905 */ 2906 2907 /* LINTED: constant in conditional context */ 2908 while (1) { /* breaks when uio_resid reaches zero */ 2909 /* 2910 * Determine the size of the next message to be 2911 * packaged. May have to break write into several 2912 * messages based on max packet size. 2913 */ 2914 iosize = MIN(uiop->uio_resid, rmax); 2915 2916 /* 2917 * Put block downstream when flow control allows it. 2918 */ 2919 if ((stp->sd_flag & STRDELIM) && (uiop->uio_resid == iosize)) 2920 b_flag = MSGDELIM; 2921 else 2922 b_flag = 0; 2923 2924 for (;;) { 2925 int done = 0; 2926 2927 error = strput(stp, NULL, uiop, &iosize, b_flag, 0, 0); 2928 if (error == 0) 2929 break; 2930 if (error != EWOULDBLOCK) 2931 goto out; 2932 2933 mutex_enter(&stp->sd_lock); 2934 /* 2935 * Check for a missed wakeup. 2936 * Needed since strput did not hold sd_lock across 2937 * the canputnext. 
2938 */ 2939 if (canputnext(wqp)) { 2940 /* Try again */ 2941 mutex_exit(&stp->sd_lock); 2942 continue; 2943 } 2944 TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAIT, 2945 "strwrite wait:q %p wait", wqp); 2946 if ((error = strwaitq(stp, waitflag, (ssize_t)0, 2947 tempmode, -1, &done)) != 0 || done) { 2948 mutex_exit(&stp->sd_lock); 2949 if ((vp->v_type == VFIFO) && 2950 (uiop->uio_fmode & FNDELAY) && 2951 (error == EAGAIN)) 2952 error = 0; 2953 goto out; 2954 } 2955 TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAKE, 2956 "strwrite wake:q %p awakes", wqp); 2957 if ((error = i_straccess(stp, JCWRITE)) != 0) { 2958 mutex_exit(&stp->sd_lock); 2959 goto out; 2960 } 2961 mutex_exit(&stp->sd_lock); 2962 } 2963 waitflag |= NOINTR; 2964 TRACE_2(TR_FAC_STREAMS_FR, TR_STRWRITE_RESID, 2965 "strwrite resid:q %p uiop %p", wqp, uiop); 2966 if (uiop->uio_resid) { 2967 /* Recheck for errors - needed for sockets */ 2968 if ((stp->sd_wput_opt & SW_RECHECK_ERR) && 2969 (stp->sd_flag & (STWRERR|STRHUP|STPLEX))) { 2970 mutex_enter(&stp->sd_lock); 2971 error = strwriteable(stp, B_FALSE, B_TRUE); 2972 mutex_exit(&stp->sd_lock); 2973 if (error != 0) 2974 return (error); 2975 } 2976 continue; 2977 } 2978 break; 2979 } 2980 out: 2981 /* 2982 * For historical reasons, applications expect EAGAIN when a data 2983 * mblk_t cannot be allocated, so change ENOMEM back to EAGAIN. 2984 */ 2985 if (error == ENOMEM) 2986 error = EAGAIN; 2987 TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT, 2988 "strwrite out:q %p out %d error %d", wqp, 2, error); 2989 return (error); 2990 } 2991 2992 /* 2993 * Stream head write service routine. 2994 * Its job is to wake up any sleeping writers when a queue 2995 * downstream needs data (part of the flow control in putq and getq). 2996 * It also must wake anyone sleeping on a poll(). 2997 * For stream head right below mux module, it must also invoke put procedure 2998 * of next downstream module. 
2999 */ 3000 int 3001 strwsrv(queue_t *q) 3002 { 3003 struct stdata *stp; 3004 queue_t *tq; 3005 qband_t *qbp; 3006 int i; 3007 qband_t *myqbp; 3008 int isevent; 3009 unsigned char qbf[NBAND]; /* band flushing backenable flags */ 3010 3011 TRACE_1(TR_FAC_STREAMS_FR, 3012 TR_STRWSRV, "strwsrv:q %p", q); 3013 stp = (struct stdata *)q->q_ptr; 3014 ASSERT(qclaimed(q)); 3015 mutex_enter(&stp->sd_lock); 3016 ASSERT(!(stp->sd_flag & STPLEX)); 3017 3018 if (stp->sd_flag & WSLEEP) { 3019 stp->sd_flag &= ~WSLEEP; 3020 cv_broadcast(&q->q_wait); 3021 } 3022 mutex_exit(&stp->sd_lock); 3023 3024 /* The other end of a stream pipe went away. */ 3025 if ((tq = q->q_next) == NULL) { 3026 return (0); 3027 } 3028 3029 /* Find the next module forward that has a service procedure */ 3030 claimstr(q); 3031 tq = q->q_nfsrv; 3032 ASSERT(tq != NULL); 3033 3034 if ((q->q_flag & QBACK)) { 3035 if ((tq->q_flag & QFULL)) { 3036 mutex_enter(QLOCK(tq)); 3037 if (!(tq->q_flag & QFULL)) { 3038 mutex_exit(QLOCK(tq)); 3039 goto wakeup; 3040 } 3041 /* 3042 * The queue must have become full again. Set QWANTW 3043 * again so strwsrv will be back enabled when 3044 * the queue becomes non-full next time. 3045 */ 3046 tq->q_flag |= QWANTW; 3047 mutex_exit(QLOCK(tq)); 3048 } else { 3049 wakeup: 3050 pollwakeup(&stp->sd_pollist, POLLWRNORM); 3051 mutex_enter(&stp->sd_lock); 3052 if (stp->sd_sigflags & S_WRNORM) 3053 strsendsig(stp->sd_siglist, S_WRNORM, 0, 0); 3054 mutex_exit(&stp->sd_lock); 3055 } 3056 } 3057 3058 isevent = 0; 3059 i = 1; 3060 bzero((caddr_t)qbf, NBAND); 3061 mutex_enter(QLOCK(tq)); 3062 if ((myqbp = q->q_bandp) != NULL) 3063 for (qbp = tq->q_bandp; qbp && myqbp; qbp = qbp->qb_next) { 3064 ASSERT(myqbp); 3065 if ((myqbp->qb_flag & QB_BACK)) { 3066 if (qbp->qb_flag & QB_FULL) { 3067 /* 3068 * The band must have become full again. 3069 * Set QB_WANTW again so strwsrv will 3070 * be back enabled when the band becomes 3071 * non-full next time. 
3072 */ 3073 qbp->qb_flag |= QB_WANTW; 3074 } else { 3075 isevent = 1; 3076 qbf[i] = 1; 3077 } 3078 } 3079 myqbp = myqbp->qb_next; 3080 i++; 3081 } 3082 mutex_exit(QLOCK(tq)); 3083 3084 if (isevent) { 3085 for (i = tq->q_nband; i; i--) { 3086 if (qbf[i]) { 3087 pollwakeup(&stp->sd_pollist, POLLWRBAND); 3088 mutex_enter(&stp->sd_lock); 3089 if (stp->sd_sigflags & S_WRBAND) 3090 strsendsig(stp->sd_siglist, S_WRBAND, 3091 (uchar_t)i, 0); 3092 mutex_exit(&stp->sd_lock); 3093 } 3094 } 3095 } 3096 3097 releasestr(q); 3098 return (0); 3099 } 3100 3101 /* 3102 * Special case of strcopyin/strcopyout for copying 3103 * struct strioctl that can deal with both data 3104 * models. 3105 */ 3106 3107 #ifdef _LP64 3108 3109 static int 3110 strcopyin_strioctl(void *from, void *to, int flag, int copyflag) 3111 { 3112 struct strioctl32 strioc32; 3113 struct strioctl *striocp; 3114 3115 if (copyflag & U_TO_K) { 3116 ASSERT((copyflag & K_TO_K) == 0); 3117 3118 if ((flag & FMODELS) == DATAMODEL_ILP32) { 3119 if (copyin(from, &strioc32, sizeof (strioc32))) 3120 return (EFAULT); 3121 3122 striocp = (struct strioctl *)to; 3123 striocp->ic_cmd = strioc32.ic_cmd; 3124 striocp->ic_timout = strioc32.ic_timout; 3125 striocp->ic_len = strioc32.ic_len; 3126 striocp->ic_dp = (char *)(uintptr_t)strioc32.ic_dp; 3127 3128 } else { /* NATIVE data model */ 3129 if (copyin(from, to, sizeof (struct strioctl))) { 3130 return (EFAULT); 3131 } else { 3132 return (0); 3133 } 3134 } 3135 } else { 3136 ASSERT(copyflag & K_TO_K); 3137 bcopy(from, to, sizeof (struct strioctl)); 3138 } 3139 return (0); 3140 } 3141 3142 static int 3143 strcopyout_strioctl(void *from, void *to, int flag, int copyflag) 3144 { 3145 struct strioctl32 strioc32; 3146 struct strioctl *striocp; 3147 3148 if (copyflag & U_TO_K) { 3149 ASSERT((copyflag & K_TO_K) == 0); 3150 3151 if ((flag & FMODELS) == DATAMODEL_ILP32) { 3152 striocp = (struct strioctl *)from; 3153 strioc32.ic_cmd = striocp->ic_cmd; 3154 strioc32.ic_timout = 
striocp->ic_timout; 3155 strioc32.ic_len = striocp->ic_len; 3156 strioc32.ic_dp = (caddr32_t)(uintptr_t)striocp->ic_dp; 3157 ASSERT((char *)(uintptr_t)strioc32.ic_dp == 3158 striocp->ic_dp); 3159 3160 if (copyout(&strioc32, to, sizeof (strioc32))) 3161 return (EFAULT); 3162 3163 } else { /* NATIVE data model */ 3164 if (copyout(from, to, sizeof (struct strioctl))) { 3165 return (EFAULT); 3166 } else { 3167 return (0); 3168 } 3169 } 3170 } else { 3171 ASSERT(copyflag & K_TO_K); 3172 bcopy(from, to, sizeof (struct strioctl)); 3173 } 3174 return (0); 3175 } 3176 3177 #else /* ! _LP64 */ 3178 3179 /* ARGSUSED2 */ 3180 static int 3181 strcopyin_strioctl(void *from, void *to, int flag, int copyflag) 3182 { 3183 return (strcopyin(from, to, sizeof (struct strioctl), copyflag)); 3184 } 3185 3186 /* ARGSUSED2 */ 3187 static int 3188 strcopyout_strioctl(void *from, void *to, int flag, int copyflag) 3189 { 3190 return (strcopyout(from, to, sizeof (struct strioctl), copyflag)); 3191 } 3192 3193 #endif /* _LP64 */ 3194 3195 /* 3196 * Determine type of job control semantics expected by user. The 3197 * possibilities are: 3198 * JCREAD - Behaves like read() on fd; send SIGTTIN 3199 * JCWRITE - Behaves like write() on fd; send SIGTTOU if TOSTOP set 3200 * JCSETP - Sets a value in the stream; send SIGTTOU, ignore TOSTOP 3201 * JCGETP - Gets a value in the stream; no signals. 3202 * See straccess in strsubr.c for usage of these values. 3203 * 3204 * This routine also returns -1 for I_STR as a special case; the 3205 * caller must call again with the real ioctl number for 3206 * classification. 
3207 */ 3208 static int 3209 job_control_type(int cmd) 3210 { 3211 switch (cmd) { 3212 case I_STR: 3213 return (-1); 3214 3215 case I_RECVFD: 3216 case I_E_RECVFD: 3217 return (JCREAD); 3218 3219 case I_FDINSERT: 3220 case I_SENDFD: 3221 return (JCWRITE); 3222 3223 case TCSETA: 3224 case TCSETAW: 3225 case TCSETAF: 3226 case TCSBRK: 3227 case TCXONC: 3228 case TCFLSH: 3229 case TCDSET: /* Obsolete */ 3230 case TIOCSWINSZ: 3231 case TCSETS: 3232 case TCSETSW: 3233 case TCSETSF: 3234 case TIOCSETD: 3235 case TIOCHPCL: 3236 case TIOCSETP: 3237 case TIOCSETN: 3238 case TIOCEXCL: 3239 case TIOCNXCL: 3240 case TIOCFLUSH: 3241 case TIOCSETC: 3242 case TIOCLBIS: 3243 case TIOCLBIC: 3244 case TIOCLSET: 3245 case TIOCSBRK: 3246 case TIOCCBRK: 3247 case TIOCSDTR: 3248 case TIOCCDTR: 3249 case TIOCSLTC: 3250 case TIOCSTOP: 3251 case TIOCSTART: 3252 case TIOCSTI: 3253 case TIOCSPGRP: 3254 case TIOCMSET: 3255 case TIOCMBIS: 3256 case TIOCMBIC: 3257 case TIOCREMOTE: 3258 case TIOCSIGNAL: 3259 case LDSETT: 3260 case LDSMAP: /* Obsolete */ 3261 case DIOCSETP: 3262 case I_FLUSH: 3263 case I_SRDOPT: 3264 case I_SETSIG: 3265 case I_SWROPT: 3266 case I_FLUSHBAND: 3267 case I_SETCLTIME: 3268 case I_SERROPT: 3269 case I_ESETSIG: 3270 case FIONBIO: 3271 case FIOASYNC: 3272 case FIOSETOWN: 3273 case JBOOT: /* Obsolete */ 3274 case JTERM: /* Obsolete */ 3275 case JTIMOM: /* Obsolete */ 3276 case JZOMBOOT: /* Obsolete */ 3277 case JAGENT: /* Obsolete */ 3278 case JTRUN: /* Obsolete */ 3279 case JXTPROTO: /* Obsolete */ 3280 case TIOCSETLD: 3281 return (JCSETP); 3282 } 3283 3284 return (JCGETP); 3285 } 3286 3287 /* 3288 * ioctl for streams 3289 */ 3290 int 3291 strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, 3292 cred_t *crp, int *rvalp) 3293 { 3294 struct stdata *stp; 3295 struct strcmd *scp; 3296 struct strioctl strioc; 3297 struct uio uio; 3298 struct iovec iov; 3299 int access; 3300 mblk_t *mp; 3301 int error = 0; 3302 int done = 0; 3303 ssize_t rmin, rmax; 3304 
queue_t *wrq; 3305 queue_t *rdq; 3306 boolean_t kioctl = B_FALSE; 3307 3308 if (flag & FKIOCTL) { 3309 copyflag = K_TO_K; 3310 kioctl = B_TRUE; 3311 } 3312 ASSERT(vp->v_stream); 3313 ASSERT(copyflag == U_TO_K || copyflag == K_TO_K); 3314 stp = vp->v_stream; 3315 3316 TRACE_3(TR_FAC_STREAMS_FR, TR_IOCTL_ENTER, 3317 "strioctl:stp %p cmd %X arg %lX", stp, cmd, arg); 3318 3319 if (audit_active) 3320 audit_strioctl(vp, cmd, arg, flag, copyflag, crp, rvalp); 3321 3322 /* 3323 * If the copy is kernel to kernel, make sure that the FNATIVE 3324 * flag is set. After this it would be a serious error to have 3325 * no model flag. 3326 */ 3327 if (copyflag == K_TO_K) 3328 flag = (flag & ~FMODELS) | FNATIVE; 3329 3330 ASSERT((flag & FMODELS) != 0); 3331 3332 wrq = stp->sd_wrq; 3333 rdq = _RD(wrq); 3334 3335 access = job_control_type(cmd); 3336 3337 /* We should never see these here, should be handled by iwscn */ 3338 if (cmd == SRIOCSREDIR || cmd == SRIOCISREDIR) 3339 return (EINVAL); 3340 3341 mutex_enter(&stp->sd_lock); 3342 if ((access != -1) && ((error = i_straccess(stp, access)) != 0)) { 3343 mutex_exit(&stp->sd_lock); 3344 return (error); 3345 } 3346 mutex_exit(&stp->sd_lock); 3347 3348 /* 3349 * Check for sgttyb-related ioctls first, and complain as 3350 * necessary. 
3351 */ 3352 switch (cmd) { 3353 case TIOCGETP: 3354 case TIOCSETP: 3355 case TIOCSETN: 3356 if (sgttyb_handling >= 2 && !sgttyb_complaint) { 3357 sgttyb_complaint = B_TRUE; 3358 cmn_err(CE_NOTE, 3359 "application used obsolete TIOC[GS]ET"); 3360 } 3361 if (sgttyb_handling >= 3) { 3362 tsignal(curthread, SIGSYS); 3363 return (EIO); 3364 } 3365 break; 3366 } 3367 3368 mutex_enter(&stp->sd_lock); 3369 3370 switch (cmd) { 3371 case I_RECVFD: 3372 case I_E_RECVFD: 3373 case I_PEEK: 3374 case I_NREAD: 3375 case FIONREAD: 3376 case FIORDCHK: 3377 case I_ATMARK: 3378 case FIONBIO: 3379 case FIOASYNC: 3380 if (stp->sd_flag & (STRDERR|STPLEX)) { 3381 error = strgeterr(stp, STRDERR|STPLEX, 0); 3382 if (error != 0) { 3383 mutex_exit(&stp->sd_lock); 3384 return (error); 3385 } 3386 } 3387 break; 3388 3389 default: 3390 if (stp->sd_flag & (STRDERR|STWRERR|STPLEX)) { 3391 error = strgeterr(stp, STRDERR|STWRERR|STPLEX, 0); 3392 if (error != 0) { 3393 mutex_exit(&stp->sd_lock); 3394 return (error); 3395 } 3396 } 3397 } 3398 3399 mutex_exit(&stp->sd_lock); 3400 3401 switch (cmd) { 3402 default: 3403 /* 3404 * The stream head has hardcoded knowledge of a 3405 * miscellaneous collection of terminal-, keyboard- and 3406 * mouse-related ioctls, enumerated below. This hardcoded 3407 * knowledge allows the stream head to automatically 3408 * convert transparent ioctl requests made by userland 3409 * programs into I_STR ioctls which many old STREAMS 3410 * modules and drivers require. 3411 * 3412 * No new ioctls should ever be added to this list. 3413 * Instead, the STREAMS module or driver should be written 3414 * to either handle transparent ioctls or require any 3415 * userland programs to use I_STR ioctls (by returning 3416 * EINVAL to any transparent ioctl requests). 
3417 * 3418 * More importantly, removing ioctls from this list should 3419 * be done with the utmost care, since our STREAMS modules 3420 * and drivers *count* on the stream head performing this 3421 * conversion, and thus may panic while processing 3422 * transparent ioctl request for one of these ioctls (keep 3423 * in mind that third party modules and drivers may have 3424 * similar problems). 3425 */ 3426 if (((cmd & IOCTYPE) == LDIOC) || 3427 ((cmd & IOCTYPE) == tIOC) || 3428 ((cmd & IOCTYPE) == TIOC) || 3429 ((cmd & IOCTYPE) == KIOC) || 3430 ((cmd & IOCTYPE) == MSIOC) || 3431 ((cmd & IOCTYPE) == VUIOC)) { 3432 /* 3433 * The ioctl is a tty ioctl - set up strioc buffer 3434 * and call strdoioctl() to do the work. 3435 */ 3436 if (stp->sd_flag & STRHUP) 3437 return (ENXIO); 3438 strioc.ic_cmd = cmd; 3439 strioc.ic_timout = INFTIM; 3440 3441 switch (cmd) { 3442 3443 case TCXONC: 3444 case TCSBRK: 3445 case TCFLSH: 3446 case TCDSET: 3447 { 3448 int native_arg = (int)arg; 3449 strioc.ic_len = sizeof (int); 3450 strioc.ic_dp = (char *)&native_arg; 3451 return (strdoioctl(stp, &strioc, flag, 3452 K_TO_K, crp, rvalp)); 3453 } 3454 3455 case TCSETA: 3456 case TCSETAW: 3457 case TCSETAF: 3458 strioc.ic_len = sizeof (struct termio); 3459 strioc.ic_dp = (char *)arg; 3460 return (strdoioctl(stp, &strioc, flag, 3461 copyflag, crp, rvalp)); 3462 3463 case TCSETS: 3464 case TCSETSW: 3465 case TCSETSF: 3466 strioc.ic_len = sizeof (struct termios); 3467 strioc.ic_dp = (char *)arg; 3468 return (strdoioctl(stp, &strioc, flag, 3469 copyflag, crp, rvalp)); 3470 3471 case LDSETT: 3472 strioc.ic_len = sizeof (struct termcb); 3473 strioc.ic_dp = (char *)arg; 3474 return (strdoioctl(stp, &strioc, flag, 3475 copyflag, crp, rvalp)); 3476 3477 case TIOCSETP: 3478 strioc.ic_len = sizeof (struct sgttyb); 3479 strioc.ic_dp = (char *)arg; 3480 return (strdoioctl(stp, &strioc, flag, 3481 copyflag, crp, rvalp)); 3482 3483 case TIOCSTI: 3484 if ((flag & FREAD) == 0 && 3485 secpolicy_sti(crp) != 
0) { 3486 return (EPERM); 3487 } 3488 mutex_enter(&stp->sd_lock); 3489 mutex_enter(&curproc->p_splock); 3490 if (stp->sd_sidp != curproc->p_sessp->s_sidp && 3491 secpolicy_sti(crp) != 0) { 3492 mutex_exit(&curproc->p_splock); 3493 mutex_exit(&stp->sd_lock); 3494 return (EACCES); 3495 } 3496 mutex_exit(&curproc->p_splock); 3497 mutex_exit(&stp->sd_lock); 3498 3499 strioc.ic_len = sizeof (char); 3500 strioc.ic_dp = (char *)arg; 3501 return (strdoioctl(stp, &strioc, flag, 3502 copyflag, crp, rvalp)); 3503 3504 case TIOCSWINSZ: 3505 strioc.ic_len = sizeof (struct winsize); 3506 strioc.ic_dp = (char *)arg; 3507 return (strdoioctl(stp, &strioc, flag, 3508 copyflag, crp, rvalp)); 3509 3510 case TIOCSSIZE: 3511 strioc.ic_len = sizeof (struct ttysize); 3512 strioc.ic_dp = (char *)arg; 3513 return (strdoioctl(stp, &strioc, flag, 3514 copyflag, crp, rvalp)); 3515 3516 case TIOCSSOFTCAR: 3517 case KIOCTRANS: 3518 case KIOCTRANSABLE: 3519 case KIOCCMD: 3520 case KIOCSDIRECT: 3521 case KIOCSCOMPAT: 3522 case KIOCSKABORTEN: 3523 case KIOCSRPTDELAY: 3524 case KIOCSRPTRATE: 3525 case VUIDSFORMAT: 3526 case TIOCSPPS: 3527 strioc.ic_len = sizeof (int); 3528 strioc.ic_dp = (char *)arg; 3529 return (strdoioctl(stp, &strioc, flag, 3530 copyflag, crp, rvalp)); 3531 3532 case KIOCSETKEY: 3533 case KIOCGETKEY: 3534 strioc.ic_len = sizeof (struct kiockey); 3535 strioc.ic_dp = (char *)arg; 3536 return (strdoioctl(stp, &strioc, flag, 3537 copyflag, crp, rvalp)); 3538 3539 case KIOCSKEY: 3540 case KIOCGKEY: 3541 strioc.ic_len = sizeof (struct kiockeymap); 3542 strioc.ic_dp = (char *)arg; 3543 return (strdoioctl(stp, &strioc, flag, 3544 copyflag, crp, rvalp)); 3545 3546 case KIOCSLED: 3547 /* arg is a pointer to char */ 3548 strioc.ic_len = sizeof (char); 3549 strioc.ic_dp = (char *)arg; 3550 return (strdoioctl(stp, &strioc, flag, 3551 copyflag, crp, rvalp)); 3552 3553 case MSIOSETPARMS: 3554 strioc.ic_len = sizeof (Ms_parms); 3555 strioc.ic_dp = (char *)arg; 3556 return (strdoioctl(stp, 
&strioc, flag, 3557 copyflag, crp, rvalp)); 3558 3559 case VUIDSADDR: 3560 case VUIDGADDR: 3561 strioc.ic_len = sizeof (struct vuid_addr_probe); 3562 strioc.ic_dp = (char *)arg; 3563 return (strdoioctl(stp, &strioc, flag, 3564 copyflag, crp, rvalp)); 3565 3566 /* 3567 * These M_IOCTL's don't require any data to be sent 3568 * downstream, and the driver will allocate and link 3569 * on its own mblk_t upon M_IOCACK -- thus we set 3570 * ic_len to zero and set ic_dp to arg so we know 3571 * where to copyout to later. 3572 */ 3573 case TIOCGSOFTCAR: 3574 case TIOCGWINSZ: 3575 case TIOCGSIZE: 3576 case KIOCGTRANS: 3577 case KIOCGTRANSABLE: 3578 case KIOCTYPE: 3579 case KIOCGDIRECT: 3580 case KIOCGCOMPAT: 3581 case KIOCLAYOUT: 3582 case KIOCGLED: 3583 case MSIOGETPARMS: 3584 case MSIOBUTTONS: 3585 case VUIDGFORMAT: 3586 case TIOCGPPS: 3587 case TIOCGPPSEV: 3588 case TCGETA: 3589 case TCGETS: 3590 case LDGETT: 3591 case TIOCGETP: 3592 case KIOCGRPTDELAY: 3593 case KIOCGRPTRATE: 3594 strioc.ic_len = 0; 3595 strioc.ic_dp = (char *)arg; 3596 return (strdoioctl(stp, &strioc, flag, 3597 copyflag, crp, rvalp)); 3598 } 3599 } 3600 3601 /* 3602 * Unknown cmd - send it down as a transparent ioctl. 3603 */ 3604 strioc.ic_cmd = cmd; 3605 strioc.ic_timout = INFTIM; 3606 strioc.ic_len = TRANSPARENT; 3607 strioc.ic_dp = (char *)&arg; 3608 3609 return (strdoioctl(stp, &strioc, flag, copyflag, crp, rvalp)); 3610 3611 case I_STR: 3612 /* 3613 * Stream ioctl. Read in an strioctl buffer from the user 3614 * along with any data specified and send it downstream. 3615 * Strdoioctl will wait allow only one ioctl message at 3616 * a time, and waits for the acknowledgement. 
3617 */ 3618 3619 if (stp->sd_flag & STRHUP) 3620 return (ENXIO); 3621 3622 error = strcopyin_strioctl((void *)arg, &strioc, flag, 3623 copyflag); 3624 if (error != 0) 3625 return (error); 3626 3627 if ((strioc.ic_len < 0) || (strioc.ic_timout < -1)) 3628 return (EINVAL); 3629 3630 access = job_control_type(strioc.ic_cmd); 3631 mutex_enter(&stp->sd_lock); 3632 if ((access != -1) && 3633 ((error = i_straccess(stp, access)) != 0)) { 3634 mutex_exit(&stp->sd_lock); 3635 return (error); 3636 } 3637 mutex_exit(&stp->sd_lock); 3638 3639 /* 3640 * The I_STR facility provides a trap door for malicious 3641 * code to send down bogus streamio(7I) ioctl commands to 3642 * unsuspecting STREAMS modules and drivers which expect to 3643 * only get these messages from the stream head. 3644 * Explicitly prohibit any streamio ioctls which can be 3645 * passed downstream by the stream head. Note that we do 3646 * not block all streamio ioctls because the ioctl 3647 * numberspace is not well managed and thus it's possible 3648 * that a module or driver's ioctl numbers may accidentally 3649 * collide with them. 3650 */ 3651 switch (strioc.ic_cmd) { 3652 case I_LINK: 3653 case I_PLINK: 3654 case I_UNLINK: 3655 case I_PUNLINK: 3656 case _I_GETPEERCRED: 3657 case _I_PLINK_LH: 3658 return (EINVAL); 3659 } 3660 3661 error = strdoioctl(stp, &strioc, flag, copyflag, crp, rvalp); 3662 if (error == 0) { 3663 error = strcopyout_strioctl(&strioc, (void *)arg, 3664 flag, copyflag); 3665 } 3666 return (error); 3667 3668 case _I_CMD: 3669 /* 3670 * Like I_STR, but without using M_IOC* messages and without 3671 * copyins/copyouts beyond the passed-in argument. 
3672 */ 3673 if (stp->sd_flag & STRHUP) 3674 return (ENXIO); 3675 3676 if ((scp = kmem_alloc(sizeof (strcmd_t), KM_NOSLEEP)) == NULL) 3677 return (ENOMEM); 3678 3679 if (copyin((void *)arg, scp, sizeof (strcmd_t))) { 3680 kmem_free(scp, sizeof (strcmd_t)); 3681 return (EFAULT); 3682 } 3683 3684 access = job_control_type(scp->sc_cmd); 3685 mutex_enter(&stp->sd_lock); 3686 if (access != -1 && (error = i_straccess(stp, access)) != 0) { 3687 mutex_exit(&stp->sd_lock); 3688 kmem_free(scp, sizeof (strcmd_t)); 3689 return (error); 3690 } 3691 mutex_exit(&stp->sd_lock); 3692 3693 *rvalp = 0; 3694 if ((error = strdocmd(stp, scp, crp)) == 0) { 3695 if (copyout(scp, (void *)arg, sizeof (strcmd_t))) 3696 error = EFAULT; 3697 } 3698 kmem_free(scp, sizeof (strcmd_t)); 3699 return (error); 3700 3701 case I_NREAD: 3702 /* 3703 * Return number of bytes of data in first message 3704 * in queue in "arg" and return the number of messages 3705 * in queue in return value. 3706 */ 3707 { 3708 size_t size; 3709 int retval; 3710 int count = 0; 3711 3712 mutex_enter(QLOCK(rdq)); 3713 3714 size = msgdsize(rdq->q_first); 3715 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next) 3716 count++; 3717 3718 mutex_exit(QLOCK(rdq)); 3719 if (stp->sd_struiordq) { 3720 infod_t infod; 3721 3722 infod.d_cmd = INFOD_COUNT; 3723 infod.d_count = 0; 3724 if (count == 0) { 3725 infod.d_cmd |= INFOD_FIRSTBYTES; 3726 infod.d_bytes = 0; 3727 } 3728 infod.d_res = 0; 3729 (void) infonext(rdq, &infod); 3730 count += infod.d_count; 3731 if (infod.d_res & INFOD_FIRSTBYTES) 3732 size = infod.d_bytes; 3733 } 3734 3735 /* 3736 * Drop down from size_t to the "int" required by the 3737 * interface. Cap at INT_MAX. 3738 */ 3739 retval = MIN(size, INT_MAX); 3740 error = strcopyout(&retval, (void *)arg, sizeof (retval), 3741 copyflag); 3742 if (!error) 3743 *rvalp = count; 3744 return (error); 3745 } 3746 3747 case FIONREAD: 3748 /* 3749 * Return number of bytes of data in all data messages 3750 * in queue in "arg". 
3751 */ 3752 { 3753 size_t size = 0; 3754 int retval; 3755 3756 mutex_enter(QLOCK(rdq)); 3757 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next) 3758 size += msgdsize(mp); 3759 mutex_exit(QLOCK(rdq)); 3760 3761 if (stp->sd_struiordq) { 3762 infod_t infod; 3763 3764 infod.d_cmd = INFOD_BYTES; 3765 infod.d_res = 0; 3766 infod.d_bytes = 0; 3767 (void) infonext(rdq, &infod); 3768 size += infod.d_bytes; 3769 } 3770 3771 /* 3772 * Drop down from size_t to the "int" required by the 3773 * interface. Cap at INT_MAX. 3774 */ 3775 retval = MIN(size, INT_MAX); 3776 error = strcopyout(&retval, (void *)arg, sizeof (retval), 3777 copyflag); 3778 3779 *rvalp = 0; 3780 return (error); 3781 } 3782 case FIORDCHK: 3783 /* 3784 * FIORDCHK does not use arg value (like FIONREAD), 3785 * instead a count is returned. I_NREAD value may 3786 * not be accurate but safe. The real thing to do is 3787 * to add the msgdsizes of all data messages until 3788 * a non-data message. 3789 */ 3790 { 3791 size_t size = 0; 3792 3793 mutex_enter(QLOCK(rdq)); 3794 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next) 3795 size += msgdsize(mp); 3796 mutex_exit(QLOCK(rdq)); 3797 3798 if (stp->sd_struiordq) { 3799 infod_t infod; 3800 3801 infod.d_cmd = INFOD_BYTES; 3802 infod.d_res = 0; 3803 infod.d_bytes = 0; 3804 (void) infonext(rdq, &infod); 3805 size += infod.d_bytes; 3806 } 3807 3808 /* 3809 * Since ioctl returns an int, and memory sizes under 3810 * LP64 may not fit, we return INT_MAX if the count was 3811 * actually greater. 3812 */ 3813 *rvalp = MIN(size, INT_MAX); 3814 return (0); 3815 } 3816 3817 case I_FIND: 3818 /* 3819 * Get module name. 3820 */ 3821 { 3822 char mname[FMNAMESZ + 1]; 3823 queue_t *q; 3824 3825 error = (copyflag & U_TO_K ? copyinstr : copystr)((void *)arg, 3826 mname, FMNAMESZ + 1, NULL); 3827 if (error) 3828 return ((error == ENAMETOOLONG) ? EINVAL : EFAULT); 3829 3830 /* 3831 * Return EINVAL if we're handed a bogus module name. 
3832 */ 3833 if (fmodsw_find(mname, FMODSW_LOAD) == NULL) { 3834 TRACE_0(TR_FAC_STREAMS_FR, 3835 TR_I_CANT_FIND, "couldn't I_FIND"); 3836 return (EINVAL); 3837 } 3838 3839 *rvalp = 0; 3840 3841 /* Look downstream to see if module is there. */ 3842 claimstr(stp->sd_wrq); 3843 for (q = stp->sd_wrq->q_next; q; q = q->q_next) { 3844 if (q->q_flag&QREADR) { 3845 q = NULL; 3846 break; 3847 } 3848 if (strcmp(mname, q->q_qinfo->qi_minfo->mi_idname) == 0) 3849 break; 3850 } 3851 releasestr(stp->sd_wrq); 3852 3853 *rvalp = (q ? 1 : 0); 3854 return (error); 3855 } 3856 3857 case I_PUSH: 3858 case __I_PUSH_NOCTTY: 3859 /* 3860 * Push a module. 3861 * For the case __I_PUSH_NOCTTY push a module but 3862 * do not allocate controlling tty. See bugid 4025044 3863 */ 3864 3865 { 3866 char mname[FMNAMESZ + 1]; 3867 fmodsw_impl_t *fp; 3868 dev_t dummydev; 3869 3870 if (stp->sd_flag & STRHUP) 3871 return (ENXIO); 3872 3873 /* 3874 * Get module name and look up in fmodsw. 3875 */ 3876 error = (copyflag & U_TO_K ? copyinstr : copystr)((void *)arg, 3877 mname, FMNAMESZ + 1, NULL); 3878 if (error) 3879 return ((error == ENAMETOOLONG) ? EINVAL : EFAULT); 3880 3881 if ((fp = fmodsw_find(mname, FMODSW_HOLD | FMODSW_LOAD)) == 3882 NULL) 3883 return (EINVAL); 3884 3885 TRACE_2(TR_FAC_STREAMS_FR, TR_I_PUSH, 3886 "I_PUSH:fp %p stp %p", fp, stp); 3887 3888 if (error = strstartplumb(stp, flag, cmd)) { 3889 fmodsw_rele(fp); 3890 return (error); 3891 } 3892 3893 /* 3894 * See if any more modules can be pushed on this stream. 3895 * Note that this check must be done after strstartplumb() 3896 * since otherwise multiple threads issuing I_PUSHes on 3897 * the same stream will be able to exceed nstrpush. 3898 */ 3899 mutex_enter(&stp->sd_lock); 3900 if (stp->sd_pushcnt >= nstrpush) { 3901 fmodsw_rele(fp); 3902 strendplumb(stp); 3903 mutex_exit(&stp->sd_lock); 3904 return (EINVAL); 3905 } 3906 mutex_exit(&stp->sd_lock); 3907 3908 /* 3909 * Push new module and call its open routine 3910 * via qattach(). 
Modules don't change device 3911 * numbers, so just ignore dummydev here. 3912 */ 3913 dummydev = vp->v_rdev; 3914 if ((error = qattach(rdq, &dummydev, 0, crp, fp, 3915 B_FALSE)) == 0) { 3916 if (vp->v_type == VCHR && /* sorry, no pipes allowed */ 3917 (cmd == I_PUSH) && (stp->sd_flag & STRISTTY)) { 3918 /* 3919 * try to allocate it as a controlling terminal 3920 */ 3921 (void) strctty(stp); 3922 } 3923 } 3924 3925 mutex_enter(&stp->sd_lock); 3926 3927 /* 3928 * As a performance concern we are caching the values of 3929 * q_minpsz and q_maxpsz of the module below the stream 3930 * head in the stream head. 3931 */ 3932 mutex_enter(QLOCK(stp->sd_wrq->q_next)); 3933 rmin = stp->sd_wrq->q_next->q_minpsz; 3934 rmax = stp->sd_wrq->q_next->q_maxpsz; 3935 mutex_exit(QLOCK(stp->sd_wrq->q_next)); 3936 3937 /* Do this processing here as a performance concern */ 3938 if (strmsgsz != 0) { 3939 if (rmax == INFPSZ) 3940 rmax = strmsgsz; 3941 else { 3942 if (vp->v_type == VFIFO) 3943 rmax = MIN(PIPE_BUF, rmax); 3944 else rmax = MIN(strmsgsz, rmax); 3945 } 3946 } 3947 3948 mutex_enter(QLOCK(wrq)); 3949 stp->sd_qn_minpsz = rmin; 3950 stp->sd_qn_maxpsz = rmax; 3951 mutex_exit(QLOCK(wrq)); 3952 3953 strendplumb(stp); 3954 mutex_exit(&stp->sd_lock); 3955 return (error); 3956 } 3957 3958 case I_POP: 3959 { 3960 queue_t *q; 3961 3962 if (stp->sd_flag & STRHUP) 3963 return (ENXIO); 3964 if (!wrq->q_next) /* for broken pipes */ 3965 return (EINVAL); 3966 3967 if (error = strstartplumb(stp, flag, cmd)) 3968 return (error); 3969 3970 /* 3971 * If there is an anchor on this stream and popping 3972 * the current module would attempt to pop through the 3973 * anchor, then disallow the pop unless we have sufficient 3974 * privileges; take the cheapest (non-locking) check 3975 * first. 
3976 */ 3977 if (secpolicy_ip_config(crp, B_TRUE) != 0 || 3978 (stp->sd_anchorzone != crgetzoneid(crp))) { 3979 mutex_enter(&stp->sd_lock); 3980 /* 3981 * Anchors only apply if there's at least one 3982 * module on the stream (sd_pushcnt > 0). 3983 */ 3984 if (stp->sd_pushcnt > 0 && 3985 stp->sd_pushcnt == stp->sd_anchor && 3986 stp->sd_vnode->v_type != VFIFO) { 3987 strendplumb(stp); 3988 mutex_exit(&stp->sd_lock); 3989 if (stp->sd_anchorzone != crgetzoneid(crp)) 3990 return (EINVAL); 3991 /* Audit and report error */ 3992 return (secpolicy_ip_config(crp, B_FALSE)); 3993 } 3994 mutex_exit(&stp->sd_lock); 3995 } 3996 3997 q = wrq->q_next; 3998 TRACE_2(TR_FAC_STREAMS_FR, TR_I_POP, 3999 "I_POP:%p from %p", q, stp); 4000 if (q->q_next == NULL || (q->q_flag & (QREADR|QISDRV))) { 4001 error = EINVAL; 4002 } else { 4003 qdetach(_RD(q), 1, flag, crp, B_FALSE); 4004 error = 0; 4005 } 4006 mutex_enter(&stp->sd_lock); 4007 4008 /* 4009 * As a performance concern we are caching the values of 4010 * q_minpsz and q_maxpsz of the module below the stream 4011 * head in the stream head. 4012 */ 4013 mutex_enter(QLOCK(wrq->q_next)); 4014 rmin = wrq->q_next->q_minpsz; 4015 rmax = wrq->q_next->q_maxpsz; 4016 mutex_exit(QLOCK(wrq->q_next)); 4017 4018 /* Do this processing here as a performance concern */ 4019 if (strmsgsz != 0) { 4020 if (rmax == INFPSZ) 4021 rmax = strmsgsz; 4022 else { 4023 if (vp->v_type == VFIFO) 4024 rmax = MIN(PIPE_BUF, rmax); 4025 else rmax = MIN(strmsgsz, rmax); 4026 } 4027 } 4028 4029 mutex_enter(QLOCK(wrq)); 4030 stp->sd_qn_minpsz = rmin; 4031 stp->sd_qn_maxpsz = rmax; 4032 mutex_exit(QLOCK(wrq)); 4033 4034 /* If we popped through the anchor, then reset the anchor. 
*/ 4035 if (stp->sd_pushcnt < stp->sd_anchor) { 4036 stp->sd_anchor = 0; 4037 stp->sd_anchorzone = 0; 4038 } 4039 strendplumb(stp); 4040 mutex_exit(&stp->sd_lock); 4041 return (error); 4042 } 4043 4044 case _I_MUXID2FD: 4045 { 4046 /* 4047 * Create a fd for a I_PLINK'ed lower stream with a given 4048 * muxid. With the fd, application can send down ioctls, 4049 * like I_LIST, to the previously I_PLINK'ed stream. Note 4050 * that after getting the fd, the application has to do an 4051 * I_PUNLINK on the muxid before it can do any operation 4052 * on the lower stream. This is required by spec1170. 4053 * 4054 * The fd used to do this ioctl should point to the same 4055 * controlling device used to do the I_PLINK. If it uses 4056 * a different stream or an invalid muxid, I_MUXID2FD will 4057 * fail. The error code is set to EINVAL. 4058 * 4059 * The intended use of this interface is the following. 4060 * An application I_PLINK'ed a stream and exits. The fd 4061 * to the lower stream is gone. Another application 4062 * wants to get a fd to the lower stream, it uses I_MUXID2FD. 4063 */ 4064 int muxid = (int)arg; 4065 int fd; 4066 linkinfo_t *linkp; 4067 struct file *fp; 4068 netstack_t *ns; 4069 str_stack_t *ss; 4070 4071 /* 4072 * Do not allow the wildcard muxid. This ioctl is not 4073 * intended to find arbitrary link. 
4074 */ 4075 if (muxid == 0) { 4076 return (EINVAL); 4077 } 4078 4079 ns = netstack_find_by_cred(crp); 4080 ASSERT(ns != NULL); 4081 ss = ns->netstack_str; 4082 ASSERT(ss != NULL); 4083 4084 mutex_enter(&muxifier); 4085 linkp = findlinks(vp->v_stream, muxid, LINKPERSIST, ss); 4086 if (linkp == NULL) { 4087 mutex_exit(&muxifier); 4088 netstack_rele(ss->ss_netstack); 4089 return (EINVAL); 4090 } 4091 4092 if ((fd = ufalloc(0)) == -1) { 4093 mutex_exit(&muxifier); 4094 netstack_rele(ss->ss_netstack); 4095 return (EMFILE); 4096 } 4097 fp = linkp->li_fpdown; 4098 mutex_enter(&fp->f_tlock); 4099 fp->f_count++; 4100 mutex_exit(&fp->f_tlock); 4101 mutex_exit(&muxifier); 4102 setf(fd, fp); 4103 *rvalp = fd; 4104 netstack_rele(ss->ss_netstack); 4105 return (0); 4106 } 4107 4108 case _I_INSERT: 4109 { 4110 /* 4111 * To insert a module to a given position in a stream. 4112 * In the first release, only allow privileged user 4113 * to use this ioctl. Furthermore, the insert is only allowed 4114 * below an anchor if the zoneid is the same as the zoneid 4115 * which created the anchor. 4116 * 4117 * Note that we do not plan to support this ioctl 4118 * on pipes in the first release. We want to learn more 4119 * about the implications of these ioctls before extending 4120 * their support. And we do not think these features are 4121 * valuable for pipes. 4122 * 4123 * Neither do we support O/C hot stream. Note that only 4124 * the upper streams of TCP/IP stack are O/C hot streams. 4125 * The lower IP stream is not. 4126 * When there is a O/C cold barrier, we only allow inserts 4127 * above the barrier. 
4128 */ 4129 STRUCT_DECL(strmodconf, strmodinsert); 4130 char mod_name[FMNAMESZ + 1]; 4131 fmodsw_impl_t *fp; 4132 dev_t dummydev; 4133 queue_t *tmp_wrq; 4134 int pos; 4135 boolean_t is_insert; 4136 4137 STRUCT_INIT(strmodinsert, flag); 4138 if (stp->sd_flag & STRHUP) 4139 return (ENXIO); 4140 if (STRMATED(stp)) 4141 return (EINVAL); 4142 if ((error = secpolicy_net_config(crp, B_FALSE)) != 0) 4143 return (error); 4144 if (stp->sd_anchor != 0 && 4145 stp->sd_anchorzone != crgetzoneid(crp)) 4146 return (EINVAL); 4147 4148 error = strcopyin((void *)arg, STRUCT_BUF(strmodinsert), 4149 STRUCT_SIZE(strmodinsert), copyflag); 4150 if (error) 4151 return (error); 4152 4153 /* 4154 * Get module name and look up in fmodsw. 4155 */ 4156 error = (copyflag & U_TO_K ? copyinstr : 4157 copystr)(STRUCT_FGETP(strmodinsert, mod_name), 4158 mod_name, FMNAMESZ + 1, NULL); 4159 if (error) 4160 return ((error == ENAMETOOLONG) ? EINVAL : EFAULT); 4161 4162 if ((fp = fmodsw_find(mod_name, FMODSW_HOLD | FMODSW_LOAD)) == 4163 NULL) 4164 return (EINVAL); 4165 4166 if (error = strstartplumb(stp, flag, cmd)) { 4167 fmodsw_rele(fp); 4168 return (error); 4169 } 4170 4171 /* 4172 * Is this _I_INSERT just like an I_PUSH? We need to know 4173 * this because we do some optimizations if this is a 4174 * module being pushed. 4175 */ 4176 pos = STRUCT_FGET(strmodinsert, pos); 4177 is_insert = (pos != 0); 4178 4179 /* 4180 * Make sure pos is valid. Even though it is not an I_PUSH, 4181 * we impose the same limit on the number of modules in a 4182 * stream. 4183 */ 4184 mutex_enter(&stp->sd_lock); 4185 if (stp->sd_pushcnt >= nstrpush || pos < 0 || 4186 pos > stp->sd_pushcnt) { 4187 fmodsw_rele(fp); 4188 strendplumb(stp); 4189 mutex_exit(&stp->sd_lock); 4190 return (EINVAL); 4191 } 4192 if (stp->sd_anchor != 0) { 4193 /* 4194 * Is this insert below the anchor? 4195 * Pushcnt hasn't been increased yet hence 4196 * we test for greater than here, and greater or 4197 * equal after qattach. 
4198 */ 4199 if (pos > (stp->sd_pushcnt - stp->sd_anchor) && 4200 stp->sd_anchorzone != crgetzoneid(crp)) { 4201 fmodsw_rele(fp); 4202 strendplumb(stp); 4203 mutex_exit(&stp->sd_lock); 4204 return (EPERM); 4205 } 4206 } 4207 4208 mutex_exit(&stp->sd_lock); 4209 4210 /* 4211 * First find the correct position this module to 4212 * be inserted. We don't need to call claimstr() 4213 * as the stream should not be changing at this point. 4214 * 4215 * Insert new module and call its open routine 4216 * via qattach(). Modules don't change device 4217 * numbers, so just ignore dummydev here. 4218 */ 4219 for (tmp_wrq = stp->sd_wrq; pos > 0; 4220 tmp_wrq = tmp_wrq->q_next, pos--) { 4221 ASSERT(SAMESTR(tmp_wrq)); 4222 } 4223 dummydev = vp->v_rdev; 4224 if ((error = qattach(_RD(tmp_wrq), &dummydev, 0, crp, 4225 fp, is_insert)) != 0) { 4226 mutex_enter(&stp->sd_lock); 4227 strendplumb(stp); 4228 mutex_exit(&stp->sd_lock); 4229 return (error); 4230 } 4231 4232 mutex_enter(&stp->sd_lock); 4233 4234 /* 4235 * As a performance concern we are caching the values of 4236 * q_minpsz and q_maxpsz of the module below the stream 4237 * head in the stream head. 4238 */ 4239 if (!is_insert) { 4240 mutex_enter(QLOCK(stp->sd_wrq->q_next)); 4241 rmin = stp->sd_wrq->q_next->q_minpsz; 4242 rmax = stp->sd_wrq->q_next->q_maxpsz; 4243 mutex_exit(QLOCK(stp->sd_wrq->q_next)); 4244 4245 /* Do this processing here as a performance concern */ 4246 if (strmsgsz != 0) { 4247 if (rmax == INFPSZ) { 4248 rmax = strmsgsz; 4249 } else { 4250 rmax = MIN(strmsgsz, rmax); 4251 } 4252 } 4253 4254 mutex_enter(QLOCK(wrq)); 4255 stp->sd_qn_minpsz = rmin; 4256 stp->sd_qn_maxpsz = rmax; 4257 mutex_exit(QLOCK(wrq)); 4258 } 4259 4260 /* 4261 * Need to update the anchor value if this module is 4262 * inserted below the anchor point. 
4263 */ 4264 if (stp->sd_anchor != 0) { 4265 pos = STRUCT_FGET(strmodinsert, pos); 4266 if (pos >= (stp->sd_pushcnt - stp->sd_anchor)) 4267 stp->sd_anchor++; 4268 } 4269 4270 strendplumb(stp); 4271 mutex_exit(&stp->sd_lock); 4272 return (0); 4273 } 4274 4275 case _I_REMOVE: 4276 { 4277 /* 4278 * To remove a module with a given name in a stream. The 4279 * caller of this ioctl needs to provide both the name and 4280 * the position of the module to be removed. This eliminates 4281 * the ambiguity of removal if a module is inserted/pushed 4282 * multiple times in a stream. In the first release, only 4283 * allow privileged user to use this ioctl. 4284 * Furthermore, the remove is only allowed 4285 * below an anchor if the zoneid is the same as the zoneid 4286 * which created the anchor. 4287 * 4288 * Note that we do not plan to support this ioctl 4289 * on pipes in the first release. We want to learn more 4290 * about the implications of these ioctls before extending 4291 * their support. And we do not think these features are 4292 * valuable for pipes. 4293 * 4294 * Neither do we support O/C hot stream. Note that only 4295 * the upper streams of TCP/IP stack are O/C hot streams. 4296 * The lower IP stream is not. 4297 * When there is a O/C cold barrier we do not allow removal 4298 * below the barrier. 4299 * 4300 * Also note that _I_REMOVE cannot be used to remove a 4301 * driver or the stream head. 
4302 */ 4303 STRUCT_DECL(strmodconf, strmodremove); 4304 queue_t *q; 4305 int pos; 4306 char mod_name[FMNAMESZ + 1]; 4307 boolean_t is_remove; 4308 4309 STRUCT_INIT(strmodremove, flag); 4310 if (stp->sd_flag & STRHUP) 4311 return (ENXIO); 4312 if (STRMATED(stp)) 4313 return (EINVAL); 4314 if ((error = secpolicy_net_config(crp, B_FALSE)) != 0) 4315 return (error); 4316 if (stp->sd_anchor != 0 && 4317 stp->sd_anchorzone != crgetzoneid(crp)) 4318 return (EINVAL); 4319 4320 error = strcopyin((void *)arg, STRUCT_BUF(strmodremove), 4321 STRUCT_SIZE(strmodremove), copyflag); 4322 if (error) 4323 return (error); 4324 4325 error = (copyflag & U_TO_K ? copyinstr : 4326 copystr)(STRUCT_FGETP(strmodremove, mod_name), 4327 mod_name, FMNAMESZ + 1, NULL); 4328 if (error) 4329 return ((error == ENAMETOOLONG) ? EINVAL : EFAULT); 4330 4331 if ((error = strstartplumb(stp, flag, cmd)) != 0) 4332 return (error); 4333 4334 /* 4335 * Match the name of given module to the name of module at 4336 * the given position. 4337 */ 4338 pos = STRUCT_FGET(strmodremove, pos); 4339 4340 is_remove = (pos != 0); 4341 for (q = stp->sd_wrq->q_next; SAMESTR(q) && pos > 0; 4342 q = q->q_next, pos--) 4343 ; 4344 if (pos > 0 || ! SAMESTR(q) || 4345 strncmp(q->q_qinfo->qi_minfo->mi_idname, mod_name, 4346 strlen(q->q_qinfo->qi_minfo->mi_idname)) != 0) { 4347 mutex_enter(&stp->sd_lock); 4348 strendplumb(stp); 4349 mutex_exit(&stp->sd_lock); 4350 return (EINVAL); 4351 } 4352 4353 /* 4354 * If the position is at or below an anchor, then the zoneid 4355 * must match the zoneid that created the anchor. 
4356 */ 4357 if (stp->sd_anchor != 0) { 4358 pos = STRUCT_FGET(strmodremove, pos); 4359 if (pos >= (stp->sd_pushcnt - stp->sd_anchor) && 4360 stp->sd_anchorzone != crgetzoneid(crp)) { 4361 mutex_enter(&stp->sd_lock); 4362 strendplumb(stp); 4363 mutex_exit(&stp->sd_lock); 4364 return (EPERM); 4365 } 4366 } 4367 4368 4369 ASSERT(!(q->q_flag & QREADR)); 4370 qdetach(_RD(q), 1, flag, crp, is_remove); 4371 4372 mutex_enter(&stp->sd_lock); 4373 4374 /* 4375 * As a performance concern we are caching the values of 4376 * q_minpsz and q_maxpsz of the module below the stream 4377 * head in the stream head. 4378 */ 4379 if (!is_remove) { 4380 mutex_enter(QLOCK(wrq->q_next)); 4381 rmin = wrq->q_next->q_minpsz; 4382 rmax = wrq->q_next->q_maxpsz; 4383 mutex_exit(QLOCK(wrq->q_next)); 4384 4385 /* Do this processing here as a performance concern */ 4386 if (strmsgsz != 0) { 4387 if (rmax == INFPSZ) 4388 rmax = strmsgsz; 4389 else { 4390 if (vp->v_type == VFIFO) 4391 rmax = MIN(PIPE_BUF, rmax); 4392 else rmax = MIN(strmsgsz, rmax); 4393 } 4394 } 4395 4396 mutex_enter(QLOCK(wrq)); 4397 stp->sd_qn_minpsz = rmin; 4398 stp->sd_qn_maxpsz = rmax; 4399 mutex_exit(QLOCK(wrq)); 4400 } 4401 4402 /* 4403 * Need to update the anchor value if this module is removed 4404 * at or below the anchor point. If the removed module is at 4405 * the anchor point, remove the anchor for this stream if 4406 * there is no module above the anchor point. Otherwise, if 4407 * the removed module is below the anchor point, decrement the 4408 * anchor point by 1. 
4409 */ 4410 if (stp->sd_anchor != 0) { 4411 pos = STRUCT_FGET(strmodremove, pos); 4412 if (pos == stp->sd_pushcnt - stp->sd_anchor + 1) 4413 stp->sd_anchor = 0; 4414 else if (pos > (stp->sd_pushcnt - stp->sd_anchor + 1)) 4415 stp->sd_anchor--; 4416 } 4417 4418 strendplumb(stp); 4419 mutex_exit(&stp->sd_lock); 4420 return (0); 4421 } 4422 4423 case I_ANCHOR: 4424 /* 4425 * Set the anchor position on the stream to reside at 4426 * the top module (in other words, the top module 4427 * cannot be popped). Anchors with a FIFO make no 4428 * obvious sense, so they're not allowed. 4429 */ 4430 mutex_enter(&stp->sd_lock); 4431 4432 if (stp->sd_vnode->v_type == VFIFO) { 4433 mutex_exit(&stp->sd_lock); 4434 return (EINVAL); 4435 } 4436 /* Only allow the same zoneid to update the anchor */ 4437 if (stp->sd_anchor != 0 && 4438 stp->sd_anchorzone != crgetzoneid(crp)) { 4439 mutex_exit(&stp->sd_lock); 4440 return (EINVAL); 4441 } 4442 stp->sd_anchor = stp->sd_pushcnt; 4443 stp->sd_anchorzone = crgetzoneid(crp); 4444 mutex_exit(&stp->sd_lock); 4445 return (0); 4446 4447 case I_LOOK: 4448 /* 4449 * Get name of first module downstream. 4450 * If no module, return an error. 4451 */ 4452 { 4453 claimstr(wrq); 4454 if (_SAMESTR(wrq) && wrq->q_next->q_next) { 4455 char *name = wrq->q_next->q_qinfo->qi_minfo->mi_idname; 4456 error = strcopyout(name, (void *)arg, strlen(name) + 1, 4457 copyflag); 4458 releasestr(wrq); 4459 return (error); 4460 } 4461 releasestr(wrq); 4462 return (EINVAL); 4463 } 4464 4465 case I_LINK: 4466 case I_PLINK: 4467 /* 4468 * Link a multiplexor. 4469 */ 4470 error = mlink(vp, cmd, (int)arg, crp, rvalp, 0); 4471 return (error); 4472 4473 case _I_PLINK_LH: 4474 /* 4475 * Link a multiplexor: Call must originate from kernel. 4476 */ 4477 if (kioctl) 4478 return (ldi_mlink_lh(vp, cmd, arg, crp, rvalp)); 4479 4480 return (EINVAL); 4481 case I_UNLINK: 4482 case I_PUNLINK: 4483 /* 4484 * Unlink a multiplexor. 
4485 * If arg is -1, unlink all links for which this is the 4486 * controlling stream. Otherwise, arg is an index number 4487 * for a link to be removed. 4488 */ 4489 { 4490 struct linkinfo *linkp; 4491 int native_arg = (int)arg; 4492 int type; 4493 netstack_t *ns; 4494 str_stack_t *ss; 4495 4496 TRACE_1(TR_FAC_STREAMS_FR, 4497 TR_I_UNLINK, "I_UNLINK/I_PUNLINK:%p", stp); 4498 if (vp->v_type == VFIFO) { 4499 return (EINVAL); 4500 } 4501 if (cmd == I_UNLINK) 4502 type = LINKNORMAL; 4503 else /* I_PUNLINK */ 4504 type = LINKPERSIST; 4505 if (native_arg == 0) { 4506 return (EINVAL); 4507 } 4508 ns = netstack_find_by_cred(crp); 4509 ASSERT(ns != NULL); 4510 ss = ns->netstack_str; 4511 ASSERT(ss != NULL); 4512 4513 if (native_arg == MUXID_ALL) 4514 error = munlinkall(stp, type, crp, rvalp, ss); 4515 else { 4516 mutex_enter(&muxifier); 4517 if (!(linkp = findlinks(stp, (int)arg, type, ss))) { 4518 /* invalid user supplied index number */ 4519 mutex_exit(&muxifier); 4520 netstack_rele(ss->ss_netstack); 4521 return (EINVAL); 4522 } 4523 /* munlink drops the muxifier lock */ 4524 error = munlink(stp, linkp, type, crp, rvalp, ss); 4525 } 4526 netstack_rele(ss->ss_netstack); 4527 return (error); 4528 } 4529 4530 case I_FLUSH: 4531 /* 4532 * send a flush message downstream 4533 * flush message can indicate 4534 * FLUSHR - flush read queue 4535 * FLUSHW - flush write queue 4536 * FLUSHRW - flush read/write queue 4537 */ 4538 if (stp->sd_flag & STRHUP) 4539 return (ENXIO); 4540 if (arg & ~FLUSHRW) 4541 return (EINVAL); 4542 4543 for (;;) { 4544 if (putnextctl1(stp->sd_wrq, M_FLUSH, (int)arg)) { 4545 break; 4546 } 4547 if (error = strwaitbuf(1, BPRI_HI)) { 4548 return (error); 4549 } 4550 } 4551 4552 /* 4553 * Send down an unsupported ioctl and wait for the nack 4554 * in order to allow the M_FLUSH to propagate back 4555 * up to the stream head. 
4556 * Replaces if (qready()) runqueues(); 4557 */ 4558 strioc.ic_cmd = -1; /* The unsupported ioctl */ 4559 strioc.ic_timout = 0; 4560 strioc.ic_len = 0; 4561 strioc.ic_dp = NULL; 4562 (void) strdoioctl(stp, &strioc, flag, K_TO_K, crp, rvalp); 4563 *rvalp = 0; 4564 return (0); 4565 4566 case I_FLUSHBAND: 4567 { 4568 struct bandinfo binfo; 4569 4570 error = strcopyin((void *)arg, &binfo, sizeof (binfo), 4571 copyflag); 4572 if (error) 4573 return (error); 4574 if (stp->sd_flag & STRHUP) 4575 return (ENXIO); 4576 if (binfo.bi_flag & ~FLUSHRW) 4577 return (EINVAL); 4578 while (!(mp = allocb(2, BPRI_HI))) { 4579 if (error = strwaitbuf(2, BPRI_HI)) 4580 return (error); 4581 } 4582 mp->b_datap->db_type = M_FLUSH; 4583 *mp->b_wptr++ = binfo.bi_flag | FLUSHBAND; 4584 *mp->b_wptr++ = binfo.bi_pri; 4585 putnext(stp->sd_wrq, mp); 4586 /* 4587 * Send down an unsupported ioctl and wait for the nack 4588 * in order to allow the M_FLUSH to propagate back 4589 * up to the stream head. 4590 * Replaces if (qready()) runqueues(); 4591 */ 4592 strioc.ic_cmd = -1; /* The unsupported ioctl */ 4593 strioc.ic_timout = 0; 4594 strioc.ic_len = 0; 4595 strioc.ic_dp = NULL; 4596 (void) strdoioctl(stp, &strioc, flag, K_TO_K, crp, rvalp); 4597 *rvalp = 0; 4598 return (0); 4599 } 4600 4601 case I_SRDOPT: 4602 /* 4603 * Set read options 4604 * 4605 * RNORM - default stream mode 4606 * RMSGN - message no discard 4607 * RMSGD - message discard 4608 * RPROTNORM - fail read with EBADMSG for M_[PC]PROTOs 4609 * RPROTDAT - convert M_[PC]PROTOs to M_DATAs 4610 * RPROTDIS - discard M_[PC]PROTOs and retain M_DATAs 4611 */ 4612 if (arg & ~(RMODEMASK | RPROTMASK)) 4613 return (EINVAL); 4614 4615 if ((arg & (RMSGD|RMSGN)) == (RMSGD|RMSGN)) 4616 return (EINVAL); 4617 4618 mutex_enter(&stp->sd_lock); 4619 switch (arg & RMODEMASK) { 4620 case RNORM: 4621 stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS); 4622 break; 4623 case RMSGD: 4624 stp->sd_read_opt = (stp->sd_read_opt & ~RD_MSGNODIS) | 4625 RD_MSGDIS; 4626 
break; 4627 case RMSGN: 4628 stp->sd_read_opt = (stp->sd_read_opt & ~RD_MSGDIS) | 4629 RD_MSGNODIS; 4630 break; 4631 } 4632 4633 switch (arg & RPROTMASK) { 4634 case RPROTNORM: 4635 stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS); 4636 break; 4637 4638 case RPROTDAT: 4639 stp->sd_read_opt = ((stp->sd_read_opt & ~RD_PROTDIS) | 4640 RD_PROTDAT); 4641 break; 4642 4643 case RPROTDIS: 4644 stp->sd_read_opt = ((stp->sd_read_opt & ~RD_PROTDAT) | 4645 RD_PROTDIS); 4646 break; 4647 } 4648 mutex_exit(&stp->sd_lock); 4649 return (0); 4650 4651 case I_GRDOPT: 4652 /* 4653 * Get read option and return the value 4654 * to spot pointed to by arg 4655 */ 4656 { 4657 int rdopt; 4658 4659 rdopt = ((stp->sd_read_opt & RD_MSGDIS) ? RMSGD : 4660 ((stp->sd_read_opt & RD_MSGNODIS) ? RMSGN : RNORM)); 4661 rdopt |= ((stp->sd_read_opt & RD_PROTDAT) ? RPROTDAT : 4662 ((stp->sd_read_opt & RD_PROTDIS) ? RPROTDIS : RPROTNORM)); 4663 4664 return (strcopyout(&rdopt, (void *)arg, sizeof (int), 4665 copyflag)); 4666 } 4667 4668 case I_SERROPT: 4669 /* 4670 * Set error options 4671 * 4672 * RERRNORM - persistent read errors 4673 * RERRNONPERSIST - non-persistent read errors 4674 * WERRNORM - persistent write errors 4675 * WERRNONPERSIST - non-persistent write errors 4676 */ 4677 if (arg & ~(RERRMASK | WERRMASK)) 4678 return (EINVAL); 4679 4680 mutex_enter(&stp->sd_lock); 4681 switch (arg & RERRMASK) { 4682 case RERRNORM: 4683 stp->sd_flag &= ~STRDERRNONPERSIST; 4684 break; 4685 case RERRNONPERSIST: 4686 stp->sd_flag |= STRDERRNONPERSIST; 4687 break; 4688 } 4689 switch (arg & WERRMASK) { 4690 case WERRNORM: 4691 stp->sd_flag &= ~STWRERRNONPERSIST; 4692 break; 4693 case WERRNONPERSIST: 4694 stp->sd_flag |= STWRERRNONPERSIST; 4695 break; 4696 } 4697 mutex_exit(&stp->sd_lock); 4698 return (0); 4699 4700 case I_GERROPT: 4701 /* 4702 * Get error option and return the value 4703 * to spot pointed to by arg 4704 */ 4705 { 4706 int erropt = 0; 4707 4708 erropt |= (stp->sd_flag & STRDERRNONPERSIST) ? 
RERRNONPERSIST : 4709 RERRNORM; 4710 erropt |= (stp->sd_flag & STWRERRNONPERSIST) ? WERRNONPERSIST : 4711 WERRNORM; 4712 return (strcopyout(&erropt, (void *)arg, sizeof (int), 4713 copyflag)); 4714 } 4715 4716 case I_SETSIG: 4717 /* 4718 * Register the calling proc to receive the SIGPOLL 4719 * signal based on the events given in arg. If 4720 * arg is zero, remove the proc from register list. 4721 */ 4722 { 4723 strsig_t *ssp, *pssp; 4724 struct pid *pidp; 4725 4726 pssp = NULL; 4727 pidp = curproc->p_pidp; 4728 /* 4729 * Hold sd_lock to prevent traversal of sd_siglist while 4730 * it is modified. 4731 */ 4732 mutex_enter(&stp->sd_lock); 4733 for (ssp = stp->sd_siglist; ssp && (ssp->ss_pidp != pidp); 4734 pssp = ssp, ssp = ssp->ss_next) 4735 ; 4736 4737 if (arg) { 4738 if (arg & ~(S_INPUT|S_HIPRI|S_MSG|S_HANGUP|S_ERROR| 4739 S_RDNORM|S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)) { 4740 mutex_exit(&stp->sd_lock); 4741 return (EINVAL); 4742 } 4743 if ((arg & S_BANDURG) && !(arg & S_RDBAND)) { 4744 mutex_exit(&stp->sd_lock); 4745 return (EINVAL); 4746 } 4747 4748 /* 4749 * If proc not already registered, add it 4750 * to list. 4751 */ 4752 if (!ssp) { 4753 ssp = kmem_alloc(sizeof (strsig_t), KM_SLEEP); 4754 ssp->ss_pidp = pidp; 4755 ssp->ss_pid = pidp->pid_id; 4756 ssp->ss_next = NULL; 4757 if (pssp) 4758 pssp->ss_next = ssp; 4759 else 4760 stp->sd_siglist = ssp; 4761 mutex_enter(&pidlock); 4762 PID_HOLD(pidp); 4763 mutex_exit(&pidlock); 4764 } 4765 4766 /* 4767 * Set events. 4768 */ 4769 ssp->ss_events = (int)arg; 4770 } else { 4771 /* 4772 * Remove proc from register list. 4773 */ 4774 if (ssp) { 4775 mutex_enter(&pidlock); 4776 PID_RELE(pidp); 4777 mutex_exit(&pidlock); 4778 if (pssp) 4779 pssp->ss_next = ssp->ss_next; 4780 else 4781 stp->sd_siglist = ssp->ss_next; 4782 kmem_free(ssp, sizeof (strsig_t)); 4783 } else { 4784 mutex_exit(&stp->sd_lock); 4785 return (EINVAL); 4786 } 4787 } 4788 4789 /* 4790 * Recalculate OR of sig events. 
4791 */ 4792 stp->sd_sigflags = 0; 4793 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 4794 stp->sd_sigflags |= ssp->ss_events; 4795 mutex_exit(&stp->sd_lock); 4796 return (0); 4797 } 4798 4799 case I_GETSIG: 4800 /* 4801 * Return (in arg) the current registration of events 4802 * for which the calling proc is to be signaled. 4803 */ 4804 { 4805 struct strsig *ssp; 4806 struct pid *pidp; 4807 4808 pidp = curproc->p_pidp; 4809 mutex_enter(&stp->sd_lock); 4810 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 4811 if (ssp->ss_pidp == pidp) { 4812 error = strcopyout(&ssp->ss_events, (void *)arg, 4813 sizeof (int), copyflag); 4814 mutex_exit(&stp->sd_lock); 4815 return (error); 4816 } 4817 mutex_exit(&stp->sd_lock); 4818 return (EINVAL); 4819 } 4820 4821 case I_ESETSIG: 4822 /* 4823 * Register the ss_pid to receive the SIGPOLL 4824 * signal based on the events is ss_events arg. If 4825 * ss_events is zero, remove the proc from register list. 4826 */ 4827 { 4828 struct strsig *ssp, *pssp; 4829 struct proc *proc; 4830 struct pid *pidp; 4831 pid_t pid; 4832 struct strsigset ss; 4833 4834 error = strcopyin((void *)arg, &ss, sizeof (ss), copyflag); 4835 if (error) 4836 return (error); 4837 4838 pid = ss.ss_pid; 4839 4840 if (ss.ss_events != 0) { 4841 /* 4842 * Permissions check by sending signal 0. 4843 * Note that when kill fails it does a set_errno 4844 * causing the system call to fail. 4845 */ 4846 error = kill(pid, 0); 4847 if (error) { 4848 return (error); 4849 } 4850 } 4851 mutex_enter(&pidlock); 4852 if (pid == 0) 4853 proc = curproc; 4854 else if (pid < 0) 4855 proc = pgfind(-pid); 4856 else 4857 proc = prfind(pid); 4858 if (proc == NULL) { 4859 mutex_exit(&pidlock); 4860 return (ESRCH); 4861 } 4862 if (pid < 0) 4863 pidp = proc->p_pgidp; 4864 else 4865 pidp = proc->p_pidp; 4866 ASSERT(pidp); 4867 /* 4868 * Get a hold on the pid structure while referencing it. 4869 * There is a separate PID_HOLD should it be inserted 4870 * in the list below. 
4871 */ 4872 PID_HOLD(pidp); 4873 mutex_exit(&pidlock); 4874 4875 pssp = NULL; 4876 /* 4877 * Hold sd_lock to prevent traversal of sd_siglist while 4878 * it is modified. 4879 */ 4880 mutex_enter(&stp->sd_lock); 4881 for (ssp = stp->sd_siglist; ssp && (ssp->ss_pid != pid); 4882 pssp = ssp, ssp = ssp->ss_next) 4883 ; 4884 4885 if (ss.ss_events) { 4886 if (ss.ss_events & 4887 ~(S_INPUT|S_HIPRI|S_MSG|S_HANGUP|S_ERROR| 4888 S_RDNORM|S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)) { 4889 mutex_exit(&stp->sd_lock); 4890 mutex_enter(&pidlock); 4891 PID_RELE(pidp); 4892 mutex_exit(&pidlock); 4893 return (EINVAL); 4894 } 4895 if ((ss.ss_events & S_BANDURG) && 4896 !(ss.ss_events & S_RDBAND)) { 4897 mutex_exit(&stp->sd_lock); 4898 mutex_enter(&pidlock); 4899 PID_RELE(pidp); 4900 mutex_exit(&pidlock); 4901 return (EINVAL); 4902 } 4903 4904 /* 4905 * If proc not already registered, add it 4906 * to list. 4907 */ 4908 if (!ssp) { 4909 ssp = kmem_alloc(sizeof (strsig_t), KM_SLEEP); 4910 ssp->ss_pidp = pidp; 4911 ssp->ss_pid = pid; 4912 ssp->ss_next = NULL; 4913 if (pssp) 4914 pssp->ss_next = ssp; 4915 else 4916 stp->sd_siglist = ssp; 4917 mutex_enter(&pidlock); 4918 PID_HOLD(pidp); 4919 mutex_exit(&pidlock); 4920 } 4921 4922 /* 4923 * Set events. 4924 */ 4925 ssp->ss_events = ss.ss_events; 4926 } else { 4927 /* 4928 * Remove proc from register list. 4929 */ 4930 if (ssp) { 4931 mutex_enter(&pidlock); 4932 PID_RELE(pidp); 4933 mutex_exit(&pidlock); 4934 if (pssp) 4935 pssp->ss_next = ssp->ss_next; 4936 else 4937 stp->sd_siglist = ssp->ss_next; 4938 kmem_free(ssp, sizeof (strsig_t)); 4939 } else { 4940 mutex_exit(&stp->sd_lock); 4941 mutex_enter(&pidlock); 4942 PID_RELE(pidp); 4943 mutex_exit(&pidlock); 4944 return (EINVAL); 4945 } 4946 } 4947 4948 /* 4949 * Recalculate OR of sig events. 
4950 */ 4951 stp->sd_sigflags = 0; 4952 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 4953 stp->sd_sigflags |= ssp->ss_events; 4954 mutex_exit(&stp->sd_lock); 4955 mutex_enter(&pidlock); 4956 PID_RELE(pidp); 4957 mutex_exit(&pidlock); 4958 return (0); 4959 } 4960 4961 case I_EGETSIG: 4962 /* 4963 * Return (in arg) the current registration of events 4964 * for which the calling proc is to be signaled. 4965 */ 4966 { 4967 struct strsig *ssp; 4968 struct proc *proc; 4969 pid_t pid; 4970 struct pid *pidp; 4971 struct strsigset ss; 4972 4973 error = strcopyin((void *)arg, &ss, sizeof (ss), copyflag); 4974 if (error) 4975 return (error); 4976 4977 pid = ss.ss_pid; 4978 mutex_enter(&pidlock); 4979 if (pid == 0) 4980 proc = curproc; 4981 else if (pid < 0) 4982 proc = pgfind(-pid); 4983 else 4984 proc = prfind(pid); 4985 if (proc == NULL) { 4986 mutex_exit(&pidlock); 4987 return (ESRCH); 4988 } 4989 if (pid < 0) 4990 pidp = proc->p_pgidp; 4991 else 4992 pidp = proc->p_pidp; 4993 4994 /* Prevent the pidp from being reassigned */ 4995 PID_HOLD(pidp); 4996 mutex_exit(&pidlock); 4997 4998 mutex_enter(&stp->sd_lock); 4999 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 5000 if (ssp->ss_pid == pid) { 5001 ss.ss_pid = ssp->ss_pid; 5002 ss.ss_events = ssp->ss_events; 5003 error = strcopyout(&ss, (void *)arg, 5004 sizeof (struct strsigset), copyflag); 5005 mutex_exit(&stp->sd_lock); 5006 mutex_enter(&pidlock); 5007 PID_RELE(pidp); 5008 mutex_exit(&pidlock); 5009 return (error); 5010 } 5011 mutex_exit(&stp->sd_lock); 5012 mutex_enter(&pidlock); 5013 PID_RELE(pidp); 5014 mutex_exit(&pidlock); 5015 return (EINVAL); 5016 } 5017 5018 case I_PEEK: 5019 { 5020 STRUCT_DECL(strpeek, strpeek); 5021 size_t n; 5022 mblk_t *fmp, *tmp_mp = NULL; 5023 5024 STRUCT_INIT(strpeek, flag); 5025 5026 error = strcopyin((void *)arg, STRUCT_BUF(strpeek), 5027 STRUCT_SIZE(strpeek), copyflag); 5028 if (error) 5029 return (error); 5030 5031 mutex_enter(QLOCK(rdq)); 5032 /* 5033 * Skip the invalid 
messages 5034 */ 5035 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next) 5036 if (mp->b_datap->db_type != M_SIG) 5037 break; 5038 5039 /* 5040 * If user has requested to peek at a high priority message 5041 * and first message is not, return 0 5042 */ 5043 if (mp != NULL) { 5044 if ((STRUCT_FGET(strpeek, flags) & RS_HIPRI) && 5045 queclass(mp) == QNORM) { 5046 *rvalp = 0; 5047 mutex_exit(QLOCK(rdq)); 5048 return (0); 5049 } 5050 } else if (stp->sd_struiordq == NULL || 5051 (STRUCT_FGET(strpeek, flags) & RS_HIPRI)) { 5052 /* 5053 * No mblks to look at at the streamhead and 5054 * 1). This isn't a synch stream or 5055 * 2). This is a synch stream but caller wants high 5056 * priority messages which is not supported by 5057 * the synch stream. (it only supports QNORM) 5058 */ 5059 *rvalp = 0; 5060 mutex_exit(QLOCK(rdq)); 5061 return (0); 5062 } 5063 5064 fmp = mp; 5065 5066 if (mp && mp->b_datap->db_type == M_PASSFP) { 5067 mutex_exit(QLOCK(rdq)); 5068 return (EBADMSG); 5069 } 5070 5071 ASSERT(mp == NULL || mp->b_datap->db_type == M_PCPROTO || 5072 mp->b_datap->db_type == M_PROTO || 5073 mp->b_datap->db_type == M_DATA); 5074 5075 if (mp && mp->b_datap->db_type == M_PCPROTO) { 5076 STRUCT_FSET(strpeek, flags, RS_HIPRI); 5077 } else { 5078 STRUCT_FSET(strpeek, flags, 0); 5079 } 5080 5081 5082 if (mp && ((tmp_mp = dupmsg(mp)) == NULL)) { 5083 mutex_exit(QLOCK(rdq)); 5084 return (ENOSR); 5085 } 5086 mutex_exit(QLOCK(rdq)); 5087 5088 /* 5089 * set mp = tmp_mp, so that I_PEEK processing can continue. 5090 * tmp_mp is used to free the dup'd message. 5091 */ 5092 mp = tmp_mp; 5093 5094 uio.uio_fmode = 0; 5095 uio.uio_extflg = UIO_COPY_CACHED; 5096 uio.uio_segflg = (copyflag == U_TO_K) ? UIO_USERSPACE : 5097 UIO_SYSSPACE; 5098 uio.uio_limit = 0; 5099 /* 5100 * First process PROTO blocks, if any. 5101 * If user doesn't want to get ctl info by setting maxlen <= 0, 5102 * then set len to -1/0 and skip control blocks part. 
5103 */ 5104 if (STRUCT_FGET(strpeek, ctlbuf.maxlen) < 0) 5105 STRUCT_FSET(strpeek, ctlbuf.len, -1); 5106 else if (STRUCT_FGET(strpeek, ctlbuf.maxlen) == 0) 5107 STRUCT_FSET(strpeek, ctlbuf.len, 0); 5108 else { 5109 int ctl_part = 0; 5110 5111 iov.iov_base = STRUCT_FGETP(strpeek, ctlbuf.buf); 5112 iov.iov_len = STRUCT_FGET(strpeek, ctlbuf.maxlen); 5113 uio.uio_iov = &iov; 5114 uio.uio_resid = iov.iov_len; 5115 uio.uio_loffset = 0; 5116 uio.uio_iovcnt = 1; 5117 while (mp && mp->b_datap->db_type != M_DATA && 5118 uio.uio_resid >= 0) { 5119 ASSERT(STRUCT_FGET(strpeek, flags) == 0 ? 5120 mp->b_datap->db_type == M_PROTO : 5121 mp->b_datap->db_type == M_PCPROTO); 5122 5123 if ((n = MIN(uio.uio_resid, 5124 mp->b_wptr - mp->b_rptr)) != 0 && 5125 (error = uiomove((char *)mp->b_rptr, n, 5126 UIO_READ, &uio)) != 0) { 5127 freemsg(tmp_mp); 5128 return (error); 5129 } 5130 ctl_part = 1; 5131 mp = mp->b_cont; 5132 } 5133 /* No ctl message */ 5134 if (ctl_part == 0) 5135 STRUCT_FSET(strpeek, ctlbuf.len, -1); 5136 else 5137 STRUCT_FSET(strpeek, ctlbuf.len, 5138 STRUCT_FGET(strpeek, ctlbuf.maxlen) - 5139 uio.uio_resid); 5140 } 5141 5142 /* 5143 * Now process DATA blocks, if any. 5144 * If user doesn't want to get data info by setting maxlen <= 0, 5145 * then set len to -1/0 and skip data blocks part. 
5146 */ 5147 if (STRUCT_FGET(strpeek, databuf.maxlen) < 0) 5148 STRUCT_FSET(strpeek, databuf.len, -1); 5149 else if (STRUCT_FGET(strpeek, databuf.maxlen) == 0) 5150 STRUCT_FSET(strpeek, databuf.len, 0); 5151 else { 5152 int data_part = 0; 5153 5154 iov.iov_base = STRUCT_FGETP(strpeek, databuf.buf); 5155 iov.iov_len = STRUCT_FGET(strpeek, databuf.maxlen); 5156 uio.uio_iov = &iov; 5157 uio.uio_resid = iov.iov_len; 5158 uio.uio_loffset = 0; 5159 uio.uio_iovcnt = 1; 5160 while (mp && uio.uio_resid) { 5161 if (mp->b_datap->db_type == M_DATA) { 5162 if ((n = MIN(uio.uio_resid, 5163 mp->b_wptr - mp->b_rptr)) != 0 && 5164 (error = uiomove((char *)mp->b_rptr, 5165 n, UIO_READ, &uio)) != 0) { 5166 freemsg(tmp_mp); 5167 return (error); 5168 } 5169 data_part = 1; 5170 } 5171 ASSERT(data_part == 0 || 5172 mp->b_datap->db_type == M_DATA); 5173 mp = mp->b_cont; 5174 } 5175 /* No data message */ 5176 if (data_part == 0) 5177 STRUCT_FSET(strpeek, databuf.len, -1); 5178 else 5179 STRUCT_FSET(strpeek, databuf.len, 5180 STRUCT_FGET(strpeek, databuf.maxlen) - 5181 uio.uio_resid); 5182 } 5183 freemsg(tmp_mp); 5184 5185 /* 5186 * It is a synch stream and user wants to get 5187 * data (maxlen > 0). 5188 * uio setup is done by the codes that process DATA 5189 * blocks above. 5190 */ 5191 if ((fmp == NULL) && STRUCT_FGET(strpeek, databuf.maxlen) > 0) { 5192 infod_t infod; 5193 5194 infod.d_cmd = INFOD_COPYOUT; 5195 infod.d_res = 0; 5196 infod.d_uiop = &uio; 5197 error = infonext(rdq, &infod); 5198 if (error == EINVAL || error == EBUSY) 5199 error = 0; 5200 if (error) 5201 return (error); 5202 STRUCT_FSET(strpeek, databuf.len, STRUCT_FGET(strpeek, 5203 databuf.maxlen) - uio.uio_resid); 5204 if (STRUCT_FGET(strpeek, databuf.len) == 0) { 5205 /* 5206 * No data found by the infonext(). 
5207 */ 5208 STRUCT_FSET(strpeek, databuf.len, -1); 5209 } 5210 } 5211 error = strcopyout(STRUCT_BUF(strpeek), (void *)arg, 5212 STRUCT_SIZE(strpeek), copyflag); 5213 if (error) { 5214 return (error); 5215 } 5216 /* 5217 * If there is no message retrieved, set return code to 0 5218 * otherwise, set it to 1. 5219 */ 5220 if (STRUCT_FGET(strpeek, ctlbuf.len) == -1 && 5221 STRUCT_FGET(strpeek, databuf.len) == -1) 5222 *rvalp = 0; 5223 else 5224 *rvalp = 1; 5225 return (0); 5226 } 5227 5228 case I_FDINSERT: 5229 { 5230 STRUCT_DECL(strfdinsert, strfdinsert); 5231 struct file *resftp; 5232 struct stdata *resstp; 5233 t_uscalar_t ival; 5234 ssize_t msgsize; 5235 struct strbuf mctl; 5236 5237 STRUCT_INIT(strfdinsert, flag); 5238 if (stp->sd_flag & STRHUP) 5239 return (ENXIO); 5240 /* 5241 * STRDERR, STWRERR and STPLEX tested above. 5242 */ 5243 error = strcopyin((void *)arg, STRUCT_BUF(strfdinsert), 5244 STRUCT_SIZE(strfdinsert), copyflag); 5245 if (error) 5246 return (error); 5247 5248 if (STRUCT_FGET(strfdinsert, offset) < 0 || 5249 (STRUCT_FGET(strfdinsert, offset) % 5250 sizeof (t_uscalar_t)) != 0) 5251 return (EINVAL); 5252 if ((resftp = getf(STRUCT_FGET(strfdinsert, fildes))) != NULL) { 5253 if ((resstp = resftp->f_vnode->v_stream) == NULL) { 5254 releasef(STRUCT_FGET(strfdinsert, fildes)); 5255 return (EINVAL); 5256 } 5257 } else 5258 return (EINVAL); 5259 5260 mutex_enter(&resstp->sd_lock); 5261 if (resstp->sd_flag & (STRDERR|STWRERR|STRHUP|STPLEX)) { 5262 error = strgeterr(resstp, 5263 STRDERR|STWRERR|STRHUP|STPLEX, 0); 5264 if (error != 0) { 5265 mutex_exit(&resstp->sd_lock); 5266 releasef(STRUCT_FGET(strfdinsert, fildes)); 5267 return (error); 5268 } 5269 } 5270 mutex_exit(&resstp->sd_lock); 5271 5272 #ifdef _ILP32 5273 { 5274 queue_t *q; 5275 queue_t *mate = NULL; 5276 5277 /* get read queue of stream terminus */ 5278 claimstr(resstp->sd_wrq); 5279 for (q = resstp->sd_wrq->q_next; q->q_next != NULL; 5280 q = q->q_next) 5281 if (!STRMATED(resstp) && STREAM(q) != 
resstp && 5282 mate == NULL) { 5283 ASSERT(q->q_qinfo->qi_srvp); 5284 ASSERT(_OTHERQ(q)->q_qinfo->qi_srvp); 5285 claimstr(q); 5286 mate = q; 5287 } 5288 q = _RD(q); 5289 if (mate) 5290 releasestr(mate); 5291 releasestr(resstp->sd_wrq); 5292 ival = (t_uscalar_t)q; 5293 } 5294 #else 5295 ival = (t_uscalar_t)getminor(resftp->f_vnode->v_rdev); 5296 #endif /* _ILP32 */ 5297 5298 if (STRUCT_FGET(strfdinsert, ctlbuf.len) < 5299 STRUCT_FGET(strfdinsert, offset) + sizeof (t_uscalar_t)) { 5300 releasef(STRUCT_FGET(strfdinsert, fildes)); 5301 return (EINVAL); 5302 } 5303 5304 /* 5305 * Check for legal flag value. 5306 */ 5307 if (STRUCT_FGET(strfdinsert, flags) & ~RS_HIPRI) { 5308 releasef(STRUCT_FGET(strfdinsert, fildes)); 5309 return (EINVAL); 5310 } 5311 5312 /* get these values from those cached in the stream head */ 5313 mutex_enter(QLOCK(stp->sd_wrq)); 5314 rmin = stp->sd_qn_minpsz; 5315 rmax = stp->sd_qn_maxpsz; 5316 mutex_exit(QLOCK(stp->sd_wrq)); 5317 5318 /* 5319 * Make sure ctl and data sizes together fall within 5320 * the limits of the max and min receive packet sizes 5321 * and do not exceed system limit. A negative data 5322 * length means that no data part is to be sent. 
5323 */ 5324 ASSERT((rmax >= 0) || (rmax == INFPSZ)); 5325 if (rmax == 0) { 5326 releasef(STRUCT_FGET(strfdinsert, fildes)); 5327 return (ERANGE); 5328 } 5329 if ((msgsize = STRUCT_FGET(strfdinsert, databuf.len)) < 0) 5330 msgsize = 0; 5331 if ((msgsize < rmin) || 5332 ((msgsize > rmax) && (rmax != INFPSZ)) || 5333 (STRUCT_FGET(strfdinsert, ctlbuf.len) > strctlsz)) { 5334 releasef(STRUCT_FGET(strfdinsert, fildes)); 5335 return (ERANGE); 5336 } 5337 5338 mutex_enter(&stp->sd_lock); 5339 while (!(STRUCT_FGET(strfdinsert, flags) & RS_HIPRI) && 5340 !canputnext(stp->sd_wrq)) { 5341 if ((error = strwaitq(stp, WRITEWAIT, (ssize_t)0, 5342 flag, -1, &done)) != 0 || done) { 5343 mutex_exit(&stp->sd_lock); 5344 releasef(STRUCT_FGET(strfdinsert, fildes)); 5345 return (error); 5346 } 5347 if ((error = i_straccess(stp, access)) != 0) { 5348 mutex_exit(&stp->sd_lock); 5349 releasef( 5350 STRUCT_FGET(strfdinsert, fildes)); 5351 return (error); 5352 } 5353 } 5354 mutex_exit(&stp->sd_lock); 5355 5356 /* 5357 * Copy strfdinsert.ctlbuf into native form of 5358 * ctlbuf to pass down into strmakemsg(). 5359 */ 5360 mctl.maxlen = STRUCT_FGET(strfdinsert, ctlbuf.maxlen); 5361 mctl.len = STRUCT_FGET(strfdinsert, ctlbuf.len); 5362 mctl.buf = STRUCT_FGETP(strfdinsert, ctlbuf.buf); 5363 5364 iov.iov_base = STRUCT_FGETP(strfdinsert, databuf.buf); 5365 iov.iov_len = STRUCT_FGET(strfdinsert, databuf.len); 5366 uio.uio_iov = &iov; 5367 uio.uio_iovcnt = 1; 5368 uio.uio_loffset = 0; 5369 uio.uio_segflg = (copyflag == U_TO_K) ? 
UIO_USERSPACE : 5370 UIO_SYSSPACE; 5371 uio.uio_fmode = 0; 5372 uio.uio_extflg = UIO_COPY_CACHED; 5373 uio.uio_resid = iov.iov_len; 5374 if ((error = strmakemsg(&mctl, 5375 &msgsize, &uio, stp, 5376 STRUCT_FGET(strfdinsert, flags), &mp)) != 0 || !mp) { 5377 STRUCT_FSET(strfdinsert, databuf.len, msgsize); 5378 releasef(STRUCT_FGET(strfdinsert, fildes)); 5379 return (error); 5380 } 5381 5382 STRUCT_FSET(strfdinsert, databuf.len, msgsize); 5383 5384 /* 5385 * Place the possibly reencoded queue pointer 'offset' bytes 5386 * from the start of the control portion of the message. 5387 */ 5388 *((t_uscalar_t *)(mp->b_rptr + 5389 STRUCT_FGET(strfdinsert, offset))) = ival; 5390 5391 /* 5392 * Put message downstream. 5393 */ 5394 stream_willservice(stp); 5395 putnext(stp->sd_wrq, mp); 5396 stream_runservice(stp); 5397 releasef(STRUCT_FGET(strfdinsert, fildes)); 5398 return (error); 5399 } 5400 5401 case I_SENDFD: 5402 { 5403 struct file *fp; 5404 5405 if ((fp = getf((int)arg)) == NULL) 5406 return (EBADF); 5407 error = do_sendfp(stp, fp, crp); 5408 if (audit_active) { 5409 audit_fdsend((int)arg, fp, error); 5410 } 5411 releasef((int)arg); 5412 return (error); 5413 } 5414 5415 case I_RECVFD: 5416 case I_E_RECVFD: 5417 { 5418 struct k_strrecvfd *srf; 5419 int i, fd; 5420 5421 mutex_enter(&stp->sd_lock); 5422 while (!(mp = getq(rdq))) { 5423 if (stp->sd_flag & (STRHUP|STREOF)) { 5424 mutex_exit(&stp->sd_lock); 5425 return (ENXIO); 5426 } 5427 if ((error = strwaitq(stp, GETWAIT, (ssize_t)0, 5428 flag, -1, &done)) != 0 || done) { 5429 mutex_exit(&stp->sd_lock); 5430 return (error); 5431 } 5432 if ((error = i_straccess(stp, access)) != 0) { 5433 mutex_exit(&stp->sd_lock); 5434 return (error); 5435 } 5436 } 5437 if (mp->b_datap->db_type != M_PASSFP) { 5438 putback(stp, rdq, mp, mp->b_band); 5439 mutex_exit(&stp->sd_lock); 5440 return (EBADMSG); 5441 } 5442 mutex_exit(&stp->sd_lock); 5443 5444 srf = (struct k_strrecvfd *)mp->b_rptr; 5445 if ((fd = ufalloc(0)) == -1) { 5446 
mutex_enter(&stp->sd_lock); 5447 putback(stp, rdq, mp, mp->b_band); 5448 mutex_exit(&stp->sd_lock); 5449 return (EMFILE); 5450 } 5451 if (cmd == I_RECVFD) { 5452 struct o_strrecvfd ostrfd; 5453 5454 /* check to see if uid/gid values are too large. */ 5455 5456 if (srf->uid > (o_uid_t)USHRT_MAX || 5457 srf->gid > (o_gid_t)USHRT_MAX) { 5458 mutex_enter(&stp->sd_lock); 5459 putback(stp, rdq, mp, mp->b_band); 5460 mutex_exit(&stp->sd_lock); 5461 setf(fd, NULL); /* release fd entry */ 5462 return (EOVERFLOW); 5463 } 5464 5465 ostrfd.fd = fd; 5466 ostrfd.uid = (o_uid_t)srf->uid; 5467 ostrfd.gid = (o_gid_t)srf->gid; 5468 5469 /* Null the filler bits */ 5470 for (i = 0; i < 8; i++) 5471 ostrfd.fill[i] = 0; 5472 5473 error = strcopyout(&ostrfd, (void *)arg, 5474 sizeof (struct o_strrecvfd), copyflag); 5475 } else { /* I_E_RECVFD */ 5476 struct strrecvfd strfd; 5477 5478 strfd.fd = fd; 5479 strfd.uid = srf->uid; 5480 strfd.gid = srf->gid; 5481 5482 /* null the filler bits */ 5483 for (i = 0; i < 8; i++) 5484 strfd.fill[i] = 0; 5485 5486 error = strcopyout(&strfd, (void *)arg, 5487 sizeof (struct strrecvfd), copyflag); 5488 } 5489 5490 if (error) { 5491 setf(fd, NULL); /* release fd entry */ 5492 mutex_enter(&stp->sd_lock); 5493 putback(stp, rdq, mp, mp->b_band); 5494 mutex_exit(&stp->sd_lock); 5495 return (error); 5496 } 5497 if (audit_active) { 5498 audit_fdrecv(fd, srf->fp); 5499 } 5500 5501 /* 5502 * Always increment f_count since the freemsg() below will 5503 * always call free_passfp() which performs a closef(). 5504 */ 5505 mutex_enter(&srf->fp->f_tlock); 5506 srf->fp->f_count++; 5507 mutex_exit(&srf->fp->f_tlock); 5508 setf(fd, srf->fp); 5509 freemsg(mp); 5510 return (0); 5511 } 5512 5513 case I_SWROPT: 5514 /* 5515 * Set/clear the write options. arg is a bit 5516 * mask with any of the following bits set... 5517 * SNDZERO - send zero length message 5518 * SNDPIPE - send sigpipe to process if 5519 * sd_werror is set and process is 5520 * doing a write or putmsg. 
5521 * The new stream head write options should reflect 5522 * what is in arg. 5523 */ 5524 if (arg & ~(SNDZERO|SNDPIPE)) 5525 return (EINVAL); 5526 5527 mutex_enter(&stp->sd_lock); 5528 stp->sd_wput_opt &= ~(SW_SIGPIPE|SW_SNDZERO); 5529 if (arg & SNDZERO) 5530 stp->sd_wput_opt |= SW_SNDZERO; 5531 if (arg & SNDPIPE) 5532 stp->sd_wput_opt |= SW_SIGPIPE; 5533 mutex_exit(&stp->sd_lock); 5534 return (0); 5535 5536 case I_GWROPT: 5537 { 5538 int wropt = 0; 5539 5540 if (stp->sd_wput_opt & SW_SNDZERO) 5541 wropt |= SNDZERO; 5542 if (stp->sd_wput_opt & SW_SIGPIPE) 5543 wropt |= SNDPIPE; 5544 return (strcopyout(&wropt, (void *)arg, sizeof (wropt), 5545 copyflag)); 5546 } 5547 5548 case I_LIST: 5549 /* 5550 * Returns all the modules found on this stream, 5551 * upto the driver. If argument is NULL, return the 5552 * number of modules (including driver). If argument 5553 * is not NULL, copy the names into the structure 5554 * provided. 5555 */ 5556 5557 { 5558 queue_t *q; 5559 int num_modules, space_allocated; 5560 STRUCT_DECL(str_list, strlist); 5561 struct str_mlist *mlist_ptr; 5562 5563 if (arg == NULL) { /* Return number of modules plus driver */ 5564 q = stp->sd_wrq; 5565 if (stp->sd_vnode->v_type == VFIFO) { 5566 *rvalp = stp->sd_pushcnt; 5567 } else { 5568 *rvalp = stp->sd_pushcnt + 1; 5569 } 5570 } else { 5571 STRUCT_INIT(strlist, flag); 5572 5573 error = strcopyin((void *)arg, STRUCT_BUF(strlist), 5574 STRUCT_SIZE(strlist), copyflag); 5575 if (error) 5576 return (error); 5577 5578 space_allocated = STRUCT_FGET(strlist, sl_nmods); 5579 if ((space_allocated) <= 0) 5580 return (EINVAL); 5581 claimstr(stp->sd_wrq); 5582 q = stp->sd_wrq; 5583 num_modules = 0; 5584 while (_SAMESTR(q) && (space_allocated != 0)) { 5585 char *name = 5586 q->q_next->q_qinfo->qi_minfo->mi_idname; 5587 5588 mlist_ptr = STRUCT_FGETP(strlist, sl_modlist); 5589 5590 error = strcopyout(name, mlist_ptr, 5591 strlen(name) + 1, copyflag); 5592 5593 if (error) { 5594 releasestr(stp->sd_wrq); 5595 
return (error); 5596 } 5597 q = q->q_next; 5598 space_allocated--; 5599 num_modules++; 5600 mlist_ptr = 5601 (struct str_mlist *)((uintptr_t)mlist_ptr + 5602 sizeof (struct str_mlist)); 5603 STRUCT_FSETP(strlist, sl_modlist, mlist_ptr); 5604 } 5605 releasestr(stp->sd_wrq); 5606 error = strcopyout(&num_modules, (void *)arg, 5607 sizeof (int), copyflag); 5608 } 5609 return (error); 5610 } 5611 5612 case I_CKBAND: 5613 { 5614 queue_t *q; 5615 qband_t *qbp; 5616 5617 if ((arg < 0) || (arg >= NBAND)) 5618 return (EINVAL); 5619 q = _RD(stp->sd_wrq); 5620 mutex_enter(QLOCK(q)); 5621 if (arg > (int)q->q_nband) { 5622 *rvalp = 0; 5623 } else { 5624 if (arg == 0) { 5625 if (q->q_first) 5626 *rvalp = 1; 5627 else 5628 *rvalp = 0; 5629 } else { 5630 qbp = q->q_bandp; 5631 while (--arg > 0) 5632 qbp = qbp->qb_next; 5633 if (qbp->qb_first) 5634 *rvalp = 1; 5635 else 5636 *rvalp = 0; 5637 } 5638 } 5639 mutex_exit(QLOCK(q)); 5640 return (0); 5641 } 5642 5643 case I_GETBAND: 5644 { 5645 int intpri; 5646 queue_t *q; 5647 5648 q = _RD(stp->sd_wrq); 5649 mutex_enter(QLOCK(q)); 5650 mp = q->q_first; 5651 if (!mp) { 5652 mutex_exit(QLOCK(q)); 5653 return (ENODATA); 5654 } 5655 intpri = (int)mp->b_band; 5656 error = strcopyout(&intpri, (void *)arg, sizeof (int), 5657 copyflag); 5658 mutex_exit(QLOCK(q)); 5659 return (error); 5660 } 5661 5662 case I_ATMARK: 5663 { 5664 queue_t *q; 5665 5666 if (arg & ~(ANYMARK|LASTMARK)) 5667 return (EINVAL); 5668 q = _RD(stp->sd_wrq); 5669 mutex_enter(&stp->sd_lock); 5670 if ((stp->sd_flag & STRATMARK) && (arg == ANYMARK)) { 5671 *rvalp = 1; 5672 } else { 5673 mutex_enter(QLOCK(q)); 5674 mp = q->q_first; 5675 5676 if (mp == NULL) 5677 *rvalp = 0; 5678 else if ((arg == ANYMARK) && (mp->b_flag & MSGMARK)) 5679 *rvalp = 1; 5680 else if ((arg == LASTMARK) && (mp == stp->sd_mark)) 5681 *rvalp = 1; 5682 else 5683 *rvalp = 0; 5684 mutex_exit(QLOCK(q)); 5685 } 5686 mutex_exit(&stp->sd_lock); 5687 return (0); 5688 } 5689 5690 case I_CANPUT: 5691 { 5692 char band; 
5693 5694 if ((arg < 0) || (arg >= NBAND)) 5695 return (EINVAL); 5696 band = (char)arg; 5697 *rvalp = bcanputnext(stp->sd_wrq, band); 5698 return (0); 5699 } 5700 5701 case I_SETCLTIME: 5702 { 5703 int closetime; 5704 5705 error = strcopyin((void *)arg, &closetime, sizeof (int), 5706 copyflag); 5707 if (error) 5708 return (error); 5709 if (closetime < 0) 5710 return (EINVAL); 5711 5712 stp->sd_closetime = closetime; 5713 return (0); 5714 } 5715 5716 case I_GETCLTIME: 5717 { 5718 int closetime; 5719 5720 closetime = stp->sd_closetime; 5721 return (strcopyout(&closetime, (void *)arg, sizeof (int), 5722 copyflag)); 5723 } 5724 5725 case TIOCGSID: 5726 { 5727 pid_t sid; 5728 5729 mutex_enter(&stp->sd_lock); 5730 if (stp->sd_sidp == NULL) { 5731 mutex_exit(&stp->sd_lock); 5732 return (ENOTTY); 5733 } 5734 sid = stp->sd_sidp->pid_id; 5735 mutex_exit(&stp->sd_lock); 5736 return (strcopyout(&sid, (void *)arg, sizeof (pid_t), 5737 copyflag)); 5738 } 5739 5740 case TIOCSPGRP: 5741 { 5742 pid_t pgrp; 5743 proc_t *q; 5744 pid_t sid, fg_pgid, bg_pgid; 5745 5746 if (error = strcopyin((void *)arg, &pgrp, sizeof (pid_t), 5747 copyflag)) 5748 return (error); 5749 mutex_enter(&stp->sd_lock); 5750 mutex_enter(&pidlock); 5751 if (stp->sd_sidp != ttoproc(curthread)->p_sessp->s_sidp) { 5752 mutex_exit(&pidlock); 5753 mutex_exit(&stp->sd_lock); 5754 return (ENOTTY); 5755 } 5756 if (pgrp == stp->sd_pgidp->pid_id) { 5757 mutex_exit(&pidlock); 5758 mutex_exit(&stp->sd_lock); 5759 return (0); 5760 } 5761 if (pgrp <= 0 || pgrp >= maxpid) { 5762 mutex_exit(&pidlock); 5763 mutex_exit(&stp->sd_lock); 5764 return (EINVAL); 5765 } 5766 if ((q = pgfind(pgrp)) == NULL || 5767 q->p_sessp != ttoproc(curthread)->p_sessp) { 5768 mutex_exit(&pidlock); 5769 mutex_exit(&stp->sd_lock); 5770 return (EPERM); 5771 } 5772 sid = stp->sd_sidp->pid_id; 5773 fg_pgid = q->p_pgrp; 5774 bg_pgid = stp->sd_pgidp->pid_id; 5775 CL_SET_PROCESS_GROUP(curthread, sid, bg_pgid, fg_pgid); 5776 PID_RELE(stp->sd_pgidp); 5777 
ctty_clear_sighuped(); 5778 stp->sd_pgidp = q->p_pgidp; 5779 PID_HOLD(stp->sd_pgidp); 5780 mutex_exit(&pidlock); 5781 mutex_exit(&stp->sd_lock); 5782 return (0); 5783 } 5784 5785 case TIOCGPGRP: 5786 { 5787 pid_t pgrp; 5788 5789 mutex_enter(&stp->sd_lock); 5790 if (stp->sd_sidp == NULL) { 5791 mutex_exit(&stp->sd_lock); 5792 return (ENOTTY); 5793 } 5794 pgrp = stp->sd_pgidp->pid_id; 5795 mutex_exit(&stp->sd_lock); 5796 return (strcopyout(&pgrp, (void *)arg, sizeof (pid_t), 5797 copyflag)); 5798 } 5799 5800 case TIOCSCTTY: 5801 { 5802 return (strctty(stp)); 5803 } 5804 5805 case TIOCNOTTY: 5806 { 5807 /* freectty() always assumes curproc. */ 5808 if (freectty(B_FALSE) != 0) 5809 return (0); 5810 return (ENOTTY); 5811 } 5812 5813 case FIONBIO: 5814 case FIOASYNC: 5815 return (0); /* handled by the upper layer */ 5816 } 5817 } 5818 5819 /* 5820 * Custom free routine used for M_PASSFP messages. 5821 */ 5822 static void 5823 free_passfp(struct k_strrecvfd *srf) 5824 { 5825 (void) closef(srf->fp); 5826 kmem_free(srf, sizeof (struct k_strrecvfd) + sizeof (frtn_t)); 5827 } 5828 5829 /* ARGSUSED */ 5830 int 5831 do_sendfp(struct stdata *stp, struct file *fp, struct cred *cr) 5832 { 5833 queue_t *qp, *nextqp; 5834 struct k_strrecvfd *srf; 5835 mblk_t *mp; 5836 frtn_t *frtnp; 5837 size_t bufsize; 5838 queue_t *mate = NULL; 5839 syncq_t *sq = NULL; 5840 int retval = 0; 5841 5842 if (stp->sd_flag & STRHUP) 5843 return (ENXIO); 5844 5845 claimstr(stp->sd_wrq); 5846 5847 /* Fastpath, we have a pipe, and we are already mated, use it. */ 5848 if (STRMATED(stp)) { 5849 qp = _RD(stp->sd_mate->sd_wrq); 5850 claimstr(qp); 5851 mate = qp; 5852 } else { /* Not already mated. */ 5853 5854 /* 5855 * Walk the stream to the end of this one. 5856 * assumes that the claimstr() will prevent 5857 * plumbing between the stream head and the 5858 * driver from changing 5859 */ 5860 qp = stp->sd_wrq; 5861 5862 /* 5863 * Loop until we reach the end of this stream. 
5864 * On completion, qp points to the write queue 5865 * at the end of the stream, or the read queue 5866 * at the stream head if this is a fifo. 5867 */ 5868 while (((qp = qp->q_next) != NULL) && _SAMESTR(qp)) 5869 ; 5870 5871 /* 5872 * Just in case we get a q_next which is NULL, but 5873 * not at the end of the stream. This is actually 5874 * broken, so we set an assert to catch it in 5875 * debug, and set an error and return if not debug. 5876 */ 5877 ASSERT(qp); 5878 if (qp == NULL) { 5879 releasestr(stp->sd_wrq); 5880 return (EINVAL); 5881 } 5882 5883 /* 5884 * Enter the syncq for the driver, so (hopefully) 5885 * the queue values will not change on us. 5886 * XXXX - This will only prevent the race IFF only 5887 * the write side modifies the q_next member, and 5888 * the put procedure is protected by at least 5889 * MT_PERQ. 5890 */ 5891 if ((sq = qp->q_syncq) != NULL) 5892 entersq(sq, SQ_PUT); 5893 5894 /* Now get the q_next value from this qp. */ 5895 nextqp = qp->q_next; 5896 5897 /* 5898 * If nextqp exists and the other stream is different 5899 * from this one claim the stream, set the mate, and 5900 * get the read queue at the stream head of the other 5901 * stream. Assumes that nextqp was at least valid when 5902 * we got it. Hopefully the entersq of the driver 5903 * will prevent it from changing on us. 5904 */ 5905 if ((nextqp != NULL) && (STREAM(nextqp) != stp)) { 5906 ASSERT(qp->q_qinfo->qi_srvp); 5907 ASSERT(_OTHERQ(qp)->q_qinfo->qi_srvp); 5908 ASSERT(_OTHERQ(qp->q_next)->q_qinfo->qi_srvp); 5909 claimstr(nextqp); 5910 5911 /* Make sure we still have a q_next */ 5912 if (nextqp != qp->q_next) { 5913 releasestr(stp->sd_wrq); 5914 releasestr(nextqp); 5915 return (EINVAL); 5916 } 5917 5918 qp = _RD(STREAM(nextqp)->sd_wrq); 5919 mate = qp; 5920 } 5921 /* If we entered the synq above, leave it. 
*/ 5922 if (sq != NULL) 5923 leavesq(sq, SQ_PUT); 5924 } /* STRMATED(STP) */ 5925 5926 /* XXX prevents substitution of the ops vector */ 5927 if (qp->q_qinfo != &strdata && qp->q_qinfo != &fifo_strdata) { 5928 retval = EINVAL; 5929 goto out; 5930 } 5931 5932 if (qp->q_flag & QFULL) { 5933 retval = EAGAIN; 5934 goto out; 5935 } 5936 5937 /* 5938 * Since M_PASSFP messages include a file descriptor, we use 5939 * esballoc() and specify a custom free routine (free_passfp()) that 5940 * will close the descriptor as part of freeing the message. For 5941 * convenience, we stash the frtn_t right after the data block. 5942 */ 5943 bufsize = sizeof (struct k_strrecvfd) + sizeof (frtn_t); 5944 srf = kmem_alloc(bufsize, KM_NOSLEEP); 5945 if (srf == NULL) { 5946 retval = EAGAIN; 5947 goto out; 5948 } 5949 5950 frtnp = (frtn_t *)(srf + 1); 5951 frtnp->free_arg = (caddr_t)srf; 5952 frtnp->free_func = free_passfp; 5953 5954 mp = esballoc((uchar_t *)srf, bufsize, BPRI_MED, frtnp); 5955 if (mp == NULL) { 5956 kmem_free(srf, bufsize); 5957 retval = EAGAIN; 5958 goto out; 5959 } 5960 mp->b_wptr += sizeof (struct k_strrecvfd); 5961 mp->b_datap->db_type = M_PASSFP; 5962 5963 srf->fp = fp; 5964 srf->uid = crgetuid(curthread->t_cred); 5965 srf->gid = crgetgid(curthread->t_cred); 5966 mutex_enter(&fp->f_tlock); 5967 fp->f_count++; 5968 mutex_exit(&fp->f_tlock); 5969 5970 put(qp, mp); 5971 out: 5972 releasestr(stp->sd_wrq); 5973 if (mate) 5974 releasestr(mate); 5975 return (retval); 5976 } 5977 5978 /* 5979 * Send an ioctl message downstream and wait for acknowledgement. 5980 * flags may be set to either U_TO_K or K_TO_K and a combination 5981 * of STR_NOERROR or STR_NOSIG 5982 * STR_NOSIG: Signals are essentially ignored or held and have 5983 * no effect for the duration of the call. 5984 * STR_NOERROR: Ignores stream head read, write and hup errors. 
5985 * Additionally, if an existing ioctl times out, it is assumed 5986 * lost and and this ioctl will continue as if the previous ioctl had 5987 * finished. ETIME may be returned if this ioctl times out (i.e. 5988 * ic_timout is not INFTIM). Non-stream head errors may be returned if 5989 * the ioc_error indicates that the driver/module had problems, 5990 * an EFAULT was found when accessing user data, a lack of 5991 * resources, etc. 5992 */ 5993 int 5994 strdoioctl( 5995 struct stdata *stp, 5996 struct strioctl *strioc, 5997 int fflags, /* file flags with model info */ 5998 int flag, 5999 cred_t *crp, 6000 int *rvalp) 6001 { 6002 mblk_t *bp; 6003 struct iocblk *iocbp; 6004 struct copyreq *reqp; 6005 struct copyresp *resp; 6006 int id; 6007 int transparent = 0; 6008 int error = 0; 6009 int len = 0; 6010 caddr_t taddr; 6011 int copyflag = (flag & (U_TO_K | K_TO_K)); 6012 int sigflag = (flag & STR_NOSIG); 6013 int errs; 6014 uint_t waitflags; 6015 6016 ASSERT(copyflag == U_TO_K || copyflag == K_TO_K); 6017 ASSERT((fflags & FMODELS) != 0); 6018 6019 TRACE_2(TR_FAC_STREAMS_FR, 6020 TR_STRDOIOCTL, 6021 "strdoioctl:stp %p strioc %p", stp, strioc); 6022 if (strioc->ic_len == TRANSPARENT) { /* send arg in M_DATA block */ 6023 transparent = 1; 6024 strioc->ic_len = sizeof (intptr_t); 6025 } 6026 6027 if (strioc->ic_len < 0 || (strmsgsz > 0 && strioc->ic_len > strmsgsz)) 6028 return (EINVAL); 6029 6030 if ((bp = allocb_cred_wait(sizeof (union ioctypes), sigflag, &error, 6031 crp)) == NULL) 6032 return (error); 6033 6034 bzero(bp->b_wptr, sizeof (union ioctypes)); 6035 6036 iocbp = (struct iocblk *)bp->b_wptr; 6037 iocbp->ioc_count = strioc->ic_len; 6038 iocbp->ioc_cmd = strioc->ic_cmd; 6039 iocbp->ioc_flag = (fflags & FMODELS); 6040 6041 crhold(crp); 6042 iocbp->ioc_cr = crp; 6043 DB_TYPE(bp) = M_IOCTL; 6044 DB_CPID(bp) = curproc->p_pid; 6045 bp->b_wptr += sizeof (struct iocblk); 6046 6047 if (flag & STR_NOERROR) 6048 errs = STPLEX; 6049 else 6050 errs = 
STRHUP|STRDERR|STWRERR|STPLEX; 6051 6052 /* 6053 * If there is data to copy into ioctl block, do so. 6054 */ 6055 if (iocbp->ioc_count > 0) { 6056 if (transparent) 6057 /* 6058 * Note: STR_NOERROR does not have an effect 6059 * in putiocd() 6060 */ 6061 id = K_TO_K | sigflag; 6062 else 6063 id = flag; 6064 if ((error = putiocd(bp, strioc->ic_dp, id, crp)) != 0) { 6065 freemsg(bp); 6066 crfree(crp); 6067 return (error); 6068 } 6069 6070 /* 6071 * We could have slept copying in user pages. 6072 * Recheck the stream head state (the other end 6073 * of a pipe could have gone away). 6074 */ 6075 if (stp->sd_flag & errs) { 6076 mutex_enter(&stp->sd_lock); 6077 error = strgeterr(stp, errs, 0); 6078 mutex_exit(&stp->sd_lock); 6079 if (error != 0) { 6080 freemsg(bp); 6081 crfree(crp); 6082 return (error); 6083 } 6084 } 6085 } 6086 if (transparent) 6087 iocbp->ioc_count = TRANSPARENT; 6088 6089 /* 6090 * Block for up to STRTIMOUT milliseconds if there is an outstanding 6091 * ioctl for this stream already running. All processes 6092 * sleeping here will be awakened as a result of an ACK 6093 * or NAK being received for the outstanding ioctl, or 6094 * as a result of the timer expiring on the outstanding 6095 * ioctl (a failure), or as a result of any waiting 6096 * process's timer expiring (also a failure). 6097 */ 6098 6099 error = 0; 6100 mutex_enter(&stp->sd_lock); 6101 while (stp->sd_flag & (IOCWAIT | IOCWAITNE)) { 6102 clock_t cv_rval; 6103 6104 TRACE_0(TR_FAC_STREAMS_FR, 6105 TR_STRDOIOCTL_WAIT, 6106 "strdoioctl sleeps - IOCWAIT"); 6107 cv_rval = str_cv_wait(&stp->sd_iocmonitor, &stp->sd_lock, 6108 STRTIMOUT, sigflag); 6109 if (cv_rval <= 0) { 6110 if (cv_rval == 0) { 6111 error = EINTR; 6112 } else { 6113 if (flag & STR_NOERROR) { 6114 /* 6115 * Terminating current ioctl in 6116 * progress -- assume it got lost and 6117 * wake up the other thread so that the 6118 * operation completes. 
6119 */ 6120 if (!(stp->sd_flag & IOCWAITNE)) { 6121 stp->sd_flag |= IOCWAITNE; 6122 cv_broadcast(&stp->sd_monitor); 6123 } 6124 /* 6125 * Otherwise, there's a running 6126 * STR_NOERROR -- we have no choice 6127 * here but to wait forever (or until 6128 * interrupted). 6129 */ 6130 } else { 6131 /* 6132 * pending ioctl has caused 6133 * us to time out 6134 */ 6135 error = ETIME; 6136 } 6137 } 6138 } else if ((stp->sd_flag & errs)) { 6139 error = strgeterr(stp, errs, 0); 6140 } 6141 if (error) { 6142 mutex_exit(&stp->sd_lock); 6143 freemsg(bp); 6144 crfree(crp); 6145 return (error); 6146 } 6147 } 6148 6149 /* 6150 * Have control of ioctl mechanism. 6151 * Send down ioctl packet and wait for response. 6152 */ 6153 if (stp->sd_iocblk != (mblk_t *)-1) { 6154 freemsg(stp->sd_iocblk); 6155 } 6156 stp->sd_iocblk = NULL; 6157 6158 /* 6159 * If this is marked with 'noerror' (internal; mostly 6160 * I_{P,}{UN,}LINK), then make sure nobody else is able to get 6161 * in here by setting IOCWAITNE. 6162 */ 6163 waitflags = IOCWAIT; 6164 if (flag & STR_NOERROR) 6165 waitflags |= IOCWAITNE; 6166 6167 stp->sd_flag |= waitflags; 6168 6169 /* 6170 * Assign sequence number. 6171 */ 6172 iocbp->ioc_id = stp->sd_iocid = getiocseqno(); 6173 6174 mutex_exit(&stp->sd_lock); 6175 6176 TRACE_1(TR_FAC_STREAMS_FR, 6177 TR_STRDOIOCTL_PUT, "strdoioctl put: stp %p", stp); 6178 stream_willservice(stp); 6179 putnext(stp->sd_wrq, bp); 6180 stream_runservice(stp); 6181 6182 /* 6183 * Timed wait for acknowledgment. The wait time is limited by the 6184 * timeout value, which must be a positive integer (number of 6185 * milliseconds) to wait, or 0 (use default value of STRTIMOUT 6186 * milliseconds), or -1 (wait forever). This will be awakened 6187 * either by an ACK/NAK message arriving, the timer expiring, or 6188 * the timer expiring on another ioctl waiting for control of the 6189 * mechanism. 
6190 */ 6191 waitioc: 6192 mutex_enter(&stp->sd_lock); 6193 6194 6195 /* 6196 * If the reply has already arrived, don't sleep. If awakened from 6197 * the sleep, fail only if the reply has not arrived by then. 6198 * Otherwise, process the reply. 6199 */ 6200 while (!stp->sd_iocblk) { 6201 clock_t cv_rval; 6202 6203 if (stp->sd_flag & errs) { 6204 error = strgeterr(stp, errs, 0); 6205 if (error != 0) { 6206 stp->sd_flag &= ~waitflags; 6207 cv_broadcast(&stp->sd_iocmonitor); 6208 mutex_exit(&stp->sd_lock); 6209 crfree(crp); 6210 return (error); 6211 } 6212 } 6213 6214 TRACE_0(TR_FAC_STREAMS_FR, 6215 TR_STRDOIOCTL_WAIT2, 6216 "strdoioctl sleeps awaiting reply"); 6217 ASSERT(error == 0); 6218 6219 cv_rval = str_cv_wait(&stp->sd_monitor, &stp->sd_lock, 6220 (strioc->ic_timout ? 6221 strioc->ic_timout * 1000 : STRTIMOUT), sigflag); 6222 6223 /* 6224 * There are four possible cases here: interrupt, timeout, 6225 * wakeup by IOCWAITNE (above), or wakeup by strrput_nondata (a 6226 * valid M_IOCTL reply). 6227 * 6228 * If we've been awakened by a STR_NOERROR ioctl on some other 6229 * thread, then sd_iocblk will still be NULL, and IOCWAITNE 6230 * will be set. Pretend as if we just timed out. Note that 6231 * this other thread waited at least STRTIMOUT before trying to 6232 * awaken our thread, so this is indistinguishable (even for 6233 * INFTIM) from the case where we failed with ETIME waiting on 6234 * IOCWAIT in the prior loop. 6235 */ 6236 if (cv_rval > 0 && !(flag & STR_NOERROR) && 6237 stp->sd_iocblk == NULL && (stp->sd_flag & IOCWAITNE)) { 6238 cv_rval = -1; 6239 } 6240 6241 /* 6242 * note: STR_NOERROR does not protect 6243 * us here.. use ic_timout < 0 6244 */ 6245 if (cv_rval <= 0) { 6246 if (cv_rval == 0) { 6247 error = EINTR; 6248 } else { 6249 error = ETIME; 6250 } 6251 /* 6252 * A message could have come in after we were scheduled 6253 * but before we were actually run. 
6254 */ 6255 bp = stp->sd_iocblk; 6256 stp->sd_iocblk = NULL; 6257 if (bp != NULL) { 6258 if ((bp->b_datap->db_type == M_COPYIN) || 6259 (bp->b_datap->db_type == M_COPYOUT)) { 6260 mutex_exit(&stp->sd_lock); 6261 if (bp->b_cont) { 6262 freemsg(bp->b_cont); 6263 bp->b_cont = NULL; 6264 } 6265 bp->b_datap->db_type = M_IOCDATA; 6266 bp->b_wptr = bp->b_rptr + 6267 sizeof (struct copyresp); 6268 resp = (struct copyresp *)bp->b_rptr; 6269 resp->cp_rval = 6270 (caddr_t)1; /* failure */ 6271 stream_willservice(stp); 6272 putnext(stp->sd_wrq, bp); 6273 stream_runservice(stp); 6274 mutex_enter(&stp->sd_lock); 6275 } else { 6276 freemsg(bp); 6277 } 6278 } 6279 stp->sd_flag &= ~waitflags; 6280 cv_broadcast(&stp->sd_iocmonitor); 6281 mutex_exit(&stp->sd_lock); 6282 crfree(crp); 6283 return (error); 6284 } 6285 } 6286 bp = stp->sd_iocblk; 6287 /* 6288 * Note: it is strictly impossible to get here with sd_iocblk set to 6289 * -1. This is because the initial loop above doesn't allow any new 6290 * ioctls into the fray until all others have passed this point. 6291 */ 6292 ASSERT(bp != NULL && bp != (mblk_t *)-1); 6293 TRACE_1(TR_FAC_STREAMS_FR, 6294 TR_STRDOIOCTL_ACK, "strdoioctl got reply: bp %p", bp); 6295 if ((bp->b_datap->db_type == M_IOCACK) || 6296 (bp->b_datap->db_type == M_IOCNAK)) { 6297 /* for detection of duplicate ioctl replies */ 6298 stp->sd_iocblk = (mblk_t *)-1; 6299 stp->sd_flag &= ~waitflags; 6300 cv_broadcast(&stp->sd_iocmonitor); 6301 mutex_exit(&stp->sd_lock); 6302 } else { 6303 /* 6304 * flags not cleared here because we're still doing 6305 * copy in/out for ioctl. 6306 */ 6307 stp->sd_iocblk = NULL; 6308 mutex_exit(&stp->sd_lock); 6309 } 6310 6311 6312 /* 6313 * Have received acknowledgment. 6314 */ 6315 6316 switch (bp->b_datap->db_type) { 6317 case M_IOCACK: 6318 /* 6319 * Positive ack. 6320 */ 6321 iocbp = (struct iocblk *)bp->b_rptr; 6322 6323 /* 6324 * Set error if indicated. 
6325 */ 6326 if (iocbp->ioc_error) { 6327 error = iocbp->ioc_error; 6328 break; 6329 } 6330 6331 /* 6332 * Set return value. 6333 */ 6334 *rvalp = iocbp->ioc_rval; 6335 6336 /* 6337 * Data may have been returned in ACK message (ioc_count > 0). 6338 * If so, copy it out to the user's buffer. 6339 */ 6340 if (iocbp->ioc_count && !transparent) { 6341 if (error = getiocd(bp, strioc->ic_dp, copyflag)) 6342 break; 6343 } 6344 if (!transparent) { 6345 if (len) /* an M_COPYOUT was used with I_STR */ 6346 strioc->ic_len = len; 6347 else 6348 strioc->ic_len = (int)iocbp->ioc_count; 6349 } 6350 break; 6351 6352 case M_IOCNAK: 6353 /* 6354 * Negative ack. 6355 * 6356 * The only thing to do is set error as specified 6357 * in neg ack packet. 6358 */ 6359 iocbp = (struct iocblk *)bp->b_rptr; 6360 6361 error = (iocbp->ioc_error ? iocbp->ioc_error : EINVAL); 6362 break; 6363 6364 case M_COPYIN: 6365 /* 6366 * Driver or module has requested user ioctl data. 6367 */ 6368 reqp = (struct copyreq *)bp->b_rptr; 6369 6370 /* 6371 * M_COPYIN should *never* have a message attached, though 6372 * it's harmless if it does -- thus, panic on a DEBUG 6373 * kernel and just free it on a non-DEBUG build. 
6374 */ 6375 ASSERT(bp->b_cont == NULL); 6376 if (bp->b_cont != NULL) { 6377 freemsg(bp->b_cont); 6378 bp->b_cont = NULL; 6379 } 6380 6381 error = putiocd(bp, reqp->cq_addr, flag, crp); 6382 if (error && bp->b_cont) { 6383 freemsg(bp->b_cont); 6384 bp->b_cont = NULL; 6385 } 6386 6387 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp); 6388 bp->b_datap->db_type = M_IOCDATA; 6389 6390 mblk_setcred(bp, crp); 6391 DB_CPID(bp) = curproc->p_pid; 6392 resp = (struct copyresp *)bp->b_rptr; 6393 resp->cp_rval = (caddr_t)(uintptr_t)error; 6394 resp->cp_flag = (fflags & FMODELS); 6395 6396 stream_willservice(stp); 6397 putnext(stp->sd_wrq, bp); 6398 stream_runservice(stp); 6399 6400 if (error) { 6401 mutex_enter(&stp->sd_lock); 6402 stp->sd_flag &= ~waitflags; 6403 cv_broadcast(&stp->sd_iocmonitor); 6404 mutex_exit(&stp->sd_lock); 6405 crfree(crp); 6406 return (error); 6407 } 6408 6409 goto waitioc; 6410 6411 case M_COPYOUT: 6412 /* 6413 * Driver or module has ioctl data for a user. 6414 */ 6415 reqp = (struct copyreq *)bp->b_rptr; 6416 ASSERT(bp->b_cont != NULL); 6417 6418 /* 6419 * Always (transparent or non-transparent ) 6420 * use the address specified in the request 6421 */ 6422 taddr = reqp->cq_addr; 6423 if (!transparent) 6424 len = (int)reqp->cq_size; 6425 6426 /* copyout data to the provided address */ 6427 error = getiocd(bp, taddr, copyflag); 6428 6429 freemsg(bp->b_cont); 6430 bp->b_cont = NULL; 6431 6432 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp); 6433 bp->b_datap->db_type = M_IOCDATA; 6434 6435 mblk_setcred(bp, crp); 6436 DB_CPID(bp) = curproc->p_pid; 6437 resp = (struct copyresp *)bp->b_rptr; 6438 resp->cp_rval = (caddr_t)(uintptr_t)error; 6439 resp->cp_flag = (fflags & FMODELS); 6440 6441 stream_willservice(stp); 6442 putnext(stp->sd_wrq, bp); 6443 stream_runservice(stp); 6444 6445 if (error) { 6446 mutex_enter(&stp->sd_lock); 6447 stp->sd_flag &= ~waitflags; 6448 cv_broadcast(&stp->sd_iocmonitor); 6449 mutex_exit(&stp->sd_lock); 6450 crfree(crp); 6451 
return (error); 6452 } 6453 goto waitioc; 6454 6455 default: 6456 ASSERT(0); 6457 mutex_enter(&stp->sd_lock); 6458 stp->sd_flag &= ~waitflags; 6459 cv_broadcast(&stp->sd_iocmonitor); 6460 mutex_exit(&stp->sd_lock); 6461 break; 6462 } 6463 6464 freemsg(bp); 6465 crfree(crp); 6466 return (error); 6467 } 6468 6469 /* 6470 * Send an M_CMD message downstream and wait for a reply. This is a ptools 6471 * special used to retrieve information from modules/drivers a stream without 6472 * being subjected to flow control or interfering with pending messages on the 6473 * stream (e.g. an ioctl in flight). 6474 */ 6475 int 6476 strdocmd(struct stdata *stp, struct strcmd *scp, cred_t *crp) 6477 { 6478 mblk_t *mp; 6479 struct cmdblk *cmdp; 6480 int error = 0; 6481 int errs = STRHUP|STRDERR|STWRERR|STPLEX; 6482 clock_t rval, timeout = STRTIMOUT; 6483 6484 if (scp->sc_len < 0 || scp->sc_len > sizeof (scp->sc_buf) || 6485 scp->sc_timeout < -1) 6486 return (EINVAL); 6487 6488 if (scp->sc_timeout > 0) 6489 timeout = scp->sc_timeout * MILLISEC; 6490 6491 if ((mp = allocb_cred(sizeof (struct cmdblk), crp)) == NULL) 6492 return (ENOMEM); 6493 6494 crhold(crp); 6495 6496 cmdp = (struct cmdblk *)mp->b_wptr; 6497 cmdp->cb_cr = crp; 6498 cmdp->cb_cmd = scp->sc_cmd; 6499 cmdp->cb_len = scp->sc_len; 6500 cmdp->cb_error = 0; 6501 mp->b_wptr += sizeof (struct cmdblk); 6502 6503 DB_TYPE(mp) = M_CMD; 6504 DB_CPID(mp) = curproc->p_pid; 6505 6506 /* 6507 * Copy in the payload. 
6508 */ 6509 if (cmdp->cb_len > 0) { 6510 mp->b_cont = allocb_cred(sizeof (scp->sc_buf), crp); 6511 if (mp->b_cont == NULL) { 6512 error = ENOMEM; 6513 goto out; 6514 } 6515 6516 /* cb_len comes from sc_len, which has already been checked */ 6517 ASSERT(cmdp->cb_len <= sizeof (scp->sc_buf)); 6518 (void) bcopy(scp->sc_buf, mp->b_cont->b_wptr, cmdp->cb_len); 6519 mp->b_cont->b_wptr += cmdp->cb_len; 6520 DB_CPID(mp->b_cont) = curproc->p_pid; 6521 } 6522 6523 /* 6524 * Since this mechanism is strictly for ptools, and since only one 6525 * process can be grabbed at a time, we simply fail if there's 6526 * currently an operation pending. 6527 */ 6528 mutex_enter(&stp->sd_lock); 6529 if (stp->sd_flag & STRCMDWAIT) { 6530 mutex_exit(&stp->sd_lock); 6531 error = EBUSY; 6532 goto out; 6533 } 6534 stp->sd_flag |= STRCMDWAIT; 6535 ASSERT(stp->sd_cmdblk == NULL); 6536 mutex_exit(&stp->sd_lock); 6537 6538 putnext(stp->sd_wrq, mp); 6539 mp = NULL; 6540 6541 /* 6542 * Timed wait for acknowledgment. If the reply has already arrived, 6543 * don't sleep. If awakened from the sleep, fail only if the reply 6544 * has not arrived by then. Otherwise, process the reply. 6545 */ 6546 mutex_enter(&stp->sd_lock); 6547 while (stp->sd_cmdblk == NULL) { 6548 if (stp->sd_flag & errs) { 6549 if ((error = strgeterr(stp, errs, 0)) != 0) 6550 goto waitout; 6551 } 6552 6553 rval = str_cv_wait(&stp->sd_monitor, &stp->sd_lock, timeout, 0); 6554 if (stp->sd_cmdblk != NULL) 6555 break; 6556 6557 if (rval <= 0) { 6558 error = (rval == 0) ? EINTR : ETIME; 6559 goto waitout; 6560 } 6561 } 6562 6563 /* 6564 * We received a reply. 
6565 */ 6566 mp = stp->sd_cmdblk; 6567 stp->sd_cmdblk = NULL; 6568 ASSERT(mp != NULL && DB_TYPE(mp) == M_CMD); 6569 ASSERT(stp->sd_flag & STRCMDWAIT); 6570 stp->sd_flag &= ~STRCMDWAIT; 6571 mutex_exit(&stp->sd_lock); 6572 6573 cmdp = (struct cmdblk *)mp->b_rptr; 6574 if ((error = cmdp->cb_error) != 0) 6575 goto out; 6576 6577 /* 6578 * Data may have been returned in the reply (cb_len > 0). 6579 * If so, copy it out to the user's buffer. 6580 */ 6581 if (cmdp->cb_len > 0) { 6582 if (mp->b_cont == NULL || MBLKL(mp->b_cont) < cmdp->cb_len) { 6583 error = EPROTO; 6584 goto out; 6585 } 6586 6587 cmdp->cb_len = MIN(cmdp->cb_len, sizeof (scp->sc_buf)); 6588 (void) bcopy(mp->b_cont->b_rptr, scp->sc_buf, cmdp->cb_len); 6589 } 6590 scp->sc_len = cmdp->cb_len; 6591 out: 6592 freemsg(mp); 6593 crfree(crp); 6594 return (error); 6595 waitout: 6596 ASSERT(stp->sd_cmdblk == NULL); 6597 stp->sd_flag &= ~STRCMDWAIT; 6598 mutex_exit(&stp->sd_lock); 6599 crfree(crp); 6600 return (error); 6601 } 6602 6603 /* 6604 * For the SunOS keyboard driver. 6605 * Return the next available "ioctl" sequence number. 6606 * Exported, so that streams modules can send "ioctl" messages 6607 * downstream from their open routine. 6608 */ 6609 int 6610 getiocseqno(void) 6611 { 6612 int i; 6613 6614 mutex_enter(&strresources); 6615 i = ++ioc_id; 6616 mutex_exit(&strresources); 6617 return (i); 6618 } 6619 6620 /* 6621 * Get the next message from the read queue. If the message is 6622 * priority, STRPRI will have been set by strrput(). This flag 6623 * should be reset only when the entire message at the front of the 6624 * queue as been consumed. 6625 * 6626 * NOTE: strgetmsg and kstrgetmsg have much of the logic in common. 
6627 */ 6628 int 6629 strgetmsg( 6630 struct vnode *vp, 6631 struct strbuf *mctl, 6632 struct strbuf *mdata, 6633 unsigned char *prip, 6634 int *flagsp, 6635 int fmode, 6636 rval_t *rvp) 6637 { 6638 struct stdata *stp; 6639 mblk_t *bp, *nbp; 6640 mblk_t *savemp = NULL; 6641 mblk_t *savemptail = NULL; 6642 uint_t old_sd_flag; 6643 int flg; 6644 int more = 0; 6645 int error = 0; 6646 char first = 1; 6647 uint_t mark; /* Contains MSG*MARK and _LASTMARK */ 6648 #define _LASTMARK 0x8000 /* Distinct from MSG*MARK */ 6649 unsigned char pri = 0; 6650 queue_t *q; 6651 int pr = 0; /* Partial read successful */ 6652 struct uio uios; 6653 struct uio *uiop = &uios; 6654 struct iovec iovs; 6655 unsigned char type; 6656 6657 TRACE_1(TR_FAC_STREAMS_FR, TR_STRGETMSG_ENTER, 6658 "strgetmsg:%p", vp); 6659 6660 ASSERT(vp->v_stream); 6661 stp = vp->v_stream; 6662 rvp->r_val1 = 0; 6663 6664 mutex_enter(&stp->sd_lock); 6665 6666 if ((error = i_straccess(stp, JCREAD)) != 0) { 6667 mutex_exit(&stp->sd_lock); 6668 return (error); 6669 } 6670 6671 if (stp->sd_flag & (STRDERR|STPLEX)) { 6672 error = strgeterr(stp, STRDERR|STPLEX, 0); 6673 if (error != 0) { 6674 mutex_exit(&stp->sd_lock); 6675 return (error); 6676 } 6677 } 6678 mutex_exit(&stp->sd_lock); 6679 6680 switch (*flagsp) { 6681 case MSG_HIPRI: 6682 if (*prip != 0) 6683 return (EINVAL); 6684 break; 6685 6686 case MSG_ANY: 6687 case MSG_BAND: 6688 break; 6689 6690 default: 6691 return (EINVAL); 6692 } 6693 /* 6694 * Setup uio and iov for data part 6695 */ 6696 iovs.iov_base = mdata->buf; 6697 iovs.iov_len = mdata->maxlen; 6698 uios.uio_iov = &iovs; 6699 uios.uio_iovcnt = 1; 6700 uios.uio_loffset = 0; 6701 uios.uio_segflg = UIO_USERSPACE; 6702 uios.uio_fmode = 0; 6703 uios.uio_extflg = UIO_COPY_CACHED; 6704 uios.uio_resid = mdata->maxlen; 6705 uios.uio_offset = 0; 6706 6707 q = _RD(stp->sd_wrq); 6708 mutex_enter(&stp->sd_lock); 6709 old_sd_flag = stp->sd_flag; 6710 mark = 0; 6711 for (;;) { 6712 int done = 0; 6713 mblk_t *q_first = 
q->q_first; 6714 6715 /* 6716 * Get the next message of appropriate priority 6717 * from the stream head. If the caller is interested 6718 * in band or hipri messages, then they should already 6719 * be enqueued at the stream head. On the other hand 6720 * if the caller wants normal (band 0) messages, they 6721 * might be deferred in a synchronous stream and they 6722 * will need to be pulled up. 6723 * 6724 * After we have dequeued a message, we might find that 6725 * it was a deferred M_SIG that was enqueued at the 6726 * stream head. It must now be posted as part of the 6727 * read by calling strsignal_nolock(). 6728 * 6729 * Also note that strrput does not enqueue an M_PCSIG, 6730 * and there cannot be more than one hipri message, 6731 * so there was no need to have the M_PCSIG case. 6732 * 6733 * At some time it might be nice to try and wrap the 6734 * functionality of kstrgetmsg() and strgetmsg() into 6735 * a common routine so to reduce the amount of replicated 6736 * code (since they are extremely similar). 6737 */ 6738 if (!(*flagsp & (MSG_HIPRI|MSG_BAND))) { 6739 /* Asking for normal, band0 data */ 6740 bp = strget(stp, q, uiop, first, &error); 6741 ASSERT(MUTEX_HELD(&stp->sd_lock)); 6742 if (bp != NULL) { 6743 ASSERT(!(bp->b_datap->db_flags & DBLK_UIOA)); 6744 if (bp->b_datap->db_type == M_SIG) { 6745 strsignal_nolock(stp, *bp->b_rptr, 6746 (int32_t)bp->b_band); 6747 continue; 6748 } else { 6749 break; 6750 } 6751 } 6752 if (error != 0) { 6753 goto getmout; 6754 } 6755 6756 /* 6757 * We can't depend on the value of STRPRI here because 6758 * the stream head may be in transit. 
Therefore, we 6759 * must look at the type of the first message to 6760 * determine if a high priority messages is waiting 6761 */ 6762 } else if ((*flagsp & MSG_HIPRI) && q_first != NULL && 6763 q_first->b_datap->db_type >= QPCTL && 6764 (bp = getq_noenab(q, 0)) != NULL) { 6765 /* Asked for HIPRI and got one */ 6766 ASSERT(bp->b_datap->db_type >= QPCTL); 6767 break; 6768 } else if ((*flagsp & MSG_BAND) && q_first != NULL && 6769 ((q_first->b_band >= *prip) || 6770 q_first->b_datap->db_type >= QPCTL) && 6771 (bp = getq_noenab(q, 0)) != NULL) { 6772 /* 6773 * Asked for at least band "prip" and got either at 6774 * least that band or a hipri message. 6775 */ 6776 ASSERT(bp->b_band >= *prip || 6777 bp->b_datap->db_type >= QPCTL); 6778 if (bp->b_datap->db_type == M_SIG) { 6779 strsignal_nolock(stp, *bp->b_rptr, 6780 (int32_t)bp->b_band); 6781 continue; 6782 } else { 6783 break; 6784 } 6785 } 6786 6787 /* No data. Time to sleep? */ 6788 qbackenable(q, 0); 6789 6790 /* 6791 * If STRHUP or STREOF, return 0 length control and data. 6792 * If resid is 0, then a read(fd,buf,0) was done. Do not 6793 * sleep to satisfy this request because by default we have 6794 * zero bytes to return. 
6795 */ 6796 if ((stp->sd_flag & (STRHUP|STREOF)) || (mctl->maxlen == 0 && 6797 mdata->maxlen == 0)) { 6798 mctl->len = mdata->len = 0; 6799 *flagsp = 0; 6800 mutex_exit(&stp->sd_lock); 6801 return (0); 6802 } 6803 TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_WAIT, 6804 "strgetmsg calls strwaitq:%p, %p", 6805 vp, uiop); 6806 if (((error = strwaitq(stp, GETWAIT, (ssize_t)0, fmode, -1, 6807 &done)) != 0) || done) { 6808 TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_DONE, 6809 "strgetmsg error or done:%p, %p", 6810 vp, uiop); 6811 mutex_exit(&stp->sd_lock); 6812 return (error); 6813 } 6814 TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_AWAKE, 6815 "strgetmsg awakes:%p, %p", vp, uiop); 6816 if ((error = i_straccess(stp, JCREAD)) != 0) { 6817 mutex_exit(&stp->sd_lock); 6818 return (error); 6819 } 6820 first = 0; 6821 } 6822 ASSERT(bp != NULL); 6823 /* 6824 * Extract any mark information. If the message is not completely 6825 * consumed this information will be put in the mblk 6826 * that is putback. 6827 * If MSGMARKNEXT is set and the message is completely consumed 6828 * the STRATMARK flag will be set below. Likewise, if 6829 * MSGNOTMARKNEXT is set and the message is 6830 * completely consumed STRNOTATMARK will be set. 6831 */ 6832 mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT); 6833 ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) != 6834 (MSGMARKNEXT|MSGNOTMARKNEXT)); 6835 if (mark != 0 && bp == stp->sd_mark) { 6836 mark |= _LASTMARK; 6837 stp->sd_mark = NULL; 6838 } 6839 /* 6840 * keep track of the original message type and priority 6841 */ 6842 pri = bp->b_band; 6843 type = bp->b_datap->db_type; 6844 if (type == M_PASSFP) { 6845 if ((mark & _LASTMARK) && (stp->sd_mark == NULL)) 6846 stp->sd_mark = bp; 6847 bp->b_flag |= mark & ~_LASTMARK; 6848 putback(stp, q, bp, pri); 6849 qbackenable(q, pri); 6850 mutex_exit(&stp->sd_lock); 6851 return (EBADMSG); 6852 } 6853 ASSERT(type != M_SIG); 6854 6855 /* 6856 * Set this flag so strrput will not generate signals. 
Need to 6857 * make sure this flag is cleared before leaving this routine 6858 * else signals will stop being sent. 6859 */ 6860 stp->sd_flag |= STRGETINPROG; 6861 mutex_exit(&stp->sd_lock); 6862 6863 if (STREAM_NEEDSERVICE(stp)) 6864 stream_runservice(stp); 6865 6866 /* 6867 * Set HIPRI flag if message is priority. 6868 */ 6869 if (type >= QPCTL) 6870 flg = MSG_HIPRI; 6871 else 6872 flg = MSG_BAND; 6873 6874 /* 6875 * First process PROTO or PCPROTO blocks, if any. 6876 */ 6877 if (mctl->maxlen >= 0 && type != M_DATA) { 6878 size_t n, bcnt; 6879 char *ubuf; 6880 6881 bcnt = mctl->maxlen; 6882 ubuf = mctl->buf; 6883 while (bp != NULL && bp->b_datap->db_type != M_DATA) { 6884 if ((n = MIN(bcnt, bp->b_wptr - bp->b_rptr)) != 0 && 6885 copyout(bp->b_rptr, ubuf, n)) { 6886 error = EFAULT; 6887 mutex_enter(&stp->sd_lock); 6888 /* 6889 * clear stream head pri flag based on 6890 * first message type 6891 */ 6892 if (type >= QPCTL) { 6893 ASSERT(type == M_PCPROTO); 6894 stp->sd_flag &= ~STRPRI; 6895 } 6896 more = 0; 6897 freemsg(bp); 6898 goto getmout; 6899 } 6900 ubuf += n; 6901 bp->b_rptr += n; 6902 if (bp->b_rptr >= bp->b_wptr) { 6903 nbp = bp; 6904 bp = bp->b_cont; 6905 freeb(nbp); 6906 } 6907 ASSERT(n <= bcnt); 6908 bcnt -= n; 6909 if (bcnt == 0) 6910 break; 6911 } 6912 mctl->len = mctl->maxlen - bcnt; 6913 } else 6914 mctl->len = -1; 6915 6916 if (bp && bp->b_datap->db_type != M_DATA) { 6917 /* 6918 * More PROTO blocks in msg. 6919 */ 6920 more |= MORECTL; 6921 savemp = bp; 6922 while (bp && bp->b_datap->db_type != M_DATA) { 6923 savemptail = bp; 6924 bp = bp->b_cont; 6925 } 6926 savemptail->b_cont = NULL; 6927 } 6928 6929 /* 6930 * Now process DATA blocks, if any. 6931 */ 6932 if (mdata->maxlen >= 0 && bp) { 6933 /* 6934 * struiocopyout will consume a potential zero-length 6935 * M_DATA even if uio_resid is zero. 
6936 */ 6937 size_t oldresid = uiop->uio_resid; 6938 6939 bp = struiocopyout(bp, uiop, &error); 6940 if (error != 0) { 6941 mutex_enter(&stp->sd_lock); 6942 /* 6943 * clear stream head hi pri flag based on 6944 * first message 6945 */ 6946 if (type >= QPCTL) { 6947 ASSERT(type == M_PCPROTO); 6948 stp->sd_flag &= ~STRPRI; 6949 } 6950 more = 0; 6951 freemsg(savemp); 6952 goto getmout; 6953 } 6954 /* 6955 * (pr == 1) indicates a partial read. 6956 */ 6957 if (oldresid > uiop->uio_resid) 6958 pr = 1; 6959 mdata->len = mdata->maxlen - uiop->uio_resid; 6960 } else 6961 mdata->len = -1; 6962 6963 if (bp) { /* more data blocks in msg */ 6964 more |= MOREDATA; 6965 if (savemp) 6966 savemptail->b_cont = bp; 6967 else 6968 savemp = bp; 6969 } 6970 6971 mutex_enter(&stp->sd_lock); 6972 if (savemp) { 6973 if (pr && (savemp->b_datap->db_type == M_DATA) && 6974 msgnodata(savemp)) { 6975 /* 6976 * Avoid queuing a zero-length tail part of 6977 * a message. pr=1 indicates that we read some of 6978 * the message. 6979 */ 6980 freemsg(savemp); 6981 more &= ~MOREDATA; 6982 /* 6983 * clear stream head hi pri flag based on 6984 * first message 6985 */ 6986 if (type >= QPCTL) { 6987 ASSERT(type == M_PCPROTO); 6988 stp->sd_flag &= ~STRPRI; 6989 } 6990 } else { 6991 savemp->b_band = pri; 6992 /* 6993 * If the first message was HIPRI and the one we're 6994 * putting back isn't, then clear STRPRI, otherwise 6995 * set STRPRI again. Note that we must set STRPRI 6996 * again since the flush logic in strrput_nondata() 6997 * may have cleared it while we had sd_lock dropped. 6998 */ 6999 if (type >= QPCTL) { 7000 ASSERT(type == M_PCPROTO); 7001 if (queclass(savemp) < QPCTL) 7002 stp->sd_flag &= ~STRPRI; 7003 else 7004 stp->sd_flag |= STRPRI; 7005 } else if (queclass(savemp) >= QPCTL) { 7006 /* 7007 * The first message was not a HIPRI message, 7008 * but the one we are about to putback is. 
7009 * For simplicitly, we do not allow for HIPRI 7010 * messages to be embedded in the message 7011 * body, so just force it to same type as 7012 * first message. 7013 */ 7014 ASSERT(type == M_DATA || type == M_PROTO); 7015 ASSERT(savemp->b_datap->db_type == M_PCPROTO); 7016 savemp->b_datap->db_type = type; 7017 } 7018 if (mark != 0) { 7019 savemp->b_flag |= mark & ~_LASTMARK; 7020 if ((mark & _LASTMARK) && 7021 (stp->sd_mark == NULL)) { 7022 /* 7023 * If another marked message arrived 7024 * while sd_lock was not held sd_mark 7025 * would be non-NULL. 7026 */ 7027 stp->sd_mark = savemp; 7028 } 7029 } 7030 putback(stp, q, savemp, pri); 7031 } 7032 } else { 7033 /* 7034 * The complete message was consumed. 7035 * 7036 * If another M_PCPROTO arrived while sd_lock was not held 7037 * it would have been discarded since STRPRI was still set. 7038 * 7039 * Move the MSG*MARKNEXT information 7040 * to the stream head just in case 7041 * the read queue becomes empty. 7042 * clear stream head hi pri flag based on 7043 * first message 7044 * 7045 * If the stream head was at the mark 7046 * (STRATMARK) before we dropped sd_lock above 7047 * and some data was consumed then we have 7048 * moved past the mark thus STRATMARK is 7049 * cleared. However, if a message arrived in 7050 * strrput during the copyout above causing 7051 * STRATMARK to be set we can not clear that 7052 * flag. 
7053 */ 7054 if (type >= QPCTL) { 7055 ASSERT(type == M_PCPROTO); 7056 stp->sd_flag &= ~STRPRI; 7057 } 7058 if (mark & (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) { 7059 if (mark & MSGMARKNEXT) { 7060 stp->sd_flag &= ~STRNOTATMARK; 7061 stp->sd_flag |= STRATMARK; 7062 } else if (mark & MSGNOTMARKNEXT) { 7063 stp->sd_flag &= ~STRATMARK; 7064 stp->sd_flag |= STRNOTATMARK; 7065 } else { 7066 stp->sd_flag &= ~(STRATMARK|STRNOTATMARK); 7067 } 7068 } else if (pr && (old_sd_flag & STRATMARK)) { 7069 stp->sd_flag &= ~STRATMARK; 7070 } 7071 } 7072 7073 *flagsp = flg; 7074 *prip = pri; 7075 7076 /* 7077 * Getmsg cleanup processing - if the state of the queue has changed 7078 * some signals may need to be sent and/or poll awakened. 7079 */ 7080 getmout: 7081 qbackenable(q, pri); 7082 7083 /* 7084 * We dropped the stream head lock above. Send all M_SIG messages 7085 * before processing stream head for SIGPOLL messages. 7086 */ 7087 ASSERT(MUTEX_HELD(&stp->sd_lock)); 7088 while ((bp = q->q_first) != NULL && 7089 (bp->b_datap->db_type == M_SIG)) { 7090 /* 7091 * sd_lock is held so the content of the read queue can not 7092 * change. 7093 */ 7094 bp = getq(q); 7095 ASSERT(bp != NULL && bp->b_datap->db_type == M_SIG); 7096 7097 strsignal_nolock(stp, *bp->b_rptr, (int32_t)bp->b_band); 7098 mutex_exit(&stp->sd_lock); 7099 freemsg(bp); 7100 if (STREAM_NEEDSERVICE(stp)) 7101 stream_runservice(stp); 7102 mutex_enter(&stp->sd_lock); 7103 } 7104 7105 /* 7106 * stream head cannot change while we make the determination 7107 * whether or not to send a signal. Drop the flag to allow strrput 7108 * to send firstmsgsigs again. 7109 */ 7110 stp->sd_flag &= ~STRGETINPROG; 7111 7112 /* 7113 * If the type of message at the front of the queue changed 7114 * due to the receive the appropriate signals and pollwakeup events 7115 * are generated. The type of changes are: 7116 * Processed a hipri message, q_first is not hipri. 7117 * Processed a band X message, and q_first is band Y. 
7118 * The generated signals and pollwakeups are identical to what 7119 * strrput() generates should the message that is now on q_first 7120 * arrive to an empty read queue. 7121 * 7122 * Note: only strrput will send a signal for a hipri message. 7123 */ 7124 if ((bp = q->q_first) != NULL && !(stp->sd_flag & STRPRI)) { 7125 strsigset_t signals = 0; 7126 strpollset_t pollwakeups = 0; 7127 7128 if (flg & MSG_HIPRI) { 7129 /* 7130 * Removed a hipri message. Regular data at 7131 * the front of the queue. 7132 */ 7133 if (bp->b_band == 0) { 7134 signals = S_INPUT | S_RDNORM; 7135 pollwakeups = POLLIN | POLLRDNORM; 7136 } else { 7137 signals = S_INPUT | S_RDBAND; 7138 pollwakeups = POLLIN | POLLRDBAND; 7139 } 7140 } else if (pri != bp->b_band) { 7141 /* 7142 * The band is different for the new q_first. 7143 */ 7144 if (bp->b_band == 0) { 7145 signals = S_RDNORM; 7146 pollwakeups = POLLIN | POLLRDNORM; 7147 } else { 7148 signals = S_RDBAND; 7149 pollwakeups = POLLIN | POLLRDBAND; 7150 } 7151 } 7152 7153 if (pollwakeups != 0) { 7154 if (pollwakeups == (POLLIN | POLLRDNORM)) { 7155 if (!(stp->sd_rput_opt & SR_POLLIN)) 7156 goto no_pollwake; 7157 stp->sd_rput_opt &= ~SR_POLLIN; 7158 } 7159 mutex_exit(&stp->sd_lock); 7160 pollwakeup(&stp->sd_pollist, pollwakeups); 7161 mutex_enter(&stp->sd_lock); 7162 } 7163 no_pollwake: 7164 7165 if (stp->sd_sigflags & signals) 7166 strsendsig(stp->sd_siglist, signals, bp->b_band, 0); 7167 } 7168 mutex_exit(&stp->sd_lock); 7169 7170 rvp->r_val1 = more; 7171 return (error); 7172 #undef _LASTMARK 7173 } 7174 7175 /* 7176 * Get the next message from the read queue. If the message is 7177 * priority, STRPRI will have been set by strrput(). This flag 7178 * should be reset only when the entire message at the front of the 7179 * queue as been consumed. 7180 * 7181 * If uiop is NULL all data is returned in mctlp. 7182 * Note that a NULL uiop implies that FNDELAY and FNONBLOCK are assumed 7183 * not enabled. 
7184 * The timeout parameter is in milliseconds; -1 for infinity. 7185 * This routine handles the consolidation private flags: 7186 * MSG_IGNERROR Ignore any stream head error except STPLEX. 7187 * MSG_DELAYERROR Defer the error check until the queue is empty. 7188 * MSG_HOLDSIG Hold signals while waiting for data. 7189 * MSG_IPEEK Only peek at messages. 7190 * MSG_DISCARDTAIL Discard the tail M_DATA part of the message 7191 * that doesn't fit. 7192 * MSG_NOMARK If the message is marked leave it on the queue. 7193 * 7194 * NOTE: strgetmsg and kstrgetmsg have much of the logic in common. 7195 */ 7196 int 7197 kstrgetmsg( 7198 struct vnode *vp, 7199 mblk_t **mctlp, 7200 struct uio *uiop, 7201 unsigned char *prip, 7202 int *flagsp, 7203 clock_t timout, 7204 rval_t *rvp) 7205 { 7206 struct stdata *stp; 7207 mblk_t *bp, *nbp; 7208 mblk_t *savemp = NULL; 7209 mblk_t *savemptail = NULL; 7210 int flags; 7211 uint_t old_sd_flag; 7212 int flg; 7213 int more = 0; 7214 int error = 0; 7215 char first = 1; 7216 uint_t mark; /* Contains MSG*MARK and _LASTMARK */ 7217 #define _LASTMARK 0x8000 /* Distinct from MSG*MARK */ 7218 unsigned char pri = 0; 7219 queue_t *q; 7220 int pr = 0; /* Partial read successful */ 7221 unsigned char type; 7222 7223 TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_ENTER, 7224 "kstrgetmsg:%p", vp); 7225 7226 ASSERT(vp->v_stream); 7227 stp = vp->v_stream; 7228 rvp->r_val1 = 0; 7229 7230 mutex_enter(&stp->sd_lock); 7231 7232 if ((error = i_straccess(stp, JCREAD)) != 0) { 7233 mutex_exit(&stp->sd_lock); 7234 return (error); 7235 } 7236 7237 flags = *flagsp; 7238 if (stp->sd_flag & (STRDERR|STPLEX)) { 7239 if ((stp->sd_flag & STPLEX) || 7240 (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == 0) { 7241 error = strgeterr(stp, STRDERR|STPLEX, 7242 (flags & MSG_IPEEK)); 7243 if (error != 0) { 7244 mutex_exit(&stp->sd_lock); 7245 return (error); 7246 } 7247 } 7248 } 7249 mutex_exit(&stp->sd_lock); 7250 7251 switch (flags & (MSG_HIPRI|MSG_ANY|MSG_BAND)) { 7252 case MSG_HIPRI: 
7253 if (*prip != 0) 7254 return (EINVAL); 7255 break; 7256 7257 case MSG_ANY: 7258 case MSG_BAND: 7259 break; 7260 7261 default: 7262 return (EINVAL); 7263 } 7264 7265 retry: 7266 q = _RD(stp->sd_wrq); 7267 mutex_enter(&stp->sd_lock); 7268 old_sd_flag = stp->sd_flag; 7269 mark = 0; 7270 for (;;) { 7271 int done = 0; 7272 int waitflag; 7273 int fmode; 7274 mblk_t *q_first = q->q_first; 7275 7276 /* 7277 * This section of the code operates just like the code 7278 * in strgetmsg(). There is a comment there about what 7279 * is going on here. 7280 */ 7281 if (!(flags & (MSG_HIPRI|MSG_BAND))) { 7282 /* Asking for normal, band0 data */ 7283 bp = strget(stp, q, uiop, first, &error); 7284 ASSERT(MUTEX_HELD(&stp->sd_lock)); 7285 if (bp != NULL) { 7286 if (bp->b_datap->db_type == M_SIG) { 7287 strsignal_nolock(stp, *bp->b_rptr, 7288 (int32_t)bp->b_band); 7289 continue; 7290 } else { 7291 break; 7292 } 7293 } 7294 if (error != 0) { 7295 goto getmout; 7296 } 7297 /* 7298 * We can't depend on the value of STRPRI here because 7299 * the stream head may be in transit. Therefore, we 7300 * must look at the type of the first message to 7301 * determine if a high priority messages is waiting 7302 */ 7303 } else if ((flags & MSG_HIPRI) && q_first != NULL && 7304 q_first->b_datap->db_type >= QPCTL && 7305 (bp = getq_noenab(q, 0)) != NULL) { 7306 ASSERT(bp->b_datap->db_type >= QPCTL); 7307 break; 7308 } else if ((flags & MSG_BAND) && q_first != NULL && 7309 ((q_first->b_band >= *prip) || 7310 q_first->b_datap->db_type >= QPCTL) && 7311 (bp = getq_noenab(q, 0)) != NULL) { 7312 /* 7313 * Asked for at least band "prip" and got either at 7314 * least that band or a hipri message. 7315 */ 7316 ASSERT(bp->b_band >= *prip || 7317 bp->b_datap->db_type >= QPCTL); 7318 if (bp->b_datap->db_type == M_SIG) { 7319 strsignal_nolock(stp, *bp->b_rptr, 7320 (int32_t)bp->b_band); 7321 continue; 7322 } else { 7323 break; 7324 } 7325 } 7326 7327 /* No data. Time to sleep? 
*/ 7328 qbackenable(q, 0); 7329 7330 /* 7331 * Delayed error notification? 7332 */ 7333 if ((stp->sd_flag & (STRDERR|STPLEX)) && 7334 (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == MSG_DELAYERROR) { 7335 error = strgeterr(stp, STRDERR|STPLEX, 7336 (flags & MSG_IPEEK)); 7337 if (error != 0) { 7338 mutex_exit(&stp->sd_lock); 7339 return (error); 7340 } 7341 } 7342 7343 /* 7344 * If STRHUP or STREOF, return 0 length control and data. 7345 * If a read(fd,buf,0) has been done, do not sleep, just 7346 * return. 7347 * 7348 * If mctlp == NULL and uiop == NULL, then the code will 7349 * do the strwaitq. This is an understood way of saying 7350 * sleep "polling" until a message is received. 7351 */ 7352 if ((stp->sd_flag & (STRHUP|STREOF)) || 7353 (uiop != NULL && uiop->uio_resid == 0)) { 7354 if (mctlp != NULL) 7355 *mctlp = NULL; 7356 *flagsp = 0; 7357 mutex_exit(&stp->sd_lock); 7358 return (0); 7359 } 7360 7361 waitflag = GETWAIT; 7362 if (flags & 7363 (MSG_HOLDSIG|MSG_IGNERROR|MSG_IPEEK|MSG_DELAYERROR)) { 7364 if (flags & MSG_HOLDSIG) 7365 waitflag |= STR_NOSIG; 7366 if (flags & MSG_IGNERROR) 7367 waitflag |= STR_NOERROR; 7368 if (flags & MSG_IPEEK) 7369 waitflag |= STR_PEEK; 7370 if (flags & MSG_DELAYERROR) 7371 waitflag |= STR_DELAYERR; 7372 } 7373 if (uiop != NULL) 7374 fmode = uiop->uio_fmode; 7375 else 7376 fmode = 0; 7377 7378 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_WAIT, 7379 "kstrgetmsg calls strwaitq:%p, %p", 7380 vp, uiop); 7381 if (((error = strwaitq(stp, waitflag, (ssize_t)0, 7382 fmode, timout, &done))) != 0 || done) { 7383 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_DONE, 7384 "kstrgetmsg error or done:%p, %p", 7385 vp, uiop); 7386 mutex_exit(&stp->sd_lock); 7387 return (error); 7388 } 7389 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_AWAKE, 7390 "kstrgetmsg awakes:%p, %p", vp, uiop); 7391 if ((error = i_straccess(stp, JCREAD)) != 0) { 7392 mutex_exit(&stp->sd_lock); 7393 return (error); 7394 } 7395 first = 0; 7396 } 7397 ASSERT(bp != NULL); 7398 /* 7399 * Extract 
any mark information. If the message is not completely 7400 * consumed this information will be put in the mblk 7401 * that is putback. 7402 * If MSGMARKNEXT is set and the message is completely consumed 7403 * the STRATMARK flag will be set below. Likewise, if 7404 * MSGNOTMARKNEXT is set and the message is 7405 * completely consumed STRNOTATMARK will be set. 7406 */ 7407 mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT); 7408 ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) != 7409 (MSGMARKNEXT|MSGNOTMARKNEXT)); 7410 pri = bp->b_band; 7411 if (mark != 0) { 7412 /* 7413 * If the caller doesn't want the mark return. 7414 * Used to implement MSG_WAITALL in sockets. 7415 */ 7416 if (flags & MSG_NOMARK) { 7417 putback(stp, q, bp, pri); 7418 qbackenable(q, pri); 7419 mutex_exit(&stp->sd_lock); 7420 return (EWOULDBLOCK); 7421 } 7422 if (bp == stp->sd_mark) { 7423 mark |= _LASTMARK; 7424 stp->sd_mark = NULL; 7425 } 7426 } 7427 7428 /* 7429 * keep track of the first message type 7430 */ 7431 type = bp->b_datap->db_type; 7432 7433 if (bp->b_datap->db_type == M_PASSFP) { 7434 if ((mark & _LASTMARK) && (stp->sd_mark == NULL)) 7435 stp->sd_mark = bp; 7436 bp->b_flag |= mark & ~_LASTMARK; 7437 putback(stp, q, bp, pri); 7438 qbackenable(q, pri); 7439 mutex_exit(&stp->sd_lock); 7440 return (EBADMSG); 7441 } 7442 ASSERT(type != M_SIG); 7443 7444 if (flags & MSG_IPEEK) { 7445 /* 7446 * Clear any struioflag - we do the uiomove over again 7447 * when peeking since it simplifies the code. 7448 * 7449 * Dup the message and put the original back on the queue. 7450 * If dupmsg() fails, try again with copymsg() to see if 7451 * there is indeed a shortage of memory. dupmsg() may fail 7452 * if db_ref in any of the messages reaches its limit. 
7453 */ 7454 7455 ASSERT(!(bp->b_datap->db_flags & DBLK_UIOA)); 7456 if ((nbp = dupmsg(bp)) == NULL && (nbp = copymsg(bp)) == NULL) { 7457 /* 7458 * Restore the state of the stream head since we 7459 * need to drop sd_lock (strwaitbuf is sleeping). 7460 */ 7461 size_t size = msgdsize(bp); 7462 7463 if ((mark & _LASTMARK) && (stp->sd_mark == NULL)) 7464 stp->sd_mark = bp; 7465 bp->b_flag |= mark & ~_LASTMARK; 7466 putback(stp, q, bp, pri); 7467 mutex_exit(&stp->sd_lock); 7468 error = strwaitbuf(size, BPRI_HI); 7469 if (error) { 7470 /* 7471 * There is no net change to the queue thus 7472 * no need to qbackenable. 7473 */ 7474 return (error); 7475 } 7476 goto retry; 7477 } 7478 7479 if ((mark & _LASTMARK) && (stp->sd_mark == NULL)) 7480 stp->sd_mark = bp; 7481 bp->b_flag |= mark & ~_LASTMARK; 7482 putback(stp, q, bp, pri); 7483 bp = nbp; 7484 } 7485 7486 /* 7487 * Set this flag so strrput will not generate signals. Need to 7488 * make sure this flag is cleared before leaving this routine 7489 * else signals will stop being sent. 7490 */ 7491 stp->sd_flag |= STRGETINPROG; 7492 mutex_exit(&stp->sd_lock); 7493 7494 if ((stp->sd_rputdatafunc != NULL) && (DB_TYPE(bp) == M_DATA)) { 7495 mblk_t *tmp, *prevmp; 7496 7497 /* 7498 * Put first non-data mblk back to stream head and 7499 * cut the mblk chain so sd_rputdatafunc only sees 7500 * M_DATA mblks. We can skip the first mblk since it 7501 * is M_DATA according to the condition above. 
7502 */ 7503 for (prevmp = bp, tmp = bp->b_cont; tmp != NULL; 7504 prevmp = tmp, tmp = tmp->b_cont) { 7505 if (DB_TYPE(tmp) != M_DATA) { 7506 prevmp->b_cont = NULL; 7507 mutex_enter(&stp->sd_lock); 7508 putback(stp, q, tmp, tmp->b_band); 7509 mutex_exit(&stp->sd_lock); 7510 break; 7511 } 7512 } 7513 7514 ASSERT(!(bp->b_datap->db_flags & DBLK_UIOA)); 7515 bp = (stp->sd_rputdatafunc)(stp->sd_vnode, bp, 7516 NULL, NULL, NULL, NULL); 7517 7518 if (bp == NULL) 7519 goto retry; 7520 } 7521 7522 if (STREAM_NEEDSERVICE(stp)) 7523 stream_runservice(stp); 7524 7525 /* 7526 * Set HIPRI flag if message is priority. 7527 */ 7528 if (type >= QPCTL) 7529 flg = MSG_HIPRI; 7530 else 7531 flg = MSG_BAND; 7532 7533 /* 7534 * First process PROTO or PCPROTO blocks, if any. 7535 */ 7536 if (mctlp != NULL && type != M_DATA) { 7537 mblk_t *nbp; 7538 7539 *mctlp = bp; 7540 while (bp->b_cont && bp->b_cont->b_datap->db_type != M_DATA) 7541 bp = bp->b_cont; 7542 nbp = bp->b_cont; 7543 bp->b_cont = NULL; 7544 bp = nbp; 7545 } 7546 7547 if (bp && bp->b_datap->db_type != M_DATA) { 7548 /* 7549 * More PROTO blocks in msg. Will only happen if mctlp is NULL. 7550 */ 7551 more |= MORECTL; 7552 savemp = bp; 7553 while (bp && bp->b_datap->db_type != M_DATA) { 7554 savemptail = bp; 7555 bp = bp->b_cont; 7556 } 7557 savemptail->b_cont = NULL; 7558 } 7559 7560 /* 7561 * Now process DATA blocks, if any. 7562 */ 7563 if (uiop == NULL) { 7564 /* Append data to tail of mctlp */ 7565 7566 ASSERT(bp == NULL || !(bp->b_datap->db_flags & DBLK_UIOA)); 7567 if (mctlp != NULL) { 7568 mblk_t **mpp = mctlp; 7569 7570 while (*mpp != NULL) 7571 mpp = &((*mpp)->b_cont); 7572 *mpp = bp; 7573 bp = NULL; 7574 } 7575 } else if (bp && (bp->b_datap->db_flags & DBLK_UIOA)) { 7576 /* 7577 * A uioa mblk_t chain, as uio processing has already 7578 * been done we simple skip over processing. 
7579 */ 7580 bp = NULL; 7581 pr = 0; 7582 7583 } else if (uiop->uio_resid >= 0 && bp) { 7584 size_t oldresid = uiop->uio_resid; 7585 7586 /* 7587 * If a streams message is likely to consist 7588 * of many small mblks, it is pulled up into 7589 * one continuous chunk of memory. 7590 * see longer comment at top of page 7591 * by mblk_pull_len declaration. 7592 */ 7593 7594 if (MBLKL(bp) < mblk_pull_len) { 7595 (void) pullupmsg(bp, -1); 7596 } 7597 7598 bp = struiocopyout(bp, uiop, &error); 7599 if (error != 0) { 7600 if (mctlp != NULL) { 7601 freemsg(*mctlp); 7602 *mctlp = NULL; 7603 } else 7604 freemsg(savemp); 7605 mutex_enter(&stp->sd_lock); 7606 /* 7607 * clear stream head hi pri flag based on 7608 * first message 7609 */ 7610 if (!(flags & MSG_IPEEK) && (type >= QPCTL)) { 7611 ASSERT(type == M_PCPROTO); 7612 stp->sd_flag &= ~STRPRI; 7613 } 7614 more = 0; 7615 goto getmout; 7616 } 7617 /* 7618 * (pr == 1) indicates a partial read. 7619 */ 7620 if (oldresid > uiop->uio_resid) 7621 pr = 1; 7622 } 7623 7624 if (bp) { /* more data blocks in msg */ 7625 more |= MOREDATA; 7626 if (savemp) 7627 savemptail->b_cont = bp; 7628 else 7629 savemp = bp; 7630 } 7631 7632 mutex_enter(&stp->sd_lock); 7633 if (savemp) { 7634 if (flags & (MSG_IPEEK|MSG_DISCARDTAIL)) { 7635 /* 7636 * When MSG_DISCARDTAIL is set or 7637 * when peeking discard any tail. When peeking this 7638 * is the tail of the dup that was copied out - the 7639 * message has already been putback on the queue. 7640 * Return MOREDATA to the caller even though the data 7641 * is discarded. This is used by sockets (to 7642 * set MSG_TRUNC). 7643 */ 7644 freemsg(savemp); 7645 if (!(flags & MSG_IPEEK) && (type >= QPCTL)) { 7646 ASSERT(type == M_PCPROTO); 7647 stp->sd_flag &= ~STRPRI; 7648 } 7649 } else if (pr && (savemp->b_datap->db_type == M_DATA) && 7650 msgnodata(savemp)) { 7651 /* 7652 * Avoid queuing a zero-length tail part of 7653 * a message. pr=1 indicates that we read some of 7654 * the message. 
7655 */ 7656 freemsg(savemp); 7657 more &= ~MOREDATA; 7658 if (type >= QPCTL) { 7659 ASSERT(type == M_PCPROTO); 7660 stp->sd_flag &= ~STRPRI; 7661 } 7662 } else { 7663 savemp->b_band = pri; 7664 /* 7665 * If the first message was HIPRI and the one we're 7666 * putting back isn't, then clear STRPRI, otherwise 7667 * set STRPRI again. Note that we must set STRPRI 7668 * again since the flush logic in strrput_nondata() 7669 * may have cleared it while we had sd_lock dropped. 7670 */ 7671 7672 ASSERT(!(savemp->b_datap->db_flags & DBLK_UIOA)); 7673 if (type >= QPCTL) { 7674 ASSERT(type == M_PCPROTO); 7675 if (queclass(savemp) < QPCTL) 7676 stp->sd_flag &= ~STRPRI; 7677 else 7678 stp->sd_flag |= STRPRI; 7679 } else if (queclass(savemp) >= QPCTL) { 7680 /* 7681 * The first message was not a HIPRI message, 7682 * but the one we are about to putback is. 7683 * For simplicitly, we do not allow for HIPRI 7684 * messages to be embedded in the message 7685 * body, so just force it to same type as 7686 * first message. 7687 */ 7688 ASSERT(type == M_DATA || type == M_PROTO); 7689 ASSERT(savemp->b_datap->db_type == M_PCPROTO); 7690 savemp->b_datap->db_type = type; 7691 } 7692 if (mark != 0) { 7693 if ((mark & _LASTMARK) && 7694 (stp->sd_mark == NULL)) { 7695 /* 7696 * If another marked message arrived 7697 * while sd_lock was not held sd_mark 7698 * would be non-NULL. 7699 */ 7700 stp->sd_mark = savemp; 7701 } 7702 savemp->b_flag |= mark & ~_LASTMARK; 7703 } 7704 putback(stp, q, savemp, pri); 7705 } 7706 } else if (!(flags & MSG_IPEEK)) { 7707 /* 7708 * The complete message was consumed. 7709 * 7710 * If another M_PCPROTO arrived while sd_lock was not held 7711 * it would have been discarded since STRPRI was still set. 7712 * 7713 * Move the MSG*MARKNEXT information 7714 * to the stream head just in case 7715 * the read queue becomes empty. 
7716 * clear stream head hi pri flag based on 7717 * first message 7718 * 7719 * If the stream head was at the mark 7720 * (STRATMARK) before we dropped sd_lock above 7721 * and some data was consumed then we have 7722 * moved past the mark thus STRATMARK is 7723 * cleared. However, if a message arrived in 7724 * strrput during the copyout above causing 7725 * STRATMARK to be set we can not clear that 7726 * flag. 7727 * XXX A "perimeter" would help by single-threading strrput, 7728 * strread, strgetmsg and kstrgetmsg. 7729 */ 7730 if (type >= QPCTL) { 7731 ASSERT(type == M_PCPROTO); 7732 stp->sd_flag &= ~STRPRI; 7733 } 7734 if (mark & (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) { 7735 if (mark & MSGMARKNEXT) { 7736 stp->sd_flag &= ~STRNOTATMARK; 7737 stp->sd_flag |= STRATMARK; 7738 } else if (mark & MSGNOTMARKNEXT) { 7739 stp->sd_flag &= ~STRATMARK; 7740 stp->sd_flag |= STRNOTATMARK; 7741 } else { 7742 stp->sd_flag &= ~(STRATMARK|STRNOTATMARK); 7743 } 7744 } else if (pr && (old_sd_flag & STRATMARK)) { 7745 stp->sd_flag &= ~STRATMARK; 7746 } 7747 } 7748 7749 *flagsp = flg; 7750 *prip = pri; 7751 7752 /* 7753 * Getmsg cleanup processing - if the state of the queue has changed 7754 * some signals may need to be sent and/or poll awakened. 7755 */ 7756 getmout: 7757 qbackenable(q, pri); 7758 7759 /* 7760 * We dropped the stream head lock above. Send all M_SIG messages 7761 * before processing stream head for SIGPOLL messages. 7762 */ 7763 ASSERT(MUTEX_HELD(&stp->sd_lock)); 7764 while ((bp = q->q_first) != NULL && 7765 (bp->b_datap->db_type == M_SIG)) { 7766 /* 7767 * sd_lock is held so the content of the read queue can not 7768 * change. 
7769 */ 7770 bp = getq(q); 7771 ASSERT(bp != NULL && bp->b_datap->db_type == M_SIG); 7772 7773 strsignal_nolock(stp, *bp->b_rptr, (int32_t)bp->b_band); 7774 mutex_exit(&stp->sd_lock); 7775 freemsg(bp); 7776 if (STREAM_NEEDSERVICE(stp)) 7777 stream_runservice(stp); 7778 mutex_enter(&stp->sd_lock); 7779 } 7780 7781 /* 7782 * stream head cannot change while we make the determination 7783 * whether or not to send a signal. Drop the flag to allow strrput 7784 * to send firstmsgsigs again. 7785 */ 7786 stp->sd_flag &= ~STRGETINPROG; 7787 7788 /* 7789 * If the type of message at the front of the queue changed 7790 * due to the receive the appropriate signals and pollwakeup events 7791 * are generated. The type of changes are: 7792 * Processed a hipri message, q_first is not hipri. 7793 * Processed a band X message, and q_first is band Y. 7794 * The generated signals and pollwakeups are identical to what 7795 * strrput() generates should the message that is now on q_first 7796 * arrive to an empty read queue. 7797 * 7798 * Note: only strrput will send a signal for a hipri message. 7799 */ 7800 if ((bp = q->q_first) != NULL && !(stp->sd_flag & STRPRI)) { 7801 strsigset_t signals = 0; 7802 strpollset_t pollwakeups = 0; 7803 7804 if (flg & MSG_HIPRI) { 7805 /* 7806 * Removed a hipri message. Regular data at 7807 * the front of the queue. 7808 */ 7809 if (bp->b_band == 0) { 7810 signals = S_INPUT | S_RDNORM; 7811 pollwakeups = POLLIN | POLLRDNORM; 7812 } else { 7813 signals = S_INPUT | S_RDBAND; 7814 pollwakeups = POLLIN | POLLRDBAND; 7815 } 7816 } else if (pri != bp->b_band) { 7817 /* 7818 * The band is different for the new q_first. 
7819 */ 7820 if (bp->b_band == 0) { 7821 signals = S_RDNORM; 7822 pollwakeups = POLLIN | POLLRDNORM; 7823 } else { 7824 signals = S_RDBAND; 7825 pollwakeups = POLLIN | POLLRDBAND; 7826 } 7827 } 7828 7829 if (pollwakeups != 0) { 7830 if (pollwakeups == (POLLIN | POLLRDNORM)) { 7831 if (!(stp->sd_rput_opt & SR_POLLIN)) 7832 goto no_pollwake; 7833 stp->sd_rput_opt &= ~SR_POLLIN; 7834 } 7835 mutex_exit(&stp->sd_lock); 7836 pollwakeup(&stp->sd_pollist, pollwakeups); 7837 mutex_enter(&stp->sd_lock); 7838 } 7839 no_pollwake: 7840 7841 if (stp->sd_sigflags & signals) 7842 strsendsig(stp->sd_siglist, signals, bp->b_band, 0); 7843 } 7844 mutex_exit(&stp->sd_lock); 7845 7846 rvp->r_val1 = more; 7847 return (error); 7848 #undef _LASTMARK 7849 } 7850 7851 /* 7852 * Put a message downstream. 7853 * 7854 * NOTE: strputmsg and kstrputmsg have much of the logic in common. 7855 */ 7856 int 7857 strputmsg( 7858 struct vnode *vp, 7859 struct strbuf *mctl, 7860 struct strbuf *mdata, 7861 unsigned char pri, 7862 int flag, 7863 int fmode) 7864 { 7865 struct stdata *stp; 7866 queue_t *wqp; 7867 mblk_t *mp; 7868 ssize_t msgsize; 7869 ssize_t rmin, rmax; 7870 int error; 7871 struct uio uios; 7872 struct uio *uiop = &uios; 7873 struct iovec iovs; 7874 int xpg4 = 0; 7875 7876 ASSERT(vp->v_stream); 7877 stp = vp->v_stream; 7878 wqp = stp->sd_wrq; 7879 7880 /* 7881 * If it is an XPG4 application, we need to send 7882 * SIGPIPE below 7883 */ 7884 7885 xpg4 = (flag & MSG_XPG4) ? 
1 : 0;
	/* MSG_XPG4 is recorded in xpg4; strip it before validating flag. */
	flag &= ~MSG_XPG4;

	if (audit_active)
		audit_strputmsg(vp, mctl, mdata, pri, flag, fmode);

	mutex_enter(&stp->sd_lock);

	if ((error = i_straccess(stp, JCWRITE)) != 0) {
		mutex_exit(&stp->sd_lock);
		return (error);
	}

	if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
		error = strwriteable(stp, B_FALSE, xpg4);
		if (error != 0) {
			mutex_exit(&stp->sd_lock);
			return (error);
		}
	}

	mutex_exit(&stp->sd_lock);

	/*
	 * Check for legal flag value.
	 * Only MSG_HIPRI (with a control part and pri == 0) and MSG_BAND
	 * are accepted; everything else is EINVAL.
	 */
	switch (flag) {
	case MSG_HIPRI:
		if ((mctl->len < 0) || (pri != 0))
			return (EINVAL);
		break;
	case MSG_BAND:
		break;

	default:
		return (EINVAL);
	}

	TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_IN,
	    "strputmsg in:stp %p", stp);

	/* get these values from those cached in the stream head */
	rmin = stp->sd_qn_minpsz;
	rmax = stp->sd_qn_maxpsz;

	/*
	 * Make sure ctl and data sizes together fall within the
	 * limits of the max and min receive packet sizes and do
	 * not exceed system limit.
	 */
	ASSERT((rmax >= 0) || (rmax == INFPSZ));
	if (rmax == 0) {
		return (ERANGE);
	}
	/*
	 * Use the MAXIMUM of sd_maxblk and q_maxpsz.
	 * Needed to prevent partial failures in the strmakedata loop.
	 */
	if (stp->sd_maxblk != INFPSZ && rmax != INFPSZ && rmax < stp->sd_maxblk)
		rmax = stp->sd_maxblk;

	/* A negative mdata->len means there is no data part. */
	if ((msgsize = mdata->len) < 0) {
		msgsize = 0;
		rmin = 0;	/* no range check for NULL data part */
	}
	if ((msgsize < rmin) ||
	    ((msgsize > rmax) && (rmax != INFPSZ)) ||
	    (mctl->len > strctlsz)) {
		return (ERANGE);
	}

	/*
	 * Setup uio and iov for data part
	 */
	iovs.iov_base = mdata->buf;
	iovs.iov_len = msgsize;
	uios.uio_iov = &iovs;
	uios.uio_iovcnt = 1;
	uios.uio_loffset = 0;
	uios.uio_segflg = UIO_USERSPACE;
	uios.uio_fmode = fmode;
	uios.uio_extflg = UIO_COPY_DEFAULT;
	uios.uio_resid = msgsize;
	uios.uio_offset = 0;

	/* Ignore flow control in strput for HIPRI */
	if (flag & MSG_HIPRI)
		flag |= MSG_IGNFLOW;

	/*
	 * Each pass builds a fresh control mblk and tries strput(); the
	 * loop is only repeated when strput() reports EWOULDBLOCK.
	 */
	for (;;) {
		int done = 0;

		/*
		 * strput will always free the ctl mblk - even when strput
		 * fails.
		 */
		if ((error = strmakectl(mctl, flag, fmode, &mp)) != 0) {
			TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT,
			    "strputmsg out:stp %p out %d error %d",
			    stp, 1, error);
			return (error);
		}
		/*
		 * Verify that the whole message can be transferred by
		 * strput.
		 */
		ASSERT(stp->sd_maxblk == INFPSZ ||
		    stp->sd_maxblk >= mdata->len);

		/* strput() updates msgsize; copy it back into mdata->len. */
		msgsize = mdata->len;
		error = strput(stp, mp, uiop, &msgsize, 0, pri, flag);
		mdata->len = msgsize;

		if (error == 0)
			break;

		if (error != EWOULDBLOCK)
			goto out;

		mutex_enter(&stp->sd_lock);
		/*
		 * Check for a missed wakeup.
		 * Needed since strput did not hold sd_lock across
		 * the canputnext.
		 */
		if (bcanputnext(wqp, pri)) {
			/* Try again */
			mutex_exit(&stp->sd_lock);
			continue;
		}
		TRACE_2(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAIT,
		    "strputmsg wait:stp %p waits pri %d", stp, pri);
		/*
		 * NOTE(review): the trace string below says "q" although the
		 * argument passed is stp, unlike the other TR_STRPUTMSG_OUT
		 * records in this function.
		 */
		if (((error = strwaitq(stp, WRITEWAIT, (ssize_t)0, fmode, -1,
		    &done)) != 0) || done) {
			mutex_exit(&stp->sd_lock);
			TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT,
			    "strputmsg out:q %p out %d error %d",
			    stp, 0, error);
			return (error);
		}
		TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAKE,
		    "strputmsg wake:stp %p wakes", stp);
		/* Re-check access after the wait, before trying again. */
		if ((error = i_straccess(stp, JCWRITE)) != 0) {
			mutex_exit(&stp->sd_lock);
			return (error);
		}
		mutex_exit(&stp->sd_lock);
	}
out:
	/*
	 * For historic reasons, applications expect EAGAIN
	 * when data mblk could not be allocated. so change
	 * ENOMEM back to EAGAIN
	 */
	if (error == ENOMEM)
		error = EAGAIN;
	TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT,
	    "strputmsg out:stp %p out %d error %d", stp, 2, error);
	return (error);
}

/*
 * Put a message downstream.
 * Can send only an M_PROTO/M_PCPROTO by passing in a NULL uiop.
 * The fmode flag (NDELAY, NONBLOCK) is the or of the flags in the uio
 * and the fmode parameter.
 *
 * This routine handles the consolidation private flags:
 *	MSG_IGNERROR	Ignore any stream head error except STPLEX.
 *	MSG_HOLDSIG	Hold signals while waiting for data.
 *	MSG_IGNFLOW	Don't check streams flow control.
 *
 * NOTE: strputmsg and kstrputmsg have much of the logic in common.
*
 * mctl must be non-NULL (EINVAL otherwise) and is always consumed:
 * it is either handed to strput() or freed before returning.
 * flag must contain exactly one of MSG_HIPRI (pri must then be 0) or
 * MSG_BAND; anything else yields EINVAL.  ERANGE is returned when the
 * cached maximum packet size is 0 or, for a non-NULL uiop, when
 * msgsize is outside the cached minimum/maximum packet size.
 * ENOMEM from strput() is reported as EAGAIN.
 */
int
kstrputmsg(
	struct vnode *vp,
	mblk_t *mctl,
	struct uio *uiop,
	ssize_t msgsize,
	unsigned char pri,
	int flag,
	int fmode)
{
	struct stdata *stp;
	queue_t *wqp;
	ssize_t rmin, rmax;
	int error;

	ASSERT(vp->v_stream);
	stp = vp->v_stream;
	wqp = stp->sd_wrq;
	if (audit_active)
		audit_strputmsg(vp, NULL, NULL, pri, flag, fmode);
	if (mctl == NULL)
		return (EINVAL);

	mutex_enter(&stp->sd_lock);

	/* From here on every error return must free mctl. */
	if ((error = i_straccess(stp, JCWRITE)) != 0) {
		mutex_exit(&stp->sd_lock);
		freemsg(mctl);
		return (error);
	}

	/* MSG_IGNERROR skips the write-error check unless STPLEX is set. */
	if ((stp->sd_flag & STPLEX) || !(flag & MSG_IGNERROR)) {
		if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
			error = strwriteable(stp, B_FALSE, B_TRUE);
			if (error != 0) {
				mutex_exit(&stp->sd_lock);
				freemsg(mctl);
				return (error);
			}
		}
	}

	mutex_exit(&stp->sd_lock);

	/*
	 * Check for legal flag value.
	 */
	switch (flag & (MSG_HIPRI|MSG_BAND|MSG_ANY)) {
	case MSG_HIPRI:
		if (pri != 0) {
			freemsg(mctl);
			return (EINVAL);
		}
		break;
	case MSG_BAND:
		break;
	default:
		freemsg(mctl);
		return (EINVAL);
	}

	TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_IN,
	    "kstrputmsg in:stp %p", stp);

	/* get these values from those cached in the stream head */
	rmin = stp->sd_qn_minpsz;
	rmax = stp->sd_qn_maxpsz;

	/*
	 * Make sure ctl and data sizes together fall within the
	 * limits of the max and min receive packet sizes and do
	 * not exceed system limit.
	 */
	ASSERT((rmax >= 0) || (rmax == INFPSZ));
	if (rmax == 0) {
		freemsg(mctl);
		return (ERANGE);
	}
	/*
	 * Use the MAXIMUM of sd_maxblk and q_maxpsz.
	 * Needed to prevent partial failures in the strmakedata loop.
	 */
	if (stp->sd_maxblk != INFPSZ && rmax != INFPSZ && rmax < stp->sd_maxblk)
		rmax = stp->sd_maxblk;

	if (uiop == NULL) {
		msgsize = -1;
		rmin = -1;	/* no range check for NULL data part */
	} else {
		/* Use uio flags as well as the fmode parameter flags */
		fmode |= uiop->uio_fmode;

		if ((msgsize < rmin) ||
		    ((msgsize > rmax) && (rmax != INFPSZ))) {
			freemsg(mctl);
			return (ERANGE);
		}
	}

	/* Ignore flow control in strput for HIPRI */
	if (flag & MSG_HIPRI)
		flag |= MSG_IGNFLOW;

	for (;;) {
		int done = 0;
		int waitflag;
		mblk_t *mp;

		/*
		 * strput will always free the ctl mblk - even when strput
		 * fails. If MSG_IGNFLOW is set then any error returned
		 * will cause us to break the loop, so we don't need a copy
		 * of the message. If MSG_IGNFLOW is not set, then we can
		 * get hit by flow control and be forced to try again. In
		 * this case we need to have a copy of the message. We
		 * do this using copymsg since the message may get modified
		 * by something below us.
		 *
		 * We've observed that many TPI providers do not check db_ref
		 * on the control messages but blindly reuse them for the
		 * T_OK_ACK/T_ERROR_ACK. Thus using copymsg is more
		 * friendly to such providers than using dupmsg. Also, note
		 * that sockfs uses MSG_IGNFLOW for all TPI control messages.
		 * Only data messages are subject to flow control, hence
		 * subject to this copymsg.
		 */
		if (flag & MSG_IGNFLOW) {
			/*
			 * Ownership passes to strput(); clear mctl so the
			 * freemsg() at "out" does not free it again.
			 */
			mp = mctl;
			mctl = NULL;
		} else {
			do {
				/*
				 * If a message has a free pointer, the message
				 * must be dupmsg to maintain this pointer.
				 * Code using this facility must be sure
				 * that modules below will not change the
				 * contents of the dblk without checking db_ref
				 * first. If db_ref is > 1, then the module
				 * needs to do a copymsg first. Otherwise,
				 * the contents of the dblk may become
				 * inconsistent because the freemsg/freeb below
				 * may end up calling atomic_add_32_nv.
				 * The atomic_add_32_nv in freeb (accessing
				 * all of db_ref, db_type, db_flags, and
				 * db_struioflag) does not prevent other threads
				 * from concurrently trying to modify e.g.
				 * db_type.
				 */
				if (mctl->b_datap->db_frtnp != NULL)
					mp = dupmsg(mctl);
				else
					mp = copymsg(mctl);

				if (mp != NULL)
					break;

				/* Wait for memory, then try the copy again. */
				error = strwaitbuf(msgdsize(mctl), BPRI_MED);
				if (error) {
					freemsg(mctl);
					return (error);
				}
			} while (mp == NULL);
		}
		/*
		 * Verify that all of msgsize can be transferred by
		 * strput.
		 */
		ASSERT(stp->sd_maxblk == INFPSZ || stp->sd_maxblk >= msgsize);
		error = strput(stp, mp, uiop, &msgsize, 0, pri, flag);
		if (error == 0)
			break;

		if (error != EWOULDBLOCK)
			goto out;

		/*
		 * IF MSG_IGNFLOW is set we should have broken out of loop
		 * above.
		 */
		ASSERT(!(flag & MSG_IGNFLOW));
		mutex_enter(&stp->sd_lock);
		/*
		 * Check for a missed wakeup.
		 * Needed since strput did not hold sd_lock across
		 * the canputnext.
		 */
		if (bcanputnext(wqp, pri)) {
			/* Try again */
			mutex_exit(&stp->sd_lock);
			continue;
		}
		TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAIT,
		    "kstrputmsg wait:stp %p waits pri %d", stp, pri);

		/* Translate the private MSG_* flags into strwaitq() flags. */
		waitflag = WRITEWAIT;
		if (flag & (MSG_HOLDSIG|MSG_IGNERROR)) {
			if (flag & MSG_HOLDSIG)
				waitflag |= STR_NOSIG;
			if (flag & MSG_IGNERROR)
				waitflag |= STR_NOERROR;
		}
		if (((error = strwaitq(stp, waitflag,
		    (ssize_t)0, fmode, -1, &done)) != 0) || done) {
			mutex_exit(&stp->sd_lock);
			TRACE_3(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_OUT,
			    "kstrputmsg out:stp %p out %d error %d",
			    stp, 0, error);
			freemsg(mctl);
			return (error);
		}
		TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAKE,
		    "kstrputmsg wake:stp %p wakes", stp);
		/* Re-check access after the wait, before trying again. */
		if ((error = i_straccess(stp, JCWRITE)) != 0) {
			mutex_exit(&stp->sd_lock);
			freemsg(mctl);
			return (error);
		}
		mutex_exit(&stp->sd_lock);
	}
out:
	/*
	 * Free the caller's original message; mctl is NULL here if it was
	 * handed to strput() in the MSG_IGNFLOW case.
	 */
	freemsg(mctl);
	/*
	 * For historic reasons, applications expect EAGAIN
	 * when data mblk could not be allocated. so change
	 * ENOMEM back to EAGAIN
	 */
	if (error == ENOMEM)
		error = EAGAIN;
	TRACE_3(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_OUT,
	    "kstrputmsg out:stp %p out %d error %d", stp, 2, error);
	return (error);
}

/*
 * Determines whether the necessary conditions are set on a stream
 * for it to be readable, writeable, or have exceptions.
 *
 * strpoll handles the consolidation private events:
 *	POLLNOERR	Do not return POLLERR even if there are stream
 *			head errors.
 *			Used by sockfs.
 *	POLLRDDATA	Do not return POLLIN unless at least one message on
 *			the queue contains one or more M_DATA mblks. Thus
 *			when this flag is set a queue with only
 *			M_PROTO/M_PCPROTO mblks does not return POLLIN.
 *			Used by sockfs to ignore T_EXDATA_IND messages.
*
 * Note: POLLRDDATA assumes that synch streams only return messages with
 *	an M_DATA attached (i.e. not messages consisting of only
 *	an M_PROTO/M_PCPROTO part).
 *
 * The events found are stored in *reventsp.  Returns EINVAL (with
 * *reventsp set to POLLNVAL) when STPLEX is set on the stream head,
 * otherwise 0.  *phpp is set to the stream's pollhead only when no
 * events were found and anyyet is zero.
 */
int
strpoll(
	struct stdata *stp,
	short events_arg,
	int anyyet,
	short *reventsp,
	struct pollhead **phpp)
{
	int events = (ushort_t)events_arg;
	int retevents = 0;
	mblk_t *mp;
	qband_t *qbp;
	/* Snapshot of the stream head flags, taken before any lock. */
	long sd_flags = stp->sd_flag;
	/* Non-zero while this function holds sd_lock. */
	int headlocked = 0;

	/*
	 * For performance, a single 'if' tests for most possible edge
	 * conditions in one shot
	 */
	if (sd_flags & (STPLEX | STRDERR | STWRERR)) {
		if (sd_flags & STPLEX) {
			*reventsp = POLLNVAL;
			return (EINVAL);
		}
		/*
		 * A read (write) error only matters if read (write) events
		 * were requested, and only if POLLNOERR was not given.
		 */
		if (((events & (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) &&
		    (sd_flags & STRDERR)) ||
		    ((events & (POLLOUT | POLLWRNORM | POLLWRBAND)) &&
		    (sd_flags & STWRERR))) {
			if (!(events & POLLNOERR)) {
				*reventsp = POLLERR;
				return (0);
			}
		}
	}
	if (sd_flags & STRHUP) {
		retevents |= POLLHUP;
	} else if (events & (POLLWRNORM | POLLWRBAND)) {
		queue_t *tq;
		queue_t	*qp = stp->sd_wrq;

		claimstr(qp);
		/* Find next module forward that has a service procedure */
		tq = qp->q_next->q_nfsrv;
		ASSERT(tq != NULL);

		polllock(&stp->sd_pollist, QLOCK(tq));
		if (events & POLLWRNORM) {
			queue_t *sqp;

			if (tq->q_flag & QFULL)
				/* ensure backq svc procedure runs */
				tq->q_flag |= QWANTW;
			else if ((sqp = stp->sd_struiowrq) != NULL) {
				/* Check sync stream barrier write q */
				mutex_exit(QLOCK(tq));
				polllock(&stp->sd_pollist, QLOCK(sqp));
				if (sqp->q_flag & QFULL)
					/* ensure pollwakeup() is done */
					sqp->q_flag |= QWANTWSYNC;
				else
					retevents |= POLLOUT;
				/* More write events to process ??? */
				if (! (events & POLLWRBAND)) {
					mutex_exit(QLOCK(sqp));
					releasestr(qp);
					goto chkrd;
				}
				/* Swap back to tq's lock for the band check. */
				mutex_exit(QLOCK(sqp));
				polllock(&stp->sd_pollist, QLOCK(tq));
			} else
				retevents |= POLLOUT;
		}
		if (events & POLLWRBAND) {
			qbp = tq->q_bandp;
			if (qbp) {
				/*
				 * Full bands are flagged QB_WANTW; any band
				 * that is not full reports POLLWRBAND.
				 */
				while (qbp) {
					if (qbp->qb_flag & QB_FULL)
						qbp->qb_flag |= QB_WANTW;
					else
						retevents |= POLLWRBAND;
					qbp = qbp->qb_next;
				}
			} else {
				retevents |= POLLWRBAND;
			}
		}
		mutex_exit(QLOCK(tq));
		releasestr(qp);
	}
chkrd:
	if (sd_flags & STRPRI) {
		retevents |= (events & POLLPRI);
	} else if (events & (POLLRDNORM | POLLRDBAND | POLLIN)) {
		queue_t	*qp = _RD(stp->sd_wrq);
		int normevents = (events & (POLLIN | POLLRDNORM));

		/*
		 * Note: Need to do polllock() here since ps_lock may be
		 * held. See bug 4191544.
		 */
		polllock(&stp->sd_pollist, &stp->sd_lock);
		headlocked = 1;
		mp = qp->q_first;
		while (mp) {
			/*
			 * For POLLRDDATA we scan b_cont and b_next until we
			 * find an M_DATA.
			 */
			if ((events & POLLRDDATA) &&
			    mp->b_datap->db_type != M_DATA) {
				mblk_t *nmp = mp->b_cont;

				while (nmp != NULL &&
				    nmp->b_datap->db_type != M_DATA)
					nmp = nmp->b_cont;
				if (nmp == NULL) {
					/* No M_DATA here; try next message. */
					mp = mp->b_next;
					continue;
				}
			}
			/* The first acceptable message decides the events. */
			if (mp->b_band == 0)
				retevents |= normevents;
			else
				retevents |= (events & (POLLIN | POLLRDBAND));
			break;
		}
		if (! (retevents & normevents) &&
		    (stp->sd_wakeq & RSLEEP)) {
			/*
			 * Sync stream barrier read queue has data.
			 */
			retevents |= normevents;
		}
		/* Treat eof as normal data */
		if (sd_flags & STREOF)
			retevents |= normevents;
	}

	*reventsp = (short)retevents;
	if (retevents) {
		if (headlocked)
			mutex_exit(&stp->sd_lock);
		return (0);
	}

	/*
	 * If poll() has not found any events yet, set up event cell
	 * to wake up the poll if a requested event occurs on this
	 * stream.  Check for collisions with outstanding poll requests.
	 */
	if (!anyyet) {
		*phpp = &stp->sd_pollist;
		/* sd_rput_opt is only modified with sd_lock held. */
		if (headlocked == 0) {
			polllock(&stp->sd_pollist, &stp->sd_lock);
			headlocked = 1;
		}
		stp->sd_rput_opt |= SR_POLLIN;
	}
	if (headlocked)
		mutex_exit(&stp->sd_lock);
	return (0);
}

/*
 * The purpose of putback() is to assure sleeping polls/reads
 * are awakened when there are no new messages arriving at the,
 * stream head, and a message is placed back on the read queue.
 *
 * sd_lock must be held when messages are placed back on stream
 * head.  (getq() holds sd_lock when it removes messages from
 * the queue)
 */

static void
putback(struct stdata *stp, queue_t *q, mblk_t *bp, int band)
{
	mblk_t	*qfirst;
	ASSERT(MUTEX_HELD(&stp->sd_lock));

	/*
	 * As a result of lock-step ordering around q_lock and sd_lock,
	 * it's possible for function calls like putnext() and
	 * canputnext() to get an inaccurate picture of how much
	 * data is really being processed at the stream head.
	 * We only consolidate with existing messages on the queue
	 * if the length of the message we want to put back is smaller
	 * than the queue hiwater mark.
8497 */ 8498 if ((stp->sd_rput_opt & SR_CONSOL_DATA) && 8499 (DB_TYPE(bp) == M_DATA) && ((qfirst = q->q_first) != NULL) && 8500 (DB_TYPE(qfirst) == M_DATA) && 8501 ((qfirst->b_flag & (MSGMARK|MSGDELIM)) == 0) && 8502 ((bp->b_flag & (MSGMARK|MSGDELIM|MSGMARKNEXT)) == 0) && 8503 (mp_cont_len(bp, NULL) < q->q_hiwat)) { 8504 /* 8505 * We use the same logic as defined in strrput() 8506 * but in reverse as we are putting back onto the 8507 * queue and want to retain byte ordering. 8508 * Consolidate M_DATA messages with M_DATA ONLY. 8509 * strrput() allows the consolidation of M_DATA onto 8510 * M_PROTO | M_PCPROTO but not the other way round. 8511 * 8512 * The consolidation does not take place if the message 8513 * we are returning to the queue is marked with either 8514 * of the marks or the delim flag or if q_first 8515 * is marked with MSGMARK. The MSGMARK check is needed to 8516 * handle the odd semantics of MSGMARK where essentially 8517 * the whole message is to be treated as marked. 8518 * Carry any MSGMARKNEXT and MSGNOTMARKNEXT from q_first 8519 * to the front of the b_cont chain. 8520 */ 8521 rmvq_noenab(q, qfirst); 8522 8523 /* 8524 * The first message in the b_cont list 8525 * tracks MSGMARKNEXT and MSGNOTMARKNEXT. 8526 * We need to handle the case where we 8527 * are appending: 8528 * 8529 * 1) a MSGMARKNEXT to a MSGNOTMARKNEXT. 8530 * 2) a MSGMARKNEXT to a plain message. 8531 * 3) a MSGNOTMARKNEXT to a plain message 8532 * 4) a MSGNOTMARKNEXT to a MSGNOTMARKNEXT 8533 * message. 8534 * 8535 * Thus we never append a MSGMARKNEXT or 8536 * MSGNOTMARKNEXT to a MSGMARKNEXT message. 
8537 */ 8538 if (qfirst->b_flag & MSGMARKNEXT) { 8539 bp->b_flag |= MSGMARKNEXT; 8540 bp->b_flag &= ~MSGNOTMARKNEXT; 8541 qfirst->b_flag &= ~MSGMARKNEXT; 8542 } else if (qfirst->b_flag & MSGNOTMARKNEXT) { 8543 bp->b_flag |= MSGNOTMARKNEXT; 8544 qfirst->b_flag &= ~MSGNOTMARKNEXT; 8545 } 8546 8547 linkb(bp, qfirst); 8548 } 8549 (void) putbq(q, bp); 8550 8551 /* 8552 * A message may have come in when the sd_lock was dropped in the 8553 * calling routine. If this is the case and STR*ATMARK info was 8554 * received, need to move that from the stream head to the q_last 8555 * so that SIOCATMARK can return the proper value. 8556 */ 8557 if (stp->sd_flag & (STRATMARK | STRNOTATMARK)) { 8558 unsigned short *flagp = &q->q_last->b_flag; 8559 uint_t b_flag = (uint_t)*flagp; 8560 8561 if (stp->sd_flag & STRATMARK) { 8562 b_flag &= ~MSGNOTMARKNEXT; 8563 b_flag |= MSGMARKNEXT; 8564 stp->sd_flag &= ~STRATMARK; 8565 } else { 8566 b_flag &= ~MSGMARKNEXT; 8567 b_flag |= MSGNOTMARKNEXT; 8568 stp->sd_flag &= ~STRNOTATMARK; 8569 } 8570 *flagp = (unsigned short) b_flag; 8571 } 8572 8573 #ifdef DEBUG 8574 /* 8575 * Make sure that the flags are not messed up. 
8576 */ 8577 { 8578 mblk_t *mp; 8579 mp = q->q_last; 8580 while (mp != NULL) { 8581 ASSERT((mp->b_flag & (MSGMARKNEXT|MSGNOTMARKNEXT)) != 8582 (MSGMARKNEXT|MSGNOTMARKNEXT)); 8583 mp = mp->b_cont; 8584 } 8585 } 8586 #endif 8587 if (q->q_first == bp) { 8588 short pollevents; 8589 8590 if (stp->sd_flag & RSLEEP) { 8591 stp->sd_flag &= ~RSLEEP; 8592 cv_broadcast(&q->q_wait); 8593 } 8594 if (stp->sd_flag & STRPRI) { 8595 pollevents = POLLPRI; 8596 } else { 8597 if (band == 0) { 8598 if (!(stp->sd_rput_opt & SR_POLLIN)) 8599 return; 8600 stp->sd_rput_opt &= ~SR_POLLIN; 8601 pollevents = POLLIN | POLLRDNORM; 8602 } else { 8603 pollevents = POLLIN | POLLRDBAND; 8604 } 8605 } 8606 mutex_exit(&stp->sd_lock); 8607 pollwakeup(&stp->sd_pollist, pollevents); 8608 mutex_enter(&stp->sd_lock); 8609 } 8610 } 8611 8612 /* 8613 * Return the held vnode attached to the stream head of a 8614 * given queue 8615 * It is the responsibility of the calling routine to ensure 8616 * that the queue does not go away (e.g. pop). 8617 */ 8618 vnode_t * 8619 strq2vp(queue_t *qp) 8620 { 8621 vnode_t *vp; 8622 vp = STREAM(qp)->sd_vnode; 8623 ASSERT(vp != NULL); 8624 VN_HOLD(vp); 8625 return (vp); 8626 } 8627 8628 /* 8629 * return the stream head write queue for the given vp 8630 * It is the responsibility of the calling routine to ensure 8631 * that the stream or vnode do not close. 8632 */ 8633 queue_t * 8634 strvp2wq(vnode_t *vp) 8635 { 8636 ASSERT(vp->v_stream != NULL); 8637 return (vp->v_stream->sd_wrq); 8638 } 8639 8640 /* 8641 * pollwakeup stream head 8642 * It is the responsibility of the calling routine to ensure 8643 * that the stream or vnode do not close. 8644 */ 8645 void 8646 strpollwakeup(vnode_t *vp, short event) 8647 { 8648 ASSERT(vp->v_stream); 8649 pollwakeup(&vp->v_stream->sd_pollist, event); 8650 } 8651 8652 /* 8653 * Mate the stream heads of two vnodes together. 
If the two vnodes are the 8654 * same, we just make the write-side point at the read-side -- otherwise, 8655 * we do a full mate. Only works on vnodes associated with streams that are 8656 * still being built and thus have only a stream head. 8657 */ 8658 void 8659 strmate(vnode_t *vp1, vnode_t *vp2) 8660 { 8661 queue_t *wrq1 = strvp2wq(vp1); 8662 queue_t *wrq2 = strvp2wq(vp2); 8663 8664 /* 8665 * Verify that there are no modules on the stream yet. We also 8666 * rely on the stream head always having a service procedure to 8667 * avoid tweaking q_nfsrv. 8668 */ 8669 ASSERT(wrq1->q_next == NULL && wrq2->q_next == NULL); 8670 ASSERT(wrq1->q_qinfo->qi_srvp != NULL); 8671 ASSERT(wrq2->q_qinfo->qi_srvp != NULL); 8672 8673 /* 8674 * If the queues are the same, just twist; otherwise do a full mate. 8675 */ 8676 if (wrq1 == wrq2) { 8677 wrq1->q_next = _RD(wrq1); 8678 } else { 8679 wrq1->q_next = _RD(wrq2); 8680 wrq2->q_next = _RD(wrq1); 8681 STREAM(wrq1)->sd_mate = STREAM(wrq2); 8682 STREAM(wrq1)->sd_flag |= STRMATE; 8683 STREAM(wrq2)->sd_mate = STREAM(wrq1); 8684 STREAM(wrq2)->sd_flag |= STRMATE; 8685 } 8686 } 8687 8688 /* 8689 * XXX will go away when console is correctly fixed. 8690 * Clean up the console PIDS, from previous I_SETSIG, 8691 * called only for cnopen which never calls strclean(). 8692 */ 8693 void 8694 str_cn_clean(struct vnode *vp) 8695 { 8696 strsig_t *ssp, *pssp, *tssp; 8697 struct stdata *stp; 8698 struct pid *pidp; 8699 int update = 0; 8700 8701 ASSERT(vp->v_stream); 8702 stp = vp->v_stream; 8703 pssp = NULL; 8704 mutex_enter(&stp->sd_lock); 8705 ssp = stp->sd_siglist; 8706 while (ssp) { 8707 mutex_enter(&pidlock); 8708 pidp = ssp->ss_pidp; 8709 /* 8710 * Get rid of PID if the proc is gone. 
8711 */ 8712 if (pidp->pid_prinactive) { 8713 tssp = ssp->ss_next; 8714 if (pssp) 8715 pssp->ss_next = tssp; 8716 else 8717 stp->sd_siglist = tssp; 8718 ASSERT(pidp->pid_ref <= 1); 8719 PID_RELE(ssp->ss_pidp); 8720 mutex_exit(&pidlock); 8721 kmem_free(ssp, sizeof (strsig_t)); 8722 update = 1; 8723 ssp = tssp; 8724 continue; 8725 } else 8726 mutex_exit(&pidlock); 8727 pssp = ssp; 8728 ssp = ssp->ss_next; 8729 } 8730 if (update) { 8731 stp->sd_sigflags = 0; 8732 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 8733 stp->sd_sigflags |= ssp->ss_events; 8734 } 8735 mutex_exit(&stp->sd_lock); 8736 } 8737 8738 /* 8739 * Return B_TRUE if there is data in the message, B_FALSE otherwise. 8740 */ 8741 static boolean_t 8742 msghasdata(mblk_t *bp) 8743 { 8744 for (; bp; bp = bp->b_cont) 8745 if (bp->b_datap->db_type == M_DATA) { 8746 ASSERT(bp->b_wptr >= bp->b_rptr); 8747 if (bp->b_wptr > bp->b_rptr) 8748 return (B_TRUE); 8749 } 8750 return (B_FALSE); 8751 } 8752 8753 /* 8754 * Called on the first strget() of a sodirect/uioa enabled streamhead, 8755 * if any mblk_t(s) enqueued they must first be uioamove()d before uioa 8756 * can be enabled for the underlying transport's use. 8757 */ 8758 void 8759 struioainit(queue_t *q, sodirect_t *sodp, uio_t *uiop) 8760 { 8761 uioa_t *uioap = (uioa_t *)uiop; 8762 mblk_t *bp = q->q_first; 8763 mblk_t *lbp = NULL; 8764 mblk_t *nbp, *wbp; 8765 int len; 8766 int error; 8767 8768 ASSERT(MUTEX_HELD(sodp->sod_lock)); 8769 ASSERT(&sodp->sod_uioa == uioap); 8770 8771 /* 8772 * Walk the b_next/b_prev doubly linked list of b_cont chain(s) 8773 * and schedule any M_DATA mblk_t's for uio asynchronous move. 
8774 */ 8775 do { 8776 /* Next mblk_t chain */ 8777 nbp = bp->b_next; 8778 /* Walk the chain */ 8779 wbp = bp; 8780 do { 8781 if (wbp->b_datap->db_type != M_DATA) { 8782 /* Not M_DATA, no more uioa */ 8783 goto nouioa; 8784 } 8785 if ((len = wbp->b_wptr - wbp->b_rptr) > 0) { 8786 /* Have a M_DATA mblk_t with data */ 8787 if (len > uioap->uio_resid) { 8788 /* Not enough uio sapce */ 8789 goto nouioa; 8790 } 8791 error = uioamove(wbp->b_rptr, len, 8792 UIO_READ, uioap); 8793 if (!error) { 8794 /* Scheduled, mark dblk_t as such */ 8795 wbp->b_datap->db_flags |= DBLK_UIOA; 8796 } else { 8797 /* Error of some sort, no more uioa */ 8798 uioap->uioa_state &= UIOA_CLR; 8799 uioap->uioa_state |= UIOA_FINI; 8800 return; 8801 } 8802 } 8803 /* Save last wbp processed */ 8804 lbp = wbp; 8805 } while ((wbp = wbp->b_cont) != NULL); 8806 } while ((bp = nbp) != NULL); 8807 8808 return; 8809 8810 nouioa: 8811 /* No more uioa */ 8812 uioap->uioa_state &= UIOA_CLR; 8813 uioap->uioa_state |= UIOA_FINI; 8814 8815 /* 8816 * If we processed 1 or more mblk_t(s) then we need to split the 8817 * current mblk_t chain in 2 so that all the uioamove()ed mblk_t(s) 8818 * are in the current chain and the rest are in the following new 8819 * chain. 8820 */ 8821 if (lbp != NULL) { 8822 /* New end of current chain */ 8823 lbp->b_cont = NULL; 8824 8825 /* Insert new chain wbp after bp */ 8826 if ((wbp->b_next = nbp) != NULL) 8827 nbp->b_prev = wbp; 8828 else 8829 q->q_last = wbp; 8830 wbp->b_prev = bp; 8831 bp->b_next = wbp; 8832 } 8833 } 8834