1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 22 /* All Rights Reserved */ 23 24 25 /* 26 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 27 * Use is subject to license terms. 
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/signal.h>
#include <sys/stat.h>
#include <sys/proc.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/stropts.h>
#include <sys/tihdr.h>
#include <sys/var.h>
#include <sys/poll.h>
#include <sys/termio.h>
#include <sys/ttold.h>
#include <sys/systm.h>
#include <sys/uio.h>
#include <sys/cmn_err.h>
#include <sys/sad.h>
#include <sys/netstack.h>
#include <sys/priocntl.h>
#include <sys/jioctl.h>
#include <sys/procset.h>
#include <sys/session.h>
#include <sys/kmem.h>
#include <sys/filio.h>
#include <sys/vtrace.h>
#include <sys/debug.h>
#include <sys/strredir.h>
#include <sys/fs/fifonode.h>
#include <sys/fs/snode.h>
#include <sys/strlog.h>
#include <sys/strsun.h>
#include <sys/project.h>
#include <sys/kbio.h>
#include <sys/msio.h>
#include <sys/tty.h>
#include <sys/ptyvar.h>
#include <sys/vuid_event.h>
#include <sys/modctl.h>
#include <sys/sunddi.h>
#include <sys/sunldi_impl.h>
#include <sys/autoconf.h>
#include <sys/policy.h>
#include <sys/zone.h>


/*
 * This define helps improve the readability of streams code while
 * still maintaining a very old streams performance enhancement.  The
 * performance enhancement basically involved having all callers
 * of straccess() perform the first check that straccess() will do
 * locally before actually calling straccess().  (Thereby reducing
 * the number of unnecessary calls to straccess().)
 *
 * Note that the macro does not take the stream head as an argument for
 * its own checks: it reads a variable named `stp' from the scope of the
 * caller, so it can only be used where such a variable is in scope.
 * It evaluates to 0 when the stream has no sd_sidp or is a VFIFO, and to
 * the result of straccess(x, y) otherwise.
 */
#define	i_straccess(x, y)	((stp->sd_sidp == NULL) ? 0 : \
	(stp->sd_vnode->v_type == VFIFO) ? 0 : \
	straccess((x), (y)))

/*
 * what is mblk_pull_len?
 *
 * If a streams message consists of many short messages,
 * a performance degradation occurs from copyout overhead.
 * To decrease the per mblk overhead, messages that are
 * likely to consist of many small mblks are pulled up into
 * one continuous chunk of memory.
 *
 * To avoid the processing overhead of examining every
 * mblk, a quick heuristic is used. If the first mblk in
 * the message is shorter than mblk_pull_len, it is likely
 * that the rest of the mblks will be short.
 *
 * This heuristic was decided upon after performance tests
 * indicated that anything more complex slowed down the main
 * code path.
 */
#define	MBLK_PULL_LEN 64
uint32_t mblk_pull_len = MBLK_PULL_LEN;

/*
 * The sgttyb_handling flag controls the handling of the old BSD
 * TIOCGETP, TIOCSETP, and TIOCSETN ioctls as follows:
 *
 * 0 - Emit no warnings at all and retain old, broken behavior.
 * 1 - Emit no warnings and silently handle new semantics.
 * 2 - Send cmn_err(CE_NOTE) when either TIOCSETP or TIOCSETN is used
 *     (once per system invocation).  Handle with new semantics.
 * 3 - Send SIGSYS when any TIOCGETP, TIOCSETP, or TIOCSETN call is
 *     made (so that offenders drop core and are easy to debug).
 *
 * The "new semantics" are that TIOCGETP returns B38400 for
 * sg_[io]speed if the corresponding value is over B38400, and that
 * TIOCSET[PN] accept B38400 in these cases to mean "retain current
 * bit rate."
 */
int sgttyb_handling = 1;
static boolean_t sgttyb_complaint;

/* don't push drcompat module by default on Style-2 streams */
static int push_drcompat = 0;

/*
 * id value used to distinguish between different ioctl messages
 */
static uint32_t ioc_id;

static void putback(struct stdata *, queue_t *, mblk_t *, int);
static void strcleanall(struct vnode *);
static int strwsrv(queue_t *);

/*
 * qinit and module_info structures for stream head read and write queues.
 * The fifo_* / fifo*_info variants are the ones stropen() installs when the
 * vnode is a VFIFO; they differ in module name and in using PIPE_BUF,
 * FIFOHIWAT and FIFOLOWAT for the read side.
 */
struct module_info strm_info = { 0, "strrhead", 0, INFPSZ, STRHIGH, STRLOW };
struct module_info stwm_info = { 0, "strwhead", 0, 0, 0, 0 };
struct qinit strdata = { strrput, NULL, NULL, NULL, NULL, &strm_info };
struct qinit stwdata = { NULL, strwsrv, NULL, NULL, NULL, &stwm_info };
struct module_info fiform_info = { 0, "fifostrrhead", 0, PIPE_BUF, FIFOHIWAT,
    FIFOLOWAT };
struct module_info fifowm_info = { 0, "fifostrwhead", 0, 0, 0, 0 };
struct qinit fifo_strdata = { strrput, NULL, NULL, NULL, NULL, &fiform_info };
struct qinit fifo_stwdata = { NULL, strwsrv, NULL, NULL, NULL, &fifowm_info };

extern kmutex_t	strresources;	/* protects global resources */
extern kmutex_t muxifier;	/* single-threads multiplexor creation */

static boolean_t msghasdata(mblk_t *bp);
/* convenience negation of msghasdata() */
#define	msgnodata(bp) (!msghasdata(bp))

/*
 * Stream head locking notes:
 *	There are four monitors associated with the stream head:
 *	1. v_stream monitor: in stropen() and strclose() v_lock
 *		is held while the association of vnode and stream
 *		head is established or tested for.
 *	2. open/close/push/pop monitor: sd_lock is held while each
 *		thread bids for exclusive access to this monitor
 *		for opening or closing a stream.  In addition, this
 *		monitor is entered during pushes and pops.
This 176 * guarantees that during plumbing operations there 177 * is only one thread trying to change the plumbing. 178 * Any other threads present in the stream are only 179 * using the plumbing. 180 * 3. read/write monitor: in the case of read, a thread holds 181 * sd_lock while trying to get data from the stream 182 * head queue. if there is none to fulfill a read 183 * request, it sets RSLEEP and calls cv_wait_sig() down 184 * in strwaitq() to await the arrival of new data. 185 * when new data arrives in strrput(), sd_lock is acquired 186 * before testing for RSLEEP and calling cv_broadcast(). 187 * the behavior of strwrite(), strwsrv(), and WSLEEP 188 * mirror this. 189 * 4. ioctl monitor: sd_lock is gotten to ensure that only one 190 * thread is doing an ioctl at a time. 191 */ 192 193 static int 194 push_mod(queue_t *qp, dev_t *devp, struct stdata *stp, const char *name, 195 int anchor, cred_t *crp, uint_t anchor_zoneid) 196 { 197 int error; 198 fmodsw_impl_t *fp; 199 200 if (stp->sd_flag & (STRHUP|STRDERR|STWRERR)) { 201 error = (stp->sd_flag & STRHUP) ? ENXIO : EIO; 202 return (error); 203 } 204 if (stp->sd_pushcnt >= nstrpush) { 205 return (EINVAL); 206 } 207 208 if ((fp = fmodsw_find(name, FMODSW_HOLD | FMODSW_LOAD)) == NULL) { 209 stp->sd_flag |= STREOPENFAIL; 210 return (EINVAL); 211 } 212 213 /* 214 * push new module and call its open routine via qattach 215 */ 216 if ((error = qattach(qp, devp, 0, crp, fp, B_FALSE)) != 0) 217 return (error); 218 219 /* 220 * Check to see if caller wants a STREAMS anchor 221 * put at this place in the stream, and add if so. 222 */ 223 mutex_enter(&stp->sd_lock); 224 if (anchor == stp->sd_pushcnt) { 225 stp->sd_anchor = stp->sd_pushcnt; 226 stp->sd_anchorzone = anchor_zoneid; 227 } 228 mutex_exit(&stp->sd_lock); 229 230 return (0); 231 } 232 233 /* 234 * Open a stream device. 
 *
 * If `vp' already has a stream head, wait for any open, close or plumbing
 * operation in progress on it (and on its mate, if any), then re-open every
 * module and driver below the head.  Otherwise allocate a new stream head,
 * attach the driver (or set up a fifo), and run any configured autopush.
 *
 * Returns 0 on success or an errno value.  On the existing-stream path:
 *	EAGAIN	FNDELAY/FNONBLOCK was given and the stream is busy
 *	EINTR	the wait for the stream was interrupted by a signal
 *	EIO	the stream is in a read or write error state
 * or whatever qreopen()/qattach()/push_mod() reported.
 */
int
stropen(vnode_t *vp, dev_t *devp, int flag, cred_t *crp)
{
	struct stdata *stp;
	queue_t *qp;
	int s;
	dev_t dummydev;
	struct autopush *ap;
	int error = 0;
	ssize_t	rmin, rmax;
	int cloneopen;
	queue_t *brq;
	major_t major;
	str_stack_t *ss;
	zoneid_t zoneid;
	uint_t anchor;

	if (audit_active)
		audit_stropen(vp, devp, flag, crp);

	/*
	 * If the stream already exists, wait for any open in progress
	 * to complete, then call the open function of each module and
	 * driver in the stream.  Otherwise create the stream.
	 */
	TRACE_1(TR_FAC_STREAMS_FR, TR_STROPEN, "stropen:%p", vp);
retry:
	mutex_enter(&vp->v_lock);
	if ((stp = vp->v_stream) != NULL) {

		/*
		 * Waiting for stream to be created to device
		 * due to another open.
		 */
		mutex_exit(&vp->v_lock);

		if (STRMATED(stp)) {
			struct stdata *strmatep = stp->sd_mate;

			/*
			 * Both sd_locks are held from here.  Every exit to
			 * ckreturn below leaves exactly stp->sd_lock held,
			 * which is what ckreturn expects.
			 */
			STRLOCKMATES(stp);
			if (strmatep->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
				if (flag & (FNDELAY|FNONBLOCK)) {
					error = EAGAIN;
					mutex_exit(&strmatep->sd_lock);
					goto ckreturn;
				}
				mutex_exit(&stp->sd_lock);
				if (!cv_wait_sig(&strmatep->sd_monitor,
				    &strmatep->sd_lock)) {
					error = EINTR;
					mutex_exit(&strmatep->sd_lock);
					/* ckreturn unlocks stp->sd_lock */
					mutex_enter(&stp->sd_lock);
					goto ckreturn;
				}
				mutex_exit(&strmatep->sd_lock);
				goto retry;
			}
			if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
				if (flag & (FNDELAY|FNONBLOCK)) {
					error = EAGAIN;
					mutex_exit(&strmatep->sd_lock);
					goto ckreturn;
				}
				mutex_exit(&strmatep->sd_lock);
				if (!cv_wait_sig(&stp->sd_monitor,
				    &stp->sd_lock)) {
					error = EINTR;
					goto ckreturn;
				}
				mutex_exit(&stp->sd_lock);
				goto retry;
			}

			if (stp->sd_flag & (STRDERR|STWRERR)) {
				error = EIO;
				mutex_exit(&strmatep->sd_lock);
				goto ckreturn;
			}

			stp->sd_flag |= STWOPEN;
			STRUNLOCKMATES(stp);
		} else {
			mutex_enter(&stp->sd_lock);
			if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
				if (flag & (FNDELAY|FNONBLOCK)) {
					error = EAGAIN;
					goto ckreturn;
				}
				if (!cv_wait_sig(&stp->sd_monitor,
				    &stp->sd_lock)) {
					error = EINTR;
					goto ckreturn;
				}
				mutex_exit(&stp->sd_lock);
				goto retry;  /* could be clone! */
			}

			if (stp->sd_flag & (STRDERR|STWRERR)) {
				error = EIO;
				goto ckreturn;
			}

			stp->sd_flag |= STWOPEN;
			mutex_exit(&stp->sd_lock);
		}

		/*
		 * Open all modules and devices down stream to notify
		 * that another user is streaming.  For modules, set the
		 * last argument to MODOPEN and do not pass any open flags.
		 * Ignore dummydev since this is not the first open.
		 *
		 * NOTE(review): the qreopen() call below takes no MODOPEN
		 * argument and passes `flag' unchanged for every queue;
		 * confirm whether the sentence above is stale.
		 */
		claimstr(stp->sd_wrq);
		qp = stp->sd_wrq;
		while (_SAMESTR(qp)) {
			qp = qp->q_next;
			if ((error = qreopen(_RD(qp), devp, flag, crp)) != 0)
				break;
		}
		releasestr(stp->sd_wrq);
		/*
		 * The hangup/error state is cleared here whether or not
		 * the re-open loop above succeeded.
		 */
		mutex_enter(&stp->sd_lock);
		stp->sd_flag &= ~(STRHUP|STWOPEN|STRDERR|STWRERR);
		stp->sd_rerror = 0;
		stp->sd_werror = 0;
ckreturn:
		/* stp->sd_lock is held on every path that gets here */
		cv_broadcast(&stp->sd_monitor);
		mutex_exit(&stp->sd_lock);
		return (error);
	}

	/*
	 * This vnode isn't streaming.  SPECFS already
	 * checked for multiple vnodes pointing to the
	 * same stream, so create a stream to the driver.
	 */
	qp = allocq();
	stp = shalloc(qp);

	/*
	 * Initialize stream head.  shalloc() has given us
	 * exclusive access, and we have the vnode locked;
	 * we can do whatever we want with stp.
	 */
	stp->sd_flag = STWOPEN;
	stp->sd_siglist = NULL;
	stp->sd_pollist.ph_list = NULL;
	stp->sd_sigflags = 0;
	stp->sd_mark = NULL;
	stp->sd_closetime = STRTIMOUT;
	stp->sd_sidp = NULL;
	stp->sd_pgidp = NULL;
	stp->sd_vnode = vp;
	stp->sd_rerror = 0;
	stp->sd_werror = 0;
	stp->sd_wroff = 0;
	stp->sd_tail = 0;
	stp->sd_iocblk = NULL;
	stp->sd_pushcnt = 0;
	stp->sd_qn_minpsz = 0;
	stp->sd_qn_maxpsz = INFPSZ - 1;	/* used to check for initialization */
	stp->sd_maxblk = INFPSZ;
	qp->q_ptr = _WR(qp)->q_ptr = stp;
	STREAM(qp) = STREAM(_WR(qp)) = stp;
	/*
	 * Publish the stream head on the vnode.  STWOPEN is already set, so
	 * concurrent openers that find it will wait on sd_monitor.
	 */
	vp->v_stream = stp;
	mutex_exit(&vp->v_lock);
	if (vp->v_type == VFIFO) {
		stp->sd_flag |= OLDNDELAY;
		/*
		 * This means, both for pipes and fifos
		 * strwrite will send SIGPIPE if the other
		 * end is closed. For putmsg it depends
		 * on whether it is a XPG4_2 application
		 * or not
		 */
		stp->sd_wput_opt = SW_SIGPIPE;

		/* setq might sleep in kmem_alloc - avoid holding locks. */
		setq(qp, &fifo_strdata, &fifo_stwdata, NULL, QMTSAFE,
		    SQ_CI|SQ_CO, B_FALSE);

		set_qend(qp);
		stp->sd_strtab = fifo_getinfo();
		_WR(qp)->q_nfsrv = _WR(qp);
		qp->q_nfsrv = qp;
		/*
		 * Wake up others that are waiting for stream to be created.
		 */
		mutex_enter(&stp->sd_lock);
		/*
		 * nothing is pushed on the stream yet, so
		 * optimized stream head packetsizes are just that
		 * of the read queue
		 */
		stp->sd_qn_minpsz = qp->q_minpsz;
		stp->sd_qn_maxpsz = qp->q_maxpsz;
		stp->sd_flag &= ~STWOPEN;
		/* fifo_opendone expects sd_lock held; error is still 0 */
		goto fifo_opendone;
	}
	/* setq might sleep in kmem_alloc - avoid holding locks. */
	setq(qp, &strdata, &stwdata, NULL, QMTSAFE, SQ_CI|SQ_CO, B_FALSE);

	set_qend(qp);

	/*
	 * Open driver and create stream to it (via qattach).
	 */
	cloneopen = (getmajor(*devp) == clone_major);
	if ((error = qattach(qp, devp, flag, crp, NULL, B_FALSE)) != 0) {
		/*
		 * Undo the vnode association, wake any waiters, and free
		 * the queue pair and stream head allocated above.
		 */
		mutex_enter(&vp->v_lock);
		vp->v_stream = NULL;
		mutex_exit(&vp->v_lock);
		mutex_enter(&stp->sd_lock);
		cv_broadcast(&stp->sd_monitor);
		mutex_exit(&stp->sd_lock);
		freeq(_RD(qp));
		shfree(stp);
		return (error);
	}
	/*
	 * Set sd_strtab after open in order to handle clonable drivers
	 */
	stp->sd_strtab = STREAMSTAB(getmajor(*devp));

	/*
	 * Historical note: dummydev used to be set prior to the initial
	 * open (via qattach above), which made the value seen
	 * inconsistent between an I_PUSH and an autopush of a module.
	 */
	dummydev = *devp;

	/*
	 * For clone open of old style (Q not associated) network driver,
	 * push DRMODNAME module to handle DL_ATTACH/DL_DETACH
	 */
	brq = _RD(_WR(qp)->q_next);
	major = getmajor(*devp);
	if (push_drcompat && cloneopen && NETWORK_DRV(major) &&
	    ((brq->q_flag & _QASSOCIATED) == 0)) {
		/* a failed push is only warned about; the open continues */
		if (push_mod(qp, &dummydev, stp, DRMODNAME, 0, crp, 0) != 0)
			cmn_err(CE_WARN, "cannot push " DRMODNAME
			    " streams module");
	}

	/*
	 * Check for autopush. Start with the global zone. If not found
	 * check in the local zone.
	 */
	zoneid = GLOBAL_ZONEID;
retryap:
	ss = netstack_find_by_stackid(zoneid_to_netstackid(zoneid))->
	    netstack_str;
	if ((ap = sad_ap_find_by_dev(*devp, ss)) == NULL) {
		netstack_rele(ss->ss_netstack);
		if (zoneid == GLOBAL_ZONEID) {
			/*
			 * None found. Also look in the zone's autopush table.
			 */
			zoneid = crgetzoneid(crp);
			if (zoneid != GLOBAL_ZONEID)
				goto retryap;
		}
		goto opendone;
	}
	anchor = ap->ap_anchor;
	/* the anchor, if any, is tagged with the opener's zone */
	zoneid = crgetzoneid(crp);
	for (s = 0; s < ap->ap_npush; s++) {
		error = push_mod(qp, &dummydev, stp, ap->ap_list[s],
		    anchor, crp, zoneid);
		if (error != 0)
			break;
	}
	sad_ap_rele(ap, ss);
	netstack_rele(ss->ss_netstack);

	/*
	 * let specfs know that open failed part way through
	 */
	if (error) {
		mutex_enter(&stp->sd_lock);
		stp->sd_flag |= STREOPENFAIL;
		mutex_exit(&stp->sd_lock);
	}

opendone:

	/*
	 * Wake up others that are waiting for stream to be created.
	 */
	mutex_enter(&stp->sd_lock);
	stp->sd_flag &= ~STWOPEN;

	/*
	 * As a performance concern we are caching the values of
	 * q_minpsz and q_maxpsz of the module below the stream
	 * head in the stream head.
	 */
	mutex_enter(QLOCK(stp->sd_wrq->q_next));
	rmin = stp->sd_wrq->q_next->q_minpsz;
	rmax = stp->sd_wrq->q_next->q_maxpsz;
	mutex_exit(QLOCK(stp->sd_wrq->q_next));

	/* do this processing here as a performance concern */
	if (strmsgsz != 0) {
		if (rmax == INFPSZ)
			rmax = strmsgsz;
		else
			rmax = MIN(strmsgsz, rmax);
	}

	mutex_enter(QLOCK(stp->sd_wrq));
	stp->sd_qn_minpsz = rmin;
	stp->sd_qn_maxpsz = rmax;
	mutex_exit(QLOCK(stp->sd_wrq));

fifo_opendone:
	cv_broadcast(&stp->sd_monitor);
	mutex_exit(&stp->sd_lock);
	return (error);
}

/*
 * qinit pairs that strclose() installs on the stream head queues of a
 * pipe end whose other end is still alive.  strsink() is the read-side
 * put procedure of such a dead end.
 */
static int strsink(queue_t *, mblk_t *);
static struct qinit deadrend = {
	strsink, NULL, NULL, NULL, NULL, &strm_info, NULL
};
static struct qinit deadwend = {
	NULL, NULL, NULL, NULL, NULL, &stwm_info, NULL
};

/*
 * Close a stream.
 * This is called from closef() on the last close of an open stream.
 * Strclean() will already have removed the siglist and pollist
 * information, so all that remains is to remove all multiplexor links
 * for the stream, pop all the modules (and the driver), and free the
 * stream structure.
 *
 * `flag' carries the file flags; FNDELAY/FNONBLOCK suppress the wait for
 * downstream write queues to drain.  Always returns 0.
 */

int
strclose(struct vnode *vp, int flag, cred_t *crp)
{
	struct stdata *stp;
	queue_t *qp;
	int rval;
	int freestp = 1;	/* cleared when the head must outlive us */
	queue_t *rmq;

	if (audit_active)
		audit_strclose(vp, flag, crp);

	TRACE_1(TR_FAC_STREAMS_FR,
	    TR_STRCLOSE, "strclose:%p", vp);
	ASSERT(vp->v_stream);

	stp = vp->v_stream;
	ASSERT(!(stp->sd_flag & STPLEX));
	qp = stp->sd_wrq;

	/*
	 * Needed so that strpoll will return non-zero for this fd.
	 * Note that with POLLNOERR STRHUP does still cause POLLHUP.
	 */
	mutex_enter(&stp->sd_lock);
	stp->sd_flag |= STRHUP;
	mutex_exit(&stp->sd_lock);

	/*
	 * If the registered process or process group did not have an
	 * open instance of this stream then strclean would not be
	 * called. Thus at the time of closing all remaining siglist entries
	 * are removed.
	 */
	if (stp->sd_siglist != NULL)
		strcleanall(vp);

	ASSERT(stp->sd_siglist == NULL);
	ASSERT(stp->sd_sigflags == 0);

	if (STRMATED(stp)) {
		struct stdata *strmatep = stp->sd_mate;
		int waited = 1;

		/*
		 * Wait until neither this stream nor its mate has an open,
		 * close or plumbing operation in progress.  Each wait drops
		 * one of the two locks, so after any wait both conditions
		 * are re-checked from the top with both locks re-taken.
		 */
		STRLOCKMATES(stp);
		while (waited) {
			waited = 0;
			while (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
				mutex_exit(&strmatep->sd_lock);
				cv_wait(&stp->sd_monitor, &stp->sd_lock);
				mutex_exit(&stp->sd_lock);
				STRLOCKMATES(stp);
				waited = 1;
			}
			while (strmatep->sd_flag &
			    (STWOPEN|STRCLOSE|STRPLUMB)) {
				mutex_exit(&stp->sd_lock);
				cv_wait(&strmatep->sd_monitor,
				    &strmatep->sd_lock);
				mutex_exit(&strmatep->sd_lock);
				STRLOCKMATES(stp);
				waited = 1;
			}
		}
		stp->sd_flag |= STRCLOSE;
		STRUNLOCKMATES(stp);
	} else {
		mutex_enter(&stp->sd_lock);
		stp->sd_flag |= STRCLOSE;
		mutex_exit(&stp->sd_lock);
	}

	ASSERT(qp->q_first == NULL);	/* No more delayed write */

	/* Check if an I_LINK was ever done on this stream */
	if (stp->sd_flag & STRHASLINKS) {
		netstack_t *ns;
		str_stack_t *ss;

		ns = netstack_find_by_cred(crp);
		ASSERT(ns != NULL);
		ss = ns->netstack_str;
		ASSERT(ss != NULL);

		(void) munlinkall(stp, LINKCLOSE|LINKNORMAL, crp, &rval, ss);
		netstack_rele(ss->ss_netstack);
	}

	/*
	 * Detach everything below the stream head, one queue pair per
	 * iteration, optionally waiting for its write queue to drain first.
	 */
	while (_SAMESTR(qp)) {
		/*
		 * Holding sd_lock prevents q_next from changing in
		 * this stream.
		 */
		mutex_enter(&stp->sd_lock);
		if (!(flag & (FNDELAY|FNONBLOCK)) && (stp->sd_closetime > 0)) {

			/*
			 * sleep until awakened by strwsrv() or timeout
			 */
			for (;;) {
				mutex_enter(QLOCK(qp->q_next));
				if (!(qp->q_next->q_mblkcnt)) {
					mutex_exit(QLOCK(qp->q_next));
					break;
				}
				stp->sd_flag |= WSLEEP;

				/* ensure strwsrv gets enabled */
				qp->q_next->q_flag |= QWANTW;
				mutex_exit(QLOCK(qp->q_next));
				/* get out if we timed out or recv'd a signal */
				if (str_cv_wait(&qp->q_wait, &stp->sd_lock,
				    stp->sd_closetime, 0) <= 0) {
					break;
				}
			}
			stp->sd_flag &= ~WSLEEP;
		}
		mutex_exit(&stp->sd_lock);

		rmq = qp->q_next;
		if (rmq->q_flag & QISDRV) {
			ASSERT(!_SAMESTR(rmq));
			wait_sq_svc(_RD(qp)->q_syncq);
		}

		qdetach(_RD(rmq), 1, flag, crp, B_FALSE);
	}

	/*
	 * Since we call pollwakeup in close() now, the poll list should
	 * be empty in most cases. The only exception is the layered devices
	 * (e.g. the console drivers with redirection modules pushed on top
	 * of it).  We have to do this after calling qdetach() because
	 * the redirection module won't have torn down the console
	 * redirection until after qdetach() has been invoked.
	 */
	if (stp->sd_pollist.ph_list != NULL) {
		pollwakeup(&stp->sd_pollist, POLLERR);
		pollhead_clean(&stp->sd_pollist);
	}
	ASSERT(stp->sd_pollist.ph_list == NULL);
	ASSERT(stp->sd_sidp == NULL);
	ASSERT(stp->sd_pgidp == NULL);

	/* Prevent qenable from re-enabling the stream head queue */
	disable_svc(_RD(qp));

	/*
	 * Wait until service procedure of each queue is
	 * run, if QINSERVICE is set.
	 */
	wait_svc(_RD(qp));

	/*
	 * Now, flush both queues.
	 */
	flushq(_RD(qp), FLUSHALL);
	flushq(qp, FLUSHALL);

	/*
	 * If the write queue of the stream head is pointing to a
	 * read queue, we have a twisted stream.  If the read queue
	 * is alive, convert the stream head queues into a dead end.
	 * If the read queue is dead, free the dead pair.
	 */
	if (qp->q_next && !_SAMESTR(qp)) {
		if (qp->q_next->q_qinfo == &deadrend) {	/* half-closed pipe */
			flushq(qp->q_next, FLUSHALL); /* ensure no message */
			/* free the other end's head and both queue pairs */
			shfree(qp->q_next->q_stream);
			freeq(qp->q_next);
			freeq(_RD(qp));
		} else if (qp->q_next == _RD(qp)) {	/* fifo */
			freeq(_RD(qp));
		} else {				/* pipe */
			/*
			 * The other end is still alive, so this stream head
			 * is not freed below.
			 */
			freestp = 0;
			/*
			 * The q_info pointers are never accessed when
			 * SQLOCK is held.
			 */
			ASSERT(qp->q_syncq == _RD(qp)->q_syncq);
			mutex_enter(SQLOCK(qp->q_syncq));
			qp->q_qinfo = &deadwend;
			_RD(qp)->q_qinfo = &deadrend;
			mutex_exit(SQLOCK(qp->q_syncq));
		}
	} else {
		freeq(_RD(qp)); /* free stream head queue pair */
	}

	mutex_enter(&vp->v_lock);
	if (stp->sd_iocblk) {
		/* (mblk_t *)-1 is a marker value, not a message to free */
		if (stp->sd_iocblk != (mblk_t *)-1) {
			freemsg(stp->sd_iocblk);
		}
		stp->sd_iocblk = NULL;
	}
	stp->sd_vnode = NULL;
	vp->v_stream = NULL;
	mutex_exit(&vp->v_lock);
	mutex_enter(&stp->sd_lock);
	stp->sd_flag &= ~STRCLOSE;
	cv_broadcast(&stp->sd_monitor);
	mutex_exit(&stp->sd_lock);

	if (freestp)
		shfree(stp);
	return (0);
}

/*
 * Read-side put procedure of a dead-end queue pair (see deadrend).
 * Nothing is ever passed on: each message is either answered back down
 * the write side with qreply() or freed.
 *
 *	M_FLUSH		if FLUSHW is set and the message has not already
 *			looped (MSGNOLOOP), clear FLUSHR, mark it MSGNOLOOP
 *			and send it back; otherwise free it
 *	M_COPYIN,
 *	M_COPYOUT	answered with an M_IOCDATA whose cp_rval is 1
 *			(failure)
 *	M_IOCTL		answered with an M_IOCNAK
 *	anything else	freed
 *
 * Always returns 0.
 */
static int
strsink(queue_t *q, mblk_t *bp)
{
	struct copyresp *resp;

	switch (bp->b_datap->db_type) {
	case M_FLUSH:
		if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) {
			*bp->b_rptr &= ~FLUSHR;
			bp->b_flag |= MSGNOLOOP;
			/*
			 * Protect against the driver passing up
			 * messages after it has done a qprocsoff.
			 */
			if (_OTHERQ(q)->q_next == NULL)
				freemsg(bp);
			else
				qreply(q, bp);
		} else {
			freemsg(bp);
		}
		break;

	case M_COPYIN:
	case M_COPYOUT:
		/* drop any payload; only the copyresp header is returned */
		if (bp->b_cont) {
			freemsg(bp->b_cont);
			bp->b_cont = NULL;
		}
		bp->b_datap->db_type = M_IOCDATA;
		bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
		resp = (struct copyresp *)bp->b_rptr;
		resp->cp_rval = (caddr_t)1;	/* failure */
		/*
		 * Protect against the driver passing up
		 * messages after it has done a qprocsoff.
		 */
		if (_OTHERQ(q)->q_next == NULL)
			freemsg(bp);
		else
			qreply(q, bp);
		break;

	case M_IOCTL:
		if (bp->b_cont) {
			freemsg(bp->b_cont);
			bp->b_cont = NULL;
		}
		bp->b_datap->db_type = M_IOCNAK;
		/*
		 * Protect against the driver passing up
		 * messages after it has done a qprocsoff.
		 */
		if (_OTHERQ(q)->q_next == NULL)
			freemsg(bp);
		else
			qreply(q, bp);
		break;

	default:
		freemsg(bp);
		break;
	}

	return (0);
}

/*
 * Clean up after a process when it closes a stream.  This is called
 * from closef for all closes, whereas strclose is called only for the
 * last close on a stream.  The siglist is scanned for entries for the
 * current process, and these are removed.
855 */ 856 void 857 strclean(struct vnode *vp) 858 { 859 strsig_t *ssp, *pssp, *tssp; 860 stdata_t *stp; 861 int update = 0; 862 863 TRACE_1(TR_FAC_STREAMS_FR, 864 TR_STRCLEAN, "strclean:%p", vp); 865 stp = vp->v_stream; 866 pssp = NULL; 867 mutex_enter(&stp->sd_lock); 868 ssp = stp->sd_siglist; 869 while (ssp) { 870 if (ssp->ss_pidp == curproc->p_pidp) { 871 tssp = ssp->ss_next; 872 if (pssp) 873 pssp->ss_next = tssp; 874 else 875 stp->sd_siglist = tssp; 876 mutex_enter(&pidlock); 877 PID_RELE(ssp->ss_pidp); 878 mutex_exit(&pidlock); 879 kmem_free(ssp, sizeof (strsig_t)); 880 update = 1; 881 ssp = tssp; 882 } else { 883 pssp = ssp; 884 ssp = ssp->ss_next; 885 } 886 } 887 if (update) { 888 stp->sd_sigflags = 0; 889 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 890 stp->sd_sigflags |= ssp->ss_events; 891 } 892 mutex_exit(&stp->sd_lock); 893 } 894 895 /* 896 * Used on the last close to remove any remaining items on the siglist. 897 * These could be present on the siglist due to I_ESETSIG calls that 898 * use process groups or processed that do not have an open file descriptor 899 * for this stream (Such entries would not be removed by strclean). 900 */ 901 static void 902 strcleanall(struct vnode *vp) 903 { 904 strsig_t *ssp, *nssp; 905 stdata_t *stp; 906 907 stp = vp->v_stream; 908 mutex_enter(&stp->sd_lock); 909 ssp = stp->sd_siglist; 910 stp->sd_siglist = NULL; 911 while (ssp) { 912 nssp = ssp->ss_next; 913 mutex_enter(&pidlock); 914 PID_RELE(ssp->ss_pidp); 915 mutex_exit(&pidlock); 916 kmem_free(ssp, sizeof (strsig_t)); 917 ssp = nssp; 918 } 919 stp->sd_sigflags = 0; 920 mutex_exit(&stp->sd_lock); 921 } 922 923 /* 924 * Retrieve the next message from the logical stream head read queue 925 * using either rwnext (if sync stream) or getq_noenab. 926 * It is the callers responsibility to call qbackenable after 927 * it is finished with the message. The caller should not call 928 * qbackenable until after any putback calls to avoid spurious backenabling. 
929 */ 930 mblk_t * 931 strget(struct stdata *stp, queue_t *q, struct uio *uiop, int first, 932 int *errorp) 933 { 934 mblk_t *bp; 935 int error; 936 937 ASSERT(MUTEX_HELD(&stp->sd_lock)); 938 /* Holding sd_lock prevents the read queue from changing */ 939 940 if (uiop != NULL && stp->sd_struiordq != NULL && 941 q->q_first == NULL && 942 (!first || (stp->sd_wakeq & RSLEEP))) { 943 /* 944 * Stream supports rwnext() for the read side. 945 * If this is the first time we're called by e.g. strread 946 * only do the downcall if there is a deferred wakeup 947 * (registered in sd_wakeq). 948 */ 949 struiod_t uiod; 950 951 if (first) 952 stp->sd_wakeq &= ~RSLEEP; 953 954 (void) uiodup(uiop, &uiod.d_uio, uiod.d_iov, 955 sizeof (uiod.d_iov) / sizeof (*uiod.d_iov)); 956 uiod.d_mp = 0; 957 /* 958 * Mark that a thread is in rwnext on the read side 959 * to prevent strrput from nacking ioctls immediately. 960 * When the last concurrent rwnext returns 961 * the ioctls are nack'ed. 962 */ 963 ASSERT(MUTEX_HELD(&stp->sd_lock)); 964 stp->sd_struiodnak++; 965 /* 966 * Note: rwnext will drop sd_lock. 967 */ 968 error = rwnext(q, &uiod); 969 ASSERT(MUTEX_NOT_HELD(&stp->sd_lock)); 970 mutex_enter(&stp->sd_lock); 971 stp->sd_struiodnak--; 972 while (stp->sd_struiodnak == 0 && 973 ((bp = stp->sd_struionak) != NULL)) { 974 stp->sd_struionak = bp->b_next; 975 bp->b_next = NULL; 976 bp->b_datap->db_type = M_IOCNAK; 977 /* 978 * Protect against the driver passing up 979 * messages after it has done a qprocsoff. 
980 */ 981 if (_OTHERQ(q)->q_next == NULL) 982 freemsg(bp); 983 else { 984 mutex_exit(&stp->sd_lock); 985 qreply(q, bp); 986 mutex_enter(&stp->sd_lock); 987 } 988 } 989 ASSERT(MUTEX_HELD(&stp->sd_lock)); 990 if (error == 0 || error == EWOULDBLOCK) { 991 if ((bp = uiod.d_mp) != NULL) { 992 *errorp = 0; 993 ASSERT(MUTEX_HELD(&stp->sd_lock)); 994 return (bp); 995 } 996 error = 0; 997 } else if (error == EINVAL) { 998 /* 999 * The stream plumbing must have 1000 * changed while we were away, so 1001 * just turn off rwnext()s. 1002 */ 1003 error = 0; 1004 } else if (error == EBUSY) { 1005 /* 1006 * The module might have data in transit using putnext 1007 * Fall back on waiting + getq. 1008 */ 1009 error = 0; 1010 } else { 1011 *errorp = error; 1012 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1013 return (NULL); 1014 } 1015 /* 1016 * Try a getq in case a rwnext() generated mblk 1017 * has bubbled up via strrput(). 1018 */ 1019 } 1020 *errorp = 0; 1021 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1022 return (getq_noenab(q)); 1023 } 1024 1025 /* 1026 * Copy out the message pointed to by `bp' into the uio pointed to by `uiop'. 1027 * If the message does not fit in the uio the remainder of it is returned; 1028 * otherwise NULL is returned. Any embedded zero-length mblk_t's are 1029 * consumed, even if uio_resid reaches zero. On error, `*errorp' is set to 1030 * the error code, the message is consumed, and NULL is returned. 
1031 */ 1032 static mblk_t * 1033 struiocopyout(mblk_t *bp, struct uio *uiop, int *errorp) 1034 { 1035 int error; 1036 ptrdiff_t n; 1037 mblk_t *nbp; 1038 1039 ASSERT(bp->b_wptr >= bp->b_rptr); 1040 1041 do { 1042 if ((n = MIN(uiop->uio_resid, MBLKL(bp))) != 0) { 1043 ASSERT(n > 0); 1044 1045 error = uiomove(bp->b_rptr, n, UIO_READ, uiop); 1046 if (error != 0) { 1047 freemsg(bp); 1048 *errorp = error; 1049 return (NULL); 1050 } 1051 } 1052 1053 bp->b_rptr += n; 1054 while (bp != NULL && (bp->b_rptr >= bp->b_wptr)) { 1055 nbp = bp; 1056 bp = bp->b_cont; 1057 freeb(nbp); 1058 } 1059 } while (bp != NULL && uiop->uio_resid > 0); 1060 1061 *errorp = 0; 1062 return (bp); 1063 } 1064 1065 /* 1066 * Read a stream according to the mode flags in sd_flag: 1067 * 1068 * (default mode) - Byte stream, msg boundaries are ignored 1069 * RD_MSGDIS (msg discard) - Read on msg boundaries and throw away 1070 * any data remaining in msg 1071 * RD_MSGNODIS (msg non-discard) - Read on msg boundaries and put back 1072 * any remaining data on head of read queue 1073 * 1074 * Consume readable messages on the front of the queue until 1075 * ttolwp(curthread)->lwp_count 1076 * is satisfied, the readable messages are exhausted, or a message 1077 * boundary is reached in a message mode. If no data was read and 1078 * the stream was not opened with the NDELAY flag, block until data arrives. 1079 * Otherwise return the data read and update the count. 1080 * 1081 * In default mode a 0 length message signifies end-of-file and terminates 1082 * a read in progress. The 0 length message is removed from the queue 1083 * only if it is the only message read (no data is read). 1084 * 1085 * An attempt to read an M_PROTO or M_PCPROTO message results in an 1086 * EBADMSG error return, unless either RD_PROTDAT or RD_PROTDIS are set. 1087 * If RD_PROTDAT is set, M_PROTO and M_PCPROTO messages are read as data. 
1088 * If RD_PROTDIS is set, the M_PROTO and M_PCPROTO parts of the message 1089 * are unlinked from and M_DATA blocks in the message, the protos are 1090 * thrown away, and the data is read. 1091 */ 1092 /* ARGSUSED */ 1093 int 1094 strread(struct vnode *vp, struct uio *uiop, cred_t *crp) 1095 { 1096 struct stdata *stp; 1097 mblk_t *bp, *nbp; 1098 queue_t *q; 1099 int error = 0; 1100 uint_t old_sd_flag; 1101 int first; 1102 char rflg; 1103 uint_t mark; /* Contains MSG*MARK and _LASTMARK */ 1104 #define _LASTMARK 0x8000 /* Distinct from MSG*MARK */ 1105 short delim; 1106 unsigned char pri = 0; 1107 char waitflag; 1108 unsigned char type; 1109 1110 TRACE_1(TR_FAC_STREAMS_FR, 1111 TR_STRREAD_ENTER, "strread:%p", vp); 1112 ASSERT(vp->v_stream); 1113 stp = vp->v_stream; 1114 1115 mutex_enter(&stp->sd_lock); 1116 1117 if ((error = i_straccess(stp, JCREAD)) != 0) { 1118 mutex_exit(&stp->sd_lock); 1119 return (error); 1120 } 1121 1122 if (stp->sd_flag & (STRDERR|STPLEX)) { 1123 error = strgeterr(stp, STRDERR|STPLEX, 0); 1124 if (error != 0) { 1125 mutex_exit(&stp->sd_lock); 1126 return (error); 1127 } 1128 } 1129 1130 /* 1131 * Loop terminates when uiop->uio_resid == 0. 1132 */ 1133 rflg = 0; 1134 waitflag = READWAIT; 1135 q = _RD(stp->sd_wrq); 1136 for (;;) { 1137 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1138 old_sd_flag = stp->sd_flag; 1139 mark = 0; 1140 delim = 0; 1141 first = 1; 1142 while ((bp = strget(stp, q, uiop, first, &error)) == NULL) { 1143 int done = 0; 1144 1145 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1146 1147 if (error != 0) 1148 goto oops; 1149 1150 if (stp->sd_flag & (STRHUP|STREOF)) { 1151 goto oops; 1152 } 1153 if (rflg && !(stp->sd_flag & STRDELIM)) { 1154 goto oops; 1155 } 1156 /* 1157 * If a read(fd,buf,0) has been done, there is no 1158 * need to sleep. We always have zero bytes to 1159 * return. 
1160 */ 1161 if (uiop->uio_resid == 0) { 1162 goto oops; 1163 } 1164 1165 qbackenable(q, 0); 1166 1167 TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_WAIT, 1168 "strread calls strwaitq:%p, %p, %p", 1169 vp, uiop, crp); 1170 if ((error = strwaitq(stp, waitflag, uiop->uio_resid, 1171 uiop->uio_fmode, -1, &done)) != 0 || done) { 1172 TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_DONE, 1173 "strread error or done:%p, %p, %p", 1174 vp, uiop, crp); 1175 if ((uiop->uio_fmode & FNDELAY) && 1176 (stp->sd_flag & OLDNDELAY) && 1177 (error == EAGAIN)) 1178 error = 0; 1179 goto oops; 1180 } 1181 TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_AWAKE, 1182 "strread awakes:%p, %p, %p", vp, uiop, crp); 1183 if ((error = i_straccess(stp, JCREAD)) != 0) { 1184 goto oops; 1185 } 1186 first = 0; 1187 } 1188 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1189 ASSERT(bp); 1190 pri = bp->b_band; 1191 /* 1192 * Extract any mark information. If the message is not 1193 * completely consumed this information will be put in the mblk 1194 * that is putback. 1195 * If MSGMARKNEXT is set and the message is completely consumed 1196 * the STRATMARK flag will be set below. Likewise, if 1197 * MSGNOTMARKNEXT is set and the message is 1198 * completely consumed STRNOTATMARK will be set. 1199 * 1200 * For some unknown reason strread only breaks the read at the 1201 * last mark. 
1202 */ 1203 mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT); 1204 ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) != 1205 (MSGMARKNEXT|MSGNOTMARKNEXT)); 1206 if (mark != 0 && bp == stp->sd_mark) { 1207 if (rflg) { 1208 putback(stp, q, bp, pri); 1209 goto oops; 1210 } 1211 mark |= _LASTMARK; 1212 stp->sd_mark = NULL; 1213 } 1214 if ((stp->sd_flag & STRDELIM) && (bp->b_flag & MSGDELIM)) 1215 delim = 1; 1216 mutex_exit(&stp->sd_lock); 1217 1218 if (STREAM_NEEDSERVICE(stp)) 1219 stream_runservice(stp); 1220 1221 type = bp->b_datap->db_type; 1222 1223 switch (type) { 1224 1225 case M_DATA: 1226 ismdata: 1227 if (msgnodata(bp)) { 1228 if (mark || delim) { 1229 freemsg(bp); 1230 } else if (rflg) { 1231 1232 /* 1233 * If already read data put zero 1234 * length message back on queue else 1235 * free msg and return 0. 1236 */ 1237 bp->b_band = pri; 1238 mutex_enter(&stp->sd_lock); 1239 putback(stp, q, bp, pri); 1240 mutex_exit(&stp->sd_lock); 1241 } else { 1242 freemsg(bp); 1243 } 1244 error = 0; 1245 goto oops1; 1246 } 1247 1248 rflg = 1; 1249 waitflag |= NOINTR; 1250 bp = struiocopyout(bp, uiop, &error); 1251 if (error != 0) 1252 goto oops1; 1253 1254 mutex_enter(&stp->sd_lock); 1255 if (bp) { 1256 /* 1257 * Have remaining data in message. 1258 * Free msg if in discard mode. 1259 */ 1260 if (stp->sd_read_opt & RD_MSGDIS) { 1261 freemsg(bp); 1262 } else { 1263 bp->b_band = pri; 1264 if ((mark & _LASTMARK) && 1265 (stp->sd_mark == NULL)) 1266 stp->sd_mark = bp; 1267 bp->b_flag |= mark & ~_LASTMARK; 1268 if (delim) 1269 bp->b_flag |= MSGDELIM; 1270 if (msgnodata(bp)) 1271 freemsg(bp); 1272 else 1273 putback(stp, q, bp, pri); 1274 } 1275 } else { 1276 /* 1277 * Consumed the complete message. 1278 * Move the MSG*MARKNEXT information 1279 * to the stream head just in case 1280 * the read queue becomes empty. 
1281 * 1282 * If the stream head was at the mark 1283 * (STRATMARK) before we dropped sd_lock above 1284 * and some data was consumed then we have 1285 * moved past the mark thus STRATMARK is 1286 * cleared. However, if a message arrived in 1287 * strrput during the copyout above causing 1288 * STRATMARK to be set we can not clear that 1289 * flag. 1290 */ 1291 if (mark & 1292 (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) { 1293 if (mark & MSGMARKNEXT) { 1294 stp->sd_flag &= ~STRNOTATMARK; 1295 stp->sd_flag |= STRATMARK; 1296 } else if (mark & MSGNOTMARKNEXT) { 1297 stp->sd_flag &= ~STRATMARK; 1298 stp->sd_flag |= STRNOTATMARK; 1299 } else { 1300 stp->sd_flag &= 1301 ~(STRATMARK|STRNOTATMARK); 1302 } 1303 } else if (rflg && (old_sd_flag & STRATMARK)) { 1304 stp->sd_flag &= ~STRATMARK; 1305 } 1306 } 1307 1308 /* 1309 * Check for signal messages at the front of the read 1310 * queue and generate the signal(s) if appropriate. 1311 * The only signal that can be on queue is M_SIG at 1312 * this point. 1313 */ 1314 while ((((bp = q->q_first)) != NULL) && 1315 (bp->b_datap->db_type == M_SIG)) { 1316 bp = getq_noenab(q); 1317 /* 1318 * sd_lock is held so the content of the 1319 * read queue can not change. 1320 */ 1321 ASSERT(bp != NULL && 1322 bp->b_datap->db_type == M_SIG); 1323 strsignal_nolock(stp, *bp->b_rptr, 1324 (int32_t)bp->b_band); 1325 mutex_exit(&stp->sd_lock); 1326 freemsg(bp); 1327 if (STREAM_NEEDSERVICE(stp)) 1328 stream_runservice(stp); 1329 mutex_enter(&stp->sd_lock); 1330 } 1331 1332 if ((uiop->uio_resid == 0) || (mark & _LASTMARK) || 1333 delim || 1334 (stp->sd_read_opt & (RD_MSGDIS|RD_MSGNODIS))) { 1335 goto oops; 1336 } 1337 continue; 1338 1339 case M_SIG: 1340 strsignal(stp, *bp->b_rptr, (int32_t)bp->b_band); 1341 freemsg(bp); 1342 mutex_enter(&stp->sd_lock); 1343 continue; 1344 1345 case M_PROTO: 1346 case M_PCPROTO: 1347 /* 1348 * Only data messages are readable. 1349 * Any others generate an error, unless 1350 * RD_PROTDIS or RD_PROTDAT is set. 
1351 */ 1352 if (stp->sd_read_opt & RD_PROTDAT) { 1353 for (nbp = bp; nbp; nbp = nbp->b_next) { 1354 if ((nbp->b_datap->db_type == 1355 M_PROTO) || 1356 (nbp->b_datap->db_type == 1357 M_PCPROTO)) { 1358 nbp->b_datap->db_type = M_DATA; 1359 } else { 1360 break; 1361 } 1362 } 1363 /* 1364 * clear stream head hi pri flag based on 1365 * first message 1366 */ 1367 if (type == M_PCPROTO) { 1368 mutex_enter(&stp->sd_lock); 1369 stp->sd_flag &= ~STRPRI; 1370 mutex_exit(&stp->sd_lock); 1371 } 1372 goto ismdata; 1373 } else if (stp->sd_read_opt & RD_PROTDIS) { 1374 /* 1375 * discard non-data messages 1376 */ 1377 while (bp && 1378 ((bp->b_datap->db_type == M_PROTO) || 1379 (bp->b_datap->db_type == M_PCPROTO))) { 1380 nbp = unlinkb(bp); 1381 freeb(bp); 1382 bp = nbp; 1383 } 1384 /* 1385 * clear stream head hi pri flag based on 1386 * first message 1387 */ 1388 if (type == M_PCPROTO) { 1389 mutex_enter(&stp->sd_lock); 1390 stp->sd_flag &= ~STRPRI; 1391 mutex_exit(&stp->sd_lock); 1392 } 1393 if (bp) { 1394 bp->b_band = pri; 1395 goto ismdata; 1396 } else { 1397 break; 1398 } 1399 } 1400 /* FALLTHRU */ 1401 case M_PASSFP: 1402 if ((bp->b_datap->db_type == M_PASSFP) && 1403 (stp->sd_read_opt & RD_PROTDIS)) { 1404 freemsg(bp); 1405 break; 1406 } 1407 mutex_enter(&stp->sd_lock); 1408 putback(stp, q, bp, pri); 1409 mutex_exit(&stp->sd_lock); 1410 if (rflg == 0) 1411 error = EBADMSG; 1412 goto oops1; 1413 1414 default: 1415 /* 1416 * Garbage on stream head read queue. 1417 */ 1418 cmn_err(CE_WARN, "bad %x found at stream head\n", 1419 bp->b_datap->db_type); 1420 freemsg(bp); 1421 goto oops1; 1422 } 1423 mutex_enter(&stp->sd_lock); 1424 } 1425 oops: 1426 mutex_exit(&stp->sd_lock); 1427 oops1: 1428 qbackenable(q, pri); 1429 return (error); 1430 #undef _LASTMARK 1431 } 1432 1433 /* 1434 * Default processing of M_PROTO/M_PCPROTO messages. 1435 * Determine which wakeups and signals are needed. 1436 * This can be replaced by a user-specified procedure for kernel users 1437 * of STREAMS. 
1438 */ 1439 /* ARGSUSED */ 1440 mblk_t * 1441 strrput_proto(vnode_t *vp, mblk_t *mp, 1442 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 1443 strsigset_t *allmsgsigs, strpollset_t *pollwakeups) 1444 { 1445 *wakeups = RSLEEP; 1446 *allmsgsigs = 0; 1447 1448 switch (mp->b_datap->db_type) { 1449 case M_PROTO: 1450 if (mp->b_band == 0) { 1451 *firstmsgsigs = S_INPUT | S_RDNORM; 1452 *pollwakeups = POLLIN | POLLRDNORM; 1453 } else { 1454 *firstmsgsigs = S_INPUT | S_RDBAND; 1455 *pollwakeups = POLLIN | POLLRDBAND; 1456 } 1457 break; 1458 case M_PCPROTO: 1459 *firstmsgsigs = S_HIPRI; 1460 *pollwakeups = POLLPRI; 1461 break; 1462 } 1463 return (mp); 1464 } 1465 1466 /* 1467 * Default processing of everything but M_DATA, M_PROTO, M_PCPROTO and 1468 * M_PASSFP messages. 1469 * Determine which wakeups and signals are needed. 1470 * This can be replaced by a user-specified procedure for kernel users 1471 * of STREAMS. 1472 */ 1473 /* ARGSUSED */ 1474 mblk_t * 1475 strrput_misc(vnode_t *vp, mblk_t *mp, 1476 strwakeup_t *wakeups, strsigset_t *firstmsgsigs, 1477 strsigset_t *allmsgsigs, strpollset_t *pollwakeups) 1478 { 1479 *wakeups = 0; 1480 *firstmsgsigs = 0; 1481 *allmsgsigs = 0; 1482 *pollwakeups = 0; 1483 return (mp); 1484 } 1485 1486 /* 1487 * Stream read put procedure. Called from downstream driver/module 1488 * with messages for the stream head. Data, protocol, and in-stream 1489 * signal messages are placed on the queue, others are handled directly. 
1490 */ 1491 int 1492 strrput(queue_t *q, mblk_t *bp) 1493 { 1494 struct stdata *stp; 1495 ulong_t rput_opt; 1496 strwakeup_t wakeups; 1497 strsigset_t firstmsgsigs; /* Signals if first message on queue */ 1498 strsigset_t allmsgsigs; /* Signals for all messages */ 1499 strsigset_t signals; /* Signals events to generate */ 1500 strpollset_t pollwakeups; 1501 mblk_t *nextbp; 1502 uchar_t band = 0; 1503 int hipri_sig; 1504 1505 stp = (struct stdata *)q->q_ptr; 1506 /* 1507 * Use rput_opt for optimized access to the SR_ flags except 1508 * SR_POLLIN. That flag has to be checked under sd_lock since it 1509 * is modified by strpoll(). 1510 */ 1511 rput_opt = stp->sd_rput_opt; 1512 1513 ASSERT(qclaimed(q)); 1514 TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_ENTER, 1515 "strrput called with message type:q %p bp %p", q, bp); 1516 1517 /* 1518 * Perform initial processing and pass to the parameterized functions. 1519 */ 1520 ASSERT(bp->b_next == NULL); 1521 1522 switch (bp->b_datap->db_type) { 1523 case M_DATA: 1524 /* 1525 * sockfs is the only consumer of STREOF and when it is set, 1526 * it implies that the receiver is not interested in receiving 1527 * any more data, hence the mblk is freed to prevent unnecessary 1528 * message queueing at the stream head. 1529 */ 1530 if (stp->sd_flag == STREOF) { 1531 freemsg(bp); 1532 return (0); 1533 } 1534 if ((rput_opt & SR_IGN_ZEROLEN) && 1535 bp->b_rptr == bp->b_wptr && msgnodata(bp)) { 1536 /* 1537 * Ignore zero-length M_DATA messages. These might be 1538 * generated by some transports. 1539 * The zero-length M_DATA messages, even if they 1540 * are ignored, should effect the atmark tracking and 1541 * should wake up a thread sleeping in strwaitmark. 1542 */ 1543 mutex_enter(&stp->sd_lock); 1544 if (bp->b_flag & MSGMARKNEXT) { 1545 /* 1546 * Record the position of the mark either 1547 * in q_last or in STRATMARK. 
1548 */ 1549 if (q->q_last != NULL) { 1550 q->q_last->b_flag &= ~MSGNOTMARKNEXT; 1551 q->q_last->b_flag |= MSGMARKNEXT; 1552 } else { 1553 stp->sd_flag &= ~STRNOTATMARK; 1554 stp->sd_flag |= STRATMARK; 1555 } 1556 } else if (bp->b_flag & MSGNOTMARKNEXT) { 1557 /* 1558 * Record that this is not the position of 1559 * the mark either in q_last or in 1560 * STRNOTATMARK. 1561 */ 1562 if (q->q_last != NULL) { 1563 q->q_last->b_flag &= ~MSGMARKNEXT; 1564 q->q_last->b_flag |= MSGNOTMARKNEXT; 1565 } else { 1566 stp->sd_flag &= ~STRATMARK; 1567 stp->sd_flag |= STRNOTATMARK; 1568 } 1569 } 1570 if (stp->sd_flag & RSLEEP) { 1571 stp->sd_flag &= ~RSLEEP; 1572 cv_broadcast(&q->q_wait); 1573 } 1574 mutex_exit(&stp->sd_lock); 1575 freemsg(bp); 1576 return (0); 1577 } 1578 wakeups = RSLEEP; 1579 if (bp->b_band == 0) { 1580 firstmsgsigs = S_INPUT | S_RDNORM; 1581 pollwakeups = POLLIN | POLLRDNORM; 1582 } else { 1583 firstmsgsigs = S_INPUT | S_RDBAND; 1584 pollwakeups = POLLIN | POLLRDBAND; 1585 } 1586 if (rput_opt & SR_SIGALLDATA) 1587 allmsgsigs = firstmsgsigs; 1588 else 1589 allmsgsigs = 0; 1590 1591 mutex_enter(&stp->sd_lock); 1592 if ((rput_opt & SR_CONSOL_DATA) && 1593 (q->q_last != NULL) && 1594 (bp->b_flag & (MSGMARK|MSGDELIM)) == 0) { 1595 /* 1596 * Consolidate an M_DATA message onto an M_DATA, 1597 * M_PROTO, or M_PCPROTO by merging it with q_last. 1598 * The consolidation does not take place if 1599 * the old message is marked with either of the 1600 * marks or the delim flag or if the new 1601 * message is marked with MSGMARK. The MSGMARK 1602 * check is needed to handle the odd semantics of 1603 * MSGMARK where essentially the whole message 1604 * is to be treated as marked. 1605 * Carry any MSGMARKNEXT and MSGNOTMARKNEXT from the 1606 * new message to the front of the b_cont chain. 
1607 */ 1608 mblk_t *lbp = q->q_last; 1609 unsigned char db_type = lbp->b_datap->db_type; 1610 1611 if ((db_type == M_DATA || db_type == M_PROTO || 1612 db_type == M_PCPROTO) && 1613 !(lbp->b_flag & (MSGDELIM|MSGMARK|MSGMARKNEXT))) { 1614 rmvq_noenab(q, lbp); 1615 /* 1616 * The first message in the b_cont list 1617 * tracks MSGMARKNEXT and MSGNOTMARKNEXT. 1618 * We need to handle the case where we 1619 * are appending: 1620 * 1621 * 1) a MSGMARKNEXT to a MSGNOTMARKNEXT. 1622 * 2) a MSGMARKNEXT to a plain message. 1623 * 3) a MSGNOTMARKNEXT to a plain message 1624 * 4) a MSGNOTMARKNEXT to a MSGNOTMARKNEXT 1625 * message. 1626 * 1627 * Thus we never append a MSGMARKNEXT or 1628 * MSGNOTMARKNEXT to a MSGMARKNEXT message. 1629 */ 1630 if (bp->b_flag & MSGMARKNEXT) { 1631 lbp->b_flag |= MSGMARKNEXT; 1632 lbp->b_flag &= ~MSGNOTMARKNEXT; 1633 bp->b_flag &= ~MSGMARKNEXT; 1634 } else if (bp->b_flag & MSGNOTMARKNEXT) { 1635 lbp->b_flag |= MSGNOTMARKNEXT; 1636 bp->b_flag &= ~MSGNOTMARKNEXT; 1637 } 1638 1639 linkb(lbp, bp); 1640 bp = lbp; 1641 /* 1642 * The new message logically isn't the first 1643 * even though the q_first check below thinks 1644 * it is. Clear the firstmsgsigs to make it 1645 * not appear to be first. 
1646 */ 1647 firstmsgsigs = 0; 1648 } 1649 } 1650 break; 1651 1652 case M_PASSFP: 1653 wakeups = RSLEEP; 1654 allmsgsigs = 0; 1655 if (bp->b_band == 0) { 1656 firstmsgsigs = S_INPUT | S_RDNORM; 1657 pollwakeups = POLLIN | POLLRDNORM; 1658 } else { 1659 firstmsgsigs = S_INPUT | S_RDBAND; 1660 pollwakeups = POLLIN | POLLRDBAND; 1661 } 1662 mutex_enter(&stp->sd_lock); 1663 break; 1664 1665 case M_PROTO: 1666 case M_PCPROTO: 1667 ASSERT(stp->sd_rprotofunc != NULL); 1668 bp = (stp->sd_rprotofunc)(stp->sd_vnode, bp, 1669 &wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups); 1670 #define ALLSIG (S_INPUT|S_HIPRI|S_OUTPUT|S_MSG|S_ERROR|S_HANGUP|S_RDNORM|\ 1671 S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG) 1672 #define ALLPOLL (POLLIN|POLLPRI|POLLOUT|POLLRDNORM|POLLWRNORM|POLLRDBAND|\ 1673 POLLWRBAND) 1674 1675 ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0); 1676 ASSERT((firstmsgsigs & ~ALLSIG) == 0); 1677 ASSERT((allmsgsigs & ~ALLSIG) == 0); 1678 ASSERT((pollwakeups & ~ALLPOLL) == 0); 1679 1680 mutex_enter(&stp->sd_lock); 1681 break; 1682 1683 default: 1684 ASSERT(stp->sd_rmiscfunc != NULL); 1685 bp = (stp->sd_rmiscfunc)(stp->sd_vnode, bp, 1686 &wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups); 1687 ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0); 1688 ASSERT((firstmsgsigs & ~ALLSIG) == 0); 1689 ASSERT((allmsgsigs & ~ALLSIG) == 0); 1690 ASSERT((pollwakeups & ~ALLPOLL) == 0); 1691 #undef ALLSIG 1692 #undef ALLPOLL 1693 mutex_enter(&stp->sd_lock); 1694 break; 1695 } 1696 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1697 1698 /* By default generate superset of signals */ 1699 signals = (firstmsgsigs | allmsgsigs); 1700 1701 /* 1702 * The proto and misc functions can return multiple messages 1703 * as a b_next chain. Such messages are processed separately. 
1704 */ 1705 one_more: 1706 hipri_sig = 0; 1707 if (bp == NULL) { 1708 nextbp = NULL; 1709 } else { 1710 nextbp = bp->b_next; 1711 bp->b_next = NULL; 1712 1713 switch (bp->b_datap->db_type) { 1714 case M_PCPROTO: 1715 /* 1716 * Only one priority protocol message is allowed at the 1717 * stream head at a time. 1718 */ 1719 if (stp->sd_flag & STRPRI) { 1720 TRACE_0(TR_FAC_STREAMS_FR, TR_STRRPUT_PROTERR, 1721 "M_PCPROTO already at head"); 1722 freemsg(bp); 1723 mutex_exit(&stp->sd_lock); 1724 goto done; 1725 } 1726 stp->sd_flag |= STRPRI; 1727 hipri_sig = 1; 1728 /* FALLTHRU */ 1729 case M_DATA: 1730 case M_PROTO: 1731 case M_PASSFP: 1732 band = bp->b_band; 1733 /* 1734 * Marking doesn't work well when messages 1735 * are marked in more than one band. We only 1736 * remember the last message received, even if 1737 * it is placed on the queue ahead of other 1738 * marked messages. 1739 */ 1740 if (bp->b_flag & MSGMARK) 1741 stp->sd_mark = bp; 1742 (void) putq(q, bp); 1743 1744 /* 1745 * If message is a PCPROTO message, always use 1746 * firstmsgsigs to determine if a signal should be 1747 * sent as strrput is the only place to send 1748 * signals for PCPROTO. Other messages are based on 1749 * the STRGETINPROG flag. The flag determines if 1750 * strrput or (k)strgetmsg will be responsible for 1751 * sending the signals, in the firstmsgsigs case. 
1752 */ 1753 if ((hipri_sig == 1) || 1754 (((stp->sd_flag & STRGETINPROG) == 0) && 1755 (q->q_first == bp))) 1756 signals = (firstmsgsigs | allmsgsigs); 1757 else 1758 signals = allmsgsigs; 1759 break; 1760 1761 default: 1762 mutex_exit(&stp->sd_lock); 1763 (void) strrput_nondata(q, bp); 1764 mutex_enter(&stp->sd_lock); 1765 break; 1766 } 1767 } 1768 ASSERT(MUTEX_HELD(&stp->sd_lock)); 1769 /* 1770 * Wake sleeping read/getmsg and cancel deferred wakeup 1771 */ 1772 if (wakeups & RSLEEP) 1773 stp->sd_wakeq &= ~RSLEEP; 1774 1775 wakeups &= stp->sd_flag; 1776 if (wakeups & RSLEEP) { 1777 stp->sd_flag &= ~RSLEEP; 1778 cv_broadcast(&q->q_wait); 1779 } 1780 if (wakeups & WSLEEP) { 1781 stp->sd_flag &= ~WSLEEP; 1782 cv_broadcast(&_WR(q)->q_wait); 1783 } 1784 1785 if (pollwakeups != 0) { 1786 if (pollwakeups == (POLLIN | POLLRDNORM)) { 1787 /* 1788 * Can't use rput_opt since it was not 1789 * read when sd_lock was held and SR_POLLIN is changed 1790 * by strpoll() under sd_lock. 1791 */ 1792 if (!(stp->sd_rput_opt & SR_POLLIN)) 1793 goto no_pollwake; 1794 stp->sd_rput_opt &= ~SR_POLLIN; 1795 } 1796 mutex_exit(&stp->sd_lock); 1797 pollwakeup(&stp->sd_pollist, pollwakeups); 1798 mutex_enter(&stp->sd_lock); 1799 } 1800 no_pollwake: 1801 1802 /* 1803 * strsendsig can handle multiple signals with a 1804 * single call. 1805 */ 1806 if (stp->sd_sigflags & signals) 1807 strsendsig(stp->sd_siglist, signals, band, 0); 1808 mutex_exit(&stp->sd_lock); 1809 1810 1811 done: 1812 if (nextbp == NULL) 1813 return (0); 1814 1815 /* 1816 * Any signals were handled the first time. 1817 * Wakeups and pollwakeups are redone to avoid any race 1818 * conditions - all the messages are not queued until the 1819 * last message has been processed by strrput. 
1820 */ 1821 bp = nextbp; 1822 signals = firstmsgsigs = allmsgsigs = 0; 1823 mutex_enter(&stp->sd_lock); 1824 goto one_more; 1825 } 1826 1827 static void 1828 log_dupioc(queue_t *rq, mblk_t *bp) 1829 { 1830 queue_t *wq, *qp; 1831 char *modnames, *mnp, *dname; 1832 size_t maxmodstr; 1833 boolean_t islast; 1834 1835 /* 1836 * Allocate a buffer large enough to hold the names of nstrpush modules 1837 * and one driver, with spaces between and NUL terminator. If we can't 1838 * get memory, then we'll just log the driver name. 1839 */ 1840 maxmodstr = nstrpush * (FMNAMESZ + 1); 1841 mnp = modnames = kmem_alloc(maxmodstr, KM_NOSLEEP); 1842 1843 /* march down write side to print log message down to the driver */ 1844 wq = WR(rq); 1845 1846 /* make sure q_next doesn't shift around while we're grabbing data */ 1847 claimstr(wq); 1848 qp = wq->q_next; 1849 do { 1850 if ((dname = qp->q_qinfo->qi_minfo->mi_idname) == NULL) 1851 dname = "?"; 1852 islast = !SAMESTR(qp) || qp->q_next == NULL; 1853 if (modnames == NULL) { 1854 /* 1855 * If we don't have memory, then get the driver name in 1856 * the log where we can see it. Note that memory 1857 * pressure is a possible cause of these sorts of bugs. 1858 */ 1859 if (islast) { 1860 modnames = dname; 1861 maxmodstr = 0; 1862 } 1863 } else { 1864 mnp += snprintf(mnp, FMNAMESZ + 1, "%s", dname); 1865 if (!islast) 1866 *mnp++ = ' '; 1867 } 1868 qp = qp->q_next; 1869 } while (!islast); 1870 releasestr(wq); 1871 /* Cannot happen unless stream head is corrupt. */ 1872 ASSERT(modnames != NULL); 1873 (void) strlog(rq->q_qinfo->qi_minfo->mi_idnum, 0, 1, 1874 SL_CONSOLE|SL_TRACE|SL_ERROR, 1875 "Warning: stream %p received duplicate %X M_IOC%s; module list: %s", 1876 rq->q_ptr, ((struct iocblk *)bp->b_rptr)->ioc_cmd, 1877 (DB_TYPE(bp) == M_IOCACK ? 
"ACK" : "NAK"), modnames); 1878 if (maxmodstr != 0) 1879 kmem_free(modnames, maxmodstr); 1880 } 1881 1882 int 1883 strrput_nondata(queue_t *q, mblk_t *bp) 1884 { 1885 struct stdata *stp; 1886 struct iocblk *iocbp; 1887 struct stroptions *sop; 1888 struct copyreq *reqp; 1889 struct copyresp *resp; 1890 unsigned char bpri; 1891 unsigned char flushed_already = 0; 1892 1893 stp = (struct stdata *)q->q_ptr; 1894 1895 ASSERT(!(stp->sd_flag & STPLEX)); 1896 ASSERT(qclaimed(q)); 1897 1898 switch (bp->b_datap->db_type) { 1899 case M_ERROR: 1900 /* 1901 * An error has occurred downstream, the errno is in the first 1902 * bytes of the message. 1903 */ 1904 if ((bp->b_wptr - bp->b_rptr) == 2) { /* New flavor */ 1905 unsigned char rw = 0; 1906 1907 mutex_enter(&stp->sd_lock); 1908 if (*bp->b_rptr != NOERROR) { /* read error */ 1909 if (*bp->b_rptr != 0) { 1910 if (stp->sd_flag & STRDERR) 1911 flushed_already |= FLUSHR; 1912 stp->sd_flag |= STRDERR; 1913 rw |= FLUSHR; 1914 } else { 1915 stp->sd_flag &= ~STRDERR; 1916 } 1917 stp->sd_rerror = *bp->b_rptr; 1918 } 1919 bp->b_rptr++; 1920 if (*bp->b_rptr != NOERROR) { /* write error */ 1921 if (*bp->b_rptr != 0) { 1922 if (stp->sd_flag & STWRERR) 1923 flushed_already |= FLUSHW; 1924 stp->sd_flag |= STWRERR; 1925 rw |= FLUSHW; 1926 } else { 1927 stp->sd_flag &= ~STWRERR; 1928 } 1929 stp->sd_werror = *bp->b_rptr; 1930 } 1931 if (rw) { 1932 TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_WAKE, 1933 "strrput cv_broadcast:q %p, bp %p", 1934 q, bp); 1935 cv_broadcast(&q->q_wait); /* readers */ 1936 cv_broadcast(&_WR(q)->q_wait); /* writers */ 1937 cv_broadcast(&stp->sd_monitor); /* ioctllers */ 1938 1939 mutex_exit(&stp->sd_lock); 1940 pollwakeup(&stp->sd_pollist, POLLERR); 1941 mutex_enter(&stp->sd_lock); 1942 1943 if (stp->sd_sigflags & S_ERROR) 1944 strsendsig(stp->sd_siglist, S_ERROR, 0, 1945 ((rw & FLUSHR) ? 
stp->sd_rerror : 1946 stp->sd_werror)); 1947 mutex_exit(&stp->sd_lock); 1948 /* 1949 * Send the M_FLUSH only 1950 * for the first M_ERROR 1951 * message on the stream 1952 */ 1953 if (flushed_already == rw) { 1954 freemsg(bp); 1955 return (0); 1956 } 1957 1958 bp->b_datap->db_type = M_FLUSH; 1959 *bp->b_rptr = rw; 1960 bp->b_wptr = bp->b_rptr + 1; 1961 /* 1962 * Protect against the driver 1963 * passing up messages after 1964 * it has done a qprocsoff 1965 */ 1966 if (_OTHERQ(q)->q_next == NULL) 1967 freemsg(bp); 1968 else 1969 qreply(q, bp); 1970 return (0); 1971 } else 1972 mutex_exit(&stp->sd_lock); 1973 } else if (*bp->b_rptr != 0) { /* Old flavor */ 1974 if (stp->sd_flag & (STRDERR|STWRERR)) 1975 flushed_already = FLUSHRW; 1976 mutex_enter(&stp->sd_lock); 1977 stp->sd_flag |= (STRDERR|STWRERR); 1978 stp->sd_rerror = *bp->b_rptr; 1979 stp->sd_werror = *bp->b_rptr; 1980 TRACE_2(TR_FAC_STREAMS_FR, 1981 TR_STRRPUT_WAKE2, 1982 "strrput wakeup #2:q %p, bp %p", q, bp); 1983 cv_broadcast(&q->q_wait); /* the readers */ 1984 cv_broadcast(&_WR(q)->q_wait); /* the writers */ 1985 cv_broadcast(&stp->sd_monitor); /* ioctllers */ 1986 1987 mutex_exit(&stp->sd_lock); 1988 pollwakeup(&stp->sd_pollist, POLLERR); 1989 mutex_enter(&stp->sd_lock); 1990 1991 if (stp->sd_sigflags & S_ERROR) 1992 strsendsig(stp->sd_siglist, S_ERROR, 0, 1993 (stp->sd_werror ? stp->sd_werror : 1994 stp->sd_rerror)); 1995 mutex_exit(&stp->sd_lock); 1996 1997 /* 1998 * Send the M_FLUSH only 1999 * for the first M_ERROR 2000 * message on the stream 2001 */ 2002 if (flushed_already != FLUSHRW) { 2003 bp->b_datap->db_type = M_FLUSH; 2004 *bp->b_rptr = FLUSHRW; 2005 /* 2006 * Protect against the driver passing up 2007 * messages after it has done a 2008 * qprocsoff. 
2009 */ 2010 if (_OTHERQ(q)->q_next == NULL) 2011 freemsg(bp); 2012 else 2013 qreply(q, bp); 2014 return (0); 2015 } 2016 } 2017 freemsg(bp); 2018 return (0); 2019 2020 case M_HANGUP: 2021 2022 freemsg(bp); 2023 mutex_enter(&stp->sd_lock); 2024 stp->sd_werror = ENXIO; 2025 stp->sd_flag |= STRHUP; 2026 stp->sd_flag &= ~(WSLEEP|RSLEEP); 2027 2028 /* 2029 * send signal if controlling tty 2030 */ 2031 2032 if (stp->sd_sidp) { 2033 prsignal(stp->sd_sidp, SIGHUP); 2034 if (stp->sd_sidp != stp->sd_pgidp) 2035 pgsignal(stp->sd_pgidp, SIGTSTP); 2036 } 2037 2038 /* 2039 * wake up read, write, and exception pollers and 2040 * reset wakeup mechanism. 2041 */ 2042 cv_broadcast(&q->q_wait); /* the readers */ 2043 cv_broadcast(&_WR(q)->q_wait); /* the writers */ 2044 cv_broadcast(&stp->sd_monitor); /* the ioctllers */ 2045 strhup(stp); 2046 mutex_exit(&stp->sd_lock); 2047 return (0); 2048 2049 case M_UNHANGUP: 2050 freemsg(bp); 2051 mutex_enter(&stp->sd_lock); 2052 stp->sd_werror = 0; 2053 stp->sd_flag &= ~STRHUP; 2054 mutex_exit(&stp->sd_lock); 2055 return (0); 2056 2057 case M_SIG: 2058 /* 2059 * Someone downstream wants to post a signal. The 2060 * signal to post is contained in the first byte of the 2061 * message. If the message would go on the front of 2062 * the queue, send a signal to the process group 2063 * (if not SIGPOLL) or to the siglist processes 2064 * (SIGPOLL). If something is already on the queue, 2065 * OR if we are delivering a delayed suspend (*sigh* 2066 * another "tty" hack) and there's no one sleeping already, 2067 * just enqueue the message. 2068 */ 2069 mutex_enter(&stp->sd_lock); 2070 if (q->q_first || (*bp->b_rptr == SIGTSTP && 2071 !(stp->sd_flag & RSLEEP))) { 2072 (void) putq(q, bp); 2073 mutex_exit(&stp->sd_lock); 2074 return (0); 2075 } 2076 mutex_exit(&stp->sd_lock); 2077 /* FALLTHRU */ 2078 2079 case M_PCSIG: 2080 /* 2081 * Don't enqueue, just post the signal. 
2082 */ 2083 strsignal(stp, *bp->b_rptr, 0L); 2084 freemsg(bp); 2085 return (0); 2086 2087 case M_FLUSH: 2088 /* 2089 * Flush queues. The indication of which queues to flush 2090 * is in the first byte of the message. If the read queue 2091 * is specified, then flush it. If FLUSHBAND is set, just 2092 * flush the band specified by the second byte of the message. 2093 * 2094 * If a module has issued a M_SETOPT to not flush hi 2095 * priority messages off of the stream head, then pass this 2096 * flag into the flushq code to preserve such messages. 2097 */ 2098 2099 if (*bp->b_rptr & FLUSHR) { 2100 mutex_enter(&stp->sd_lock); 2101 if (*bp->b_rptr & FLUSHBAND) { 2102 ASSERT((bp->b_wptr - bp->b_rptr) >= 2); 2103 flushband(q, *(bp->b_rptr + 1), FLUSHALL); 2104 } else 2105 flushq_common(q, FLUSHALL, 2106 stp->sd_read_opt & RFLUSHPCPROT); 2107 if ((q->q_first == NULL) || 2108 (q->q_first->b_datap->db_type < QPCTL)) 2109 stp->sd_flag &= ~STRPRI; 2110 else { 2111 ASSERT(stp->sd_flag & STRPRI); 2112 } 2113 mutex_exit(&stp->sd_lock); 2114 } 2115 if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) { 2116 *bp->b_rptr &= ~FLUSHR; 2117 bp->b_flag |= MSGNOLOOP; 2118 /* 2119 * Protect against the driver passing up 2120 * messages after it has done a qprocsoff. 2121 */ 2122 if (_OTHERQ(q)->q_next == NULL) 2123 freemsg(bp); 2124 else 2125 qreply(q, bp); 2126 return (0); 2127 } 2128 freemsg(bp); 2129 return (0); 2130 2131 case M_IOCACK: 2132 case M_IOCNAK: 2133 iocbp = (struct iocblk *)bp->b_rptr; 2134 /* 2135 * If not waiting for ACK or NAK then just free msg. 2136 * If incorrect id sequence number then just free msg. 2137 * If already have ACK or NAK for user then this is a 2138 * duplicate, display a warning and free the msg. 
2139 */ 2140 mutex_enter(&stp->sd_lock); 2141 if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk || 2142 (stp->sd_iocid != iocbp->ioc_id)) { 2143 /* 2144 * If the ACK/NAK is a dup, display a message 2145 * Dup is when sd_iocid == ioc_id, and 2146 * sd_iocblk == <valid ptr> or -1 (the former 2147 * is when an ioctl has been put on the stream 2148 * head, but has not yet been consumed, the 2149 * later is when it has been consumed). 2150 */ 2151 if ((stp->sd_iocid == iocbp->ioc_id) && 2152 (stp->sd_iocblk != NULL)) { 2153 log_dupioc(q, bp); 2154 } 2155 freemsg(bp); 2156 mutex_exit(&stp->sd_lock); 2157 return (0); 2158 } 2159 2160 /* 2161 * Assign ACK or NAK to user and wake up. 2162 */ 2163 stp->sd_iocblk = bp; 2164 cv_broadcast(&stp->sd_monitor); 2165 mutex_exit(&stp->sd_lock); 2166 return (0); 2167 2168 case M_COPYIN: 2169 case M_COPYOUT: 2170 reqp = (struct copyreq *)bp->b_rptr; 2171 2172 /* 2173 * If not waiting for ACK or NAK then just fail request. 2174 * If already have ACK, NAK, or copy request, then just 2175 * fail request. 2176 * If incorrect id sequence number then just fail request. 2177 */ 2178 mutex_enter(&stp->sd_lock); 2179 if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk || 2180 (stp->sd_iocid != reqp->cq_id)) { 2181 if (bp->b_cont) { 2182 freemsg(bp->b_cont); 2183 bp->b_cont = NULL; 2184 } 2185 bp->b_datap->db_type = M_IOCDATA; 2186 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp); 2187 resp = (struct copyresp *)bp->b_rptr; 2188 resp->cp_rval = (caddr_t)1; /* failure */ 2189 mutex_exit(&stp->sd_lock); 2190 putnext(stp->sd_wrq, bp); 2191 return (0); 2192 } 2193 2194 /* 2195 * Assign copy request to user and wake up. 2196 */ 2197 stp->sd_iocblk = bp; 2198 cv_broadcast(&stp->sd_monitor); 2199 mutex_exit(&stp->sd_lock); 2200 return (0); 2201 2202 case M_SETOPTS: 2203 /* 2204 * Set stream head options (read option, write offset, 2205 * min/max packet size, and/or high/low water marks for 2206 * the read side only). 
2207 */ 2208 2209 bpri = 0; 2210 sop = (struct stroptions *)bp->b_rptr; 2211 mutex_enter(&stp->sd_lock); 2212 if (sop->so_flags & SO_READOPT) { 2213 switch (sop->so_readopt & RMODEMASK) { 2214 case RNORM: 2215 stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS); 2216 break; 2217 2218 case RMSGD: 2219 stp->sd_read_opt = 2220 ((stp->sd_read_opt & ~RD_MSGNODIS) | 2221 RD_MSGDIS); 2222 break; 2223 2224 case RMSGN: 2225 stp->sd_read_opt = 2226 ((stp->sd_read_opt & ~RD_MSGDIS) | 2227 RD_MSGNODIS); 2228 break; 2229 } 2230 switch (sop->so_readopt & RPROTMASK) { 2231 case RPROTNORM: 2232 stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS); 2233 break; 2234 2235 case RPROTDAT: 2236 stp->sd_read_opt = 2237 ((stp->sd_read_opt & ~RD_PROTDIS) | 2238 RD_PROTDAT); 2239 break; 2240 2241 case RPROTDIS: 2242 stp->sd_read_opt = 2243 ((stp->sd_read_opt & ~RD_PROTDAT) | 2244 RD_PROTDIS); 2245 break; 2246 } 2247 switch (sop->so_readopt & RFLUSHMASK) { 2248 case RFLUSHPCPROT: 2249 /* 2250 * This sets the stream head to NOT flush 2251 * M_PCPROTO messages. 
2252 */ 2253 stp->sd_read_opt |= RFLUSHPCPROT; 2254 break; 2255 } 2256 } 2257 if (sop->so_flags & SO_ERROPT) { 2258 switch (sop->so_erropt & RERRMASK) { 2259 case RERRNORM: 2260 stp->sd_flag &= ~STRDERRNONPERSIST; 2261 break; 2262 case RERRNONPERSIST: 2263 stp->sd_flag |= STRDERRNONPERSIST; 2264 break; 2265 } 2266 switch (sop->so_erropt & WERRMASK) { 2267 case WERRNORM: 2268 stp->sd_flag &= ~STWRERRNONPERSIST; 2269 break; 2270 case WERRNONPERSIST: 2271 stp->sd_flag |= STWRERRNONPERSIST; 2272 break; 2273 } 2274 } 2275 if (sop->so_flags & SO_COPYOPT) { 2276 if (sop->so_copyopt & ZCVMSAFE) { 2277 stp->sd_copyflag |= STZCVMSAFE; 2278 stp->sd_copyflag &= ~STZCVMUNSAFE; 2279 } else if (sop->so_copyopt & ZCVMUNSAFE) { 2280 stp->sd_copyflag |= STZCVMUNSAFE; 2281 stp->sd_copyflag &= ~STZCVMSAFE; 2282 } 2283 2284 if (sop->so_copyopt & COPYCACHED) { 2285 stp->sd_copyflag |= STRCOPYCACHED; 2286 } 2287 } 2288 if (sop->so_flags & SO_WROFF) 2289 stp->sd_wroff = sop->so_wroff; 2290 if (sop->so_flags & SO_TAIL) 2291 stp->sd_tail = sop->so_tail; 2292 if (sop->so_flags & SO_MINPSZ) 2293 q->q_minpsz = sop->so_minpsz; 2294 if (sop->so_flags & SO_MAXPSZ) 2295 q->q_maxpsz = sop->so_maxpsz; 2296 if (sop->so_flags & SO_MAXBLK) 2297 stp->sd_maxblk = sop->so_maxblk; 2298 if (sop->so_flags & SO_HIWAT) { 2299 if (sop->so_flags & SO_BAND) { 2300 if (strqset(q, QHIWAT, 2301 sop->so_band, sop->so_hiwat)) { 2302 cmn_err(CE_WARN, "strrput: could not " 2303 "allocate qband\n"); 2304 } else { 2305 bpri = sop->so_band; 2306 } 2307 } else { 2308 q->q_hiwat = sop->so_hiwat; 2309 } 2310 } 2311 if (sop->so_flags & SO_LOWAT) { 2312 if (sop->so_flags & SO_BAND) { 2313 if (strqset(q, QLOWAT, 2314 sop->so_band, sop->so_lowat)) { 2315 cmn_err(CE_WARN, "strrput: could not " 2316 "allocate qband\n"); 2317 } else { 2318 bpri = sop->so_band; 2319 } 2320 } else { 2321 q->q_lowat = sop->so_lowat; 2322 } 2323 } 2324 if (sop->so_flags & SO_MREADON) 2325 stp->sd_flag |= SNDMREAD; 2326 if (sop->so_flags & SO_MREADOFF) 
2327 stp->sd_flag &= ~SNDMREAD; 2328 if (sop->so_flags & SO_NDELON) 2329 stp->sd_flag |= OLDNDELAY; 2330 if (sop->so_flags & SO_NDELOFF) 2331 stp->sd_flag &= ~OLDNDELAY; 2332 if (sop->so_flags & SO_ISTTY) 2333 stp->sd_flag |= STRISTTY; 2334 if (sop->so_flags & SO_ISNTTY) 2335 stp->sd_flag &= ~STRISTTY; 2336 if (sop->so_flags & SO_TOSTOP) 2337 stp->sd_flag |= STRTOSTOP; 2338 if (sop->so_flags & SO_TONSTOP) 2339 stp->sd_flag &= ~STRTOSTOP; 2340 if (sop->so_flags & SO_DELIM) 2341 stp->sd_flag |= STRDELIM; 2342 if (sop->so_flags & SO_NODELIM) 2343 stp->sd_flag &= ~STRDELIM; 2344 2345 mutex_exit(&stp->sd_lock); 2346 freemsg(bp); 2347 2348 /* Check backenable in case the water marks changed */ 2349 qbackenable(q, bpri); 2350 return (0); 2351 2352 /* 2353 * The following set of cases deal with situations where two stream 2354 * heads are connected to each other (twisted streams). These messages 2355 * have no meaning at the stream head. 2356 */ 2357 case M_BREAK: 2358 case M_CTL: 2359 case M_DELAY: 2360 case M_START: 2361 case M_STOP: 2362 case M_IOCDATA: 2363 case M_STARTI: 2364 case M_STOPI: 2365 freemsg(bp); 2366 return (0); 2367 2368 case M_IOCTL: 2369 /* 2370 * Always NAK this condition 2371 * (makes no sense) 2372 * If there is one or more threads in the read side 2373 * rwnext we have to defer the nacking until that thread 2374 * returns (in strget). 2375 */ 2376 mutex_enter(&stp->sd_lock); 2377 if (stp->sd_struiodnak != 0) { 2378 /* 2379 * Defer NAK to the streamhead. Queue at the end 2380 * the list. 2381 */ 2382 mblk_t *mp = stp->sd_struionak; 2383 2384 while (mp && mp->b_next) 2385 mp = mp->b_next; 2386 if (mp) 2387 mp->b_next = bp; 2388 else 2389 stp->sd_struionak = bp; 2390 bp->b_next = NULL; 2391 mutex_exit(&stp->sd_lock); 2392 return (0); 2393 } 2394 mutex_exit(&stp->sd_lock); 2395 2396 bp->b_datap->db_type = M_IOCNAK; 2397 /* 2398 * Protect against the driver passing up 2399 * messages after it has done a qprocsoff. 
2400 */ 2401 if (_OTHERQ(q)->q_next == NULL) 2402 freemsg(bp); 2403 else 2404 qreply(q, bp); 2405 return (0); 2406 2407 default: 2408 #ifdef DEBUG 2409 cmn_err(CE_WARN, 2410 "bad message type %x received at stream head\n", 2411 bp->b_datap->db_type); 2412 #endif 2413 freemsg(bp); 2414 return (0); 2415 } 2416 2417 /* NOTREACHED */ 2418 } 2419 2420 /* 2421 * Check if the stream pointed to by `stp' can be written to, and return an 2422 * error code if not. If `eiohup' is set, then return EIO if STRHUP is set. 2423 * If `sigpipeok' is set and the SW_SIGPIPE option is enabled on the stream, 2424 * then always return EPIPE and send a SIGPIPE to the invoking thread. 2425 */ 2426 static int 2427 strwriteable(struct stdata *stp, boolean_t eiohup, boolean_t sigpipeok) 2428 { 2429 int error; 2430 2431 ASSERT(MUTEX_HELD(&stp->sd_lock)); 2432 2433 /* 2434 * For modem support, POSIX states that on writes, EIO should 2435 * be returned if the stream has been hung up. 2436 */ 2437 if (eiohup && (stp->sd_flag & (STPLEX|STRHUP)) == STRHUP) 2438 error = EIO; 2439 else 2440 error = strgeterr(stp, STRHUP|STPLEX|STWRERR, 0); 2441 2442 if (error != 0) { 2443 if (!(stp->sd_flag & STPLEX) && 2444 (stp->sd_wput_opt & SW_SIGPIPE) && sigpipeok) { 2445 tsignal(curthread, SIGPIPE); 2446 error = EPIPE; 2447 } 2448 } 2449 2450 return (error); 2451 } 2452 2453 /* 2454 * Copyin and send data down a stream. 2455 * The caller will allocate and copyin any control part that precedes the 2456 * message and pass than in as mctl. 2457 * 2458 * Caller should *not* hold sd_lock. 2459 * When EWOULDBLOCK is returned the caller has to redo the canputnext 2460 * under sd_lock in order to avoid missing a backenabling wakeup. 2461 * 2462 * Use iosize = -1 to not send any M_DATA. iosize = 0 sends zero-length M_DATA. 2463 * 2464 * Set MSG_IGNFLOW in flags to ignore flow control for hipri messages. 2465 * For sync streams we can only ignore flow control by reverting to using 2466 * putnext. 
2467 * 2468 * If sd_maxblk is less than *iosize this routine might return without 2469 * transferring all of *iosize. In all cases, on return *iosize will contain 2470 * the amount of data that was transferred. 2471 */ 2472 static int 2473 strput(struct stdata *stp, mblk_t *mctl, struct uio *uiop, ssize_t *iosize, 2474 int b_flag, int pri, int flags) 2475 { 2476 struiod_t uiod; 2477 mblk_t *mp; 2478 queue_t *wqp = stp->sd_wrq; 2479 int error = 0; 2480 ssize_t count = *iosize; 2481 cred_t *cr; 2482 2483 ASSERT(MUTEX_NOT_HELD(&stp->sd_lock)); 2484 2485 if (uiop != NULL && count >= 0) 2486 flags |= stp->sd_struiowrq ? STRUIO_POSTPONE : 0; 2487 2488 if (!(flags & STRUIO_POSTPONE)) { 2489 /* 2490 * Use regular canputnext, strmakedata, putnext sequence. 2491 */ 2492 if (pri == 0) { 2493 if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) { 2494 freemsg(mctl); 2495 return (EWOULDBLOCK); 2496 } 2497 } else { 2498 if (!(flags & MSG_IGNFLOW) && !bcanputnext(wqp, pri)) { 2499 freemsg(mctl); 2500 return (EWOULDBLOCK); 2501 } 2502 } 2503 2504 if ((error = strmakedata(iosize, uiop, stp, flags, 2505 &mp)) != 0) { 2506 freemsg(mctl); 2507 /* 2508 * need to change return code to ENOMEM 2509 * so that this is not confused with 2510 * flow control, EAGAIN. 2511 */ 2512 2513 if (error == EAGAIN) 2514 return (ENOMEM); 2515 else 2516 return (error); 2517 } 2518 if (mctl != NULL) { 2519 if (mctl->b_cont == NULL) 2520 mctl->b_cont = mp; 2521 else if (mp != NULL) 2522 linkb(mctl, mp); 2523 mp = mctl; 2524 /* 2525 * Note that for interrupt thread, the CRED() is 2526 * NULL. Don't bother with the pid either. 2527 */ 2528 if ((cr = CRED()) != NULL) { 2529 mblk_setcred(mp, cr); 2530 DB_CPID(mp) = curproc->p_pid; 2531 } 2532 } else if (mp == NULL) 2533 return (0); 2534 2535 mp->b_flag |= b_flag; 2536 mp->b_band = (uchar_t)pri; 2537 2538 if (flags & MSG_IGNFLOW) { 2539 /* 2540 * XXX Hack: Don't get stuck running service 2541 * procedures. 
This is needed for sockfs when 2542 * sending the unbind message out of the rput 2543 * procedure - we don't want a put procedure 2544 * to run service procedures. 2545 */ 2546 putnext(wqp, mp); 2547 } else { 2548 stream_willservice(stp); 2549 putnext(wqp, mp); 2550 stream_runservice(stp); 2551 } 2552 return (0); 2553 } 2554 /* 2555 * Stream supports rwnext() for the write side. 2556 */ 2557 if ((error = strmakedata(iosize, uiop, stp, flags, &mp)) != 0) { 2558 freemsg(mctl); 2559 /* 2560 * map EAGAIN to ENOMEM since EAGAIN means "flow controlled". 2561 */ 2562 return (error == EAGAIN ? ENOMEM : error); 2563 } 2564 if (mctl != NULL) { 2565 if (mctl->b_cont == NULL) 2566 mctl->b_cont = mp; 2567 else if (mp != NULL) 2568 linkb(mctl, mp); 2569 mp = mctl; 2570 /* 2571 * Note that for interrupt thread, the CRED() is 2572 * NULL. Don't bother with the pid either. 2573 */ 2574 if ((cr = CRED()) != NULL) { 2575 mblk_setcred(mp, cr); 2576 DB_CPID(mp) = curproc->p_pid; 2577 } 2578 } else if (mp == NULL) { 2579 return (0); 2580 } 2581 2582 mp->b_flag |= b_flag; 2583 mp->b_band = (uchar_t)pri; 2584 2585 (void) uiodup(uiop, &uiod.d_uio, uiod.d_iov, 2586 sizeof (uiod.d_iov) / sizeof (*uiod.d_iov)); 2587 uiod.d_uio.uio_offset = 0; 2588 uiod.d_mp = mp; 2589 error = rwnext(wqp, &uiod); 2590 if (! uiod.d_mp) { 2591 uioskip(uiop, *iosize); 2592 return (error); 2593 } 2594 ASSERT(mp == uiod.d_mp); 2595 if (error == EINVAL) { 2596 /* 2597 * The stream plumbing must have changed while 2598 * we were away, so just turn off rwnext()s. 2599 */ 2600 error = 0; 2601 } else if (error == EBUSY || error == EWOULDBLOCK) { 2602 /* 2603 * Couldn't enter a perimeter or took a page fault, 2604 * so fall-back to putnext(). 
2605 */ 2606 error = 0; 2607 } else { 2608 freemsg(mp); 2609 return (error); 2610 } 2611 /* Have to check canput before consuming data from the uio */ 2612 if (pri == 0) { 2613 if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) { 2614 freemsg(mp); 2615 return (EWOULDBLOCK); 2616 } 2617 } else { 2618 if (!bcanputnext(wqp, pri) && !(flags & MSG_IGNFLOW)) { 2619 freemsg(mp); 2620 return (EWOULDBLOCK); 2621 } 2622 } 2623 ASSERT(mp == uiod.d_mp); 2624 /* Copyin data from the uio */ 2625 if ((error = struioget(wqp, mp, &uiod, 0)) != 0) { 2626 freemsg(mp); 2627 return (error); 2628 } 2629 uioskip(uiop, *iosize); 2630 if (flags & MSG_IGNFLOW) { 2631 /* 2632 * XXX Hack: Don't get stuck running service procedures. 2633 * This is needed for sockfs when sending the unbind message 2634 * out of the rput procedure - we don't want a put procedure 2635 * to run service procedures. 2636 */ 2637 putnext(wqp, mp); 2638 } else { 2639 stream_willservice(stp); 2640 putnext(wqp, mp); 2641 stream_runservice(stp); 2642 } 2643 return (0); 2644 } 2645 2646 /* 2647 * Write attempts to break the write request into messages conforming 2648 * with the minimum and maximum packet sizes set downstream. 2649 * 2650 * Write will not block if downstream queue is full and 2651 * O_NDELAY is set, otherwise it will block waiting for the queue to get room. 2652 * 2653 * A write of zero bytes gets packaged into a zero length message and sent 2654 * downstream like any other message. 2655 * 2656 * If buffers of the requested sizes are not available, the write will 2657 * sleep until the buffers become available. 2658 * 2659 * Write (if specified) will supply a write offset in a message if it 2660 * makes sense. This can be specified by downstream modules as part of 2661 * a M_SETOPTS message. Write will not supply the write offset if it 2662 * cannot supply any data in a buffer. In other words, write will never 2663 * send down an empty packet due to a write offset. 
2664 */ 2665 /* ARGSUSED2 */ 2666 int 2667 strwrite(struct vnode *vp, struct uio *uiop, cred_t *crp) 2668 { 2669 return (strwrite_common(vp, uiop, crp, 0)); 2670 } 2671 2672 /* ARGSUSED2 */ 2673 int 2674 strwrite_common(struct vnode *vp, struct uio *uiop, cred_t *crp, int wflag) 2675 { 2676 struct stdata *stp; 2677 struct queue *wqp; 2678 ssize_t rmin, rmax; 2679 ssize_t iosize; 2680 int waitflag; 2681 int tempmode; 2682 int error = 0; 2683 int b_flag; 2684 2685 ASSERT(vp->v_stream); 2686 stp = vp->v_stream; 2687 2688 mutex_enter(&stp->sd_lock); 2689 2690 if ((error = i_straccess(stp, JCWRITE)) != 0) { 2691 mutex_exit(&stp->sd_lock); 2692 return (error); 2693 } 2694 2695 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { 2696 error = strwriteable(stp, B_TRUE, B_TRUE); 2697 if (error != 0) { 2698 mutex_exit(&stp->sd_lock); 2699 return (error); 2700 } 2701 } 2702 2703 mutex_exit(&stp->sd_lock); 2704 2705 wqp = stp->sd_wrq; 2706 2707 /* get these values from them cached in the stream head */ 2708 rmin = stp->sd_qn_minpsz; 2709 rmax = stp->sd_qn_maxpsz; 2710 2711 /* 2712 * Check the min/max packet size constraints. If min packet size 2713 * is non-zero, the write cannot be split into multiple messages 2714 * and still guarantee the size constraints. 2715 */ 2716 TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_IN, "strwrite in:q %p", wqp); 2717 2718 ASSERT((rmax >= 0) || (rmax == INFPSZ)); 2719 if (rmax == 0) { 2720 return (0); 2721 } 2722 if (rmin > 0) { 2723 if (uiop->uio_resid < rmin) { 2724 TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT, 2725 "strwrite out:q %p out %d error %d", 2726 wqp, 0, ERANGE); 2727 return (ERANGE); 2728 } 2729 if ((rmax != INFPSZ) && (uiop->uio_resid > rmax)) { 2730 TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT, 2731 "strwrite out:q %p out %d error %d", 2732 wqp, 1, ERANGE); 2733 return (ERANGE); 2734 } 2735 } 2736 2737 /* 2738 * Do until count satisfied or error. 
2739 */ 2740 waitflag = WRITEWAIT | wflag; 2741 if (stp->sd_flag & OLDNDELAY) 2742 tempmode = uiop->uio_fmode & ~FNDELAY; 2743 else 2744 tempmode = uiop->uio_fmode; 2745 2746 if (rmax == INFPSZ) 2747 rmax = uiop->uio_resid; 2748 2749 /* 2750 * Note that tempmode does not get used in strput/strmakedata 2751 * but only in strwaitq. The other routines use uio_fmode 2752 * unmodified. 2753 */ 2754 2755 /* LINTED: constant in conditional context */ 2756 while (1) { /* breaks when uio_resid reaches zero */ 2757 /* 2758 * Determine the size of the next message to be 2759 * packaged. May have to break write into several 2760 * messages based on max packet size. 2761 */ 2762 iosize = MIN(uiop->uio_resid, rmax); 2763 2764 /* 2765 * Put block downstream when flow control allows it. 2766 */ 2767 if ((stp->sd_flag & STRDELIM) && (uiop->uio_resid == iosize)) 2768 b_flag = MSGDELIM; 2769 else 2770 b_flag = 0; 2771 2772 for (;;) { 2773 int done = 0; 2774 2775 error = strput(stp, NULL, uiop, &iosize, b_flag, 0, 0); 2776 if (error == 0) 2777 break; 2778 if (error != EWOULDBLOCK) 2779 goto out; 2780 2781 mutex_enter(&stp->sd_lock); 2782 /* 2783 * Check for a missed wakeup. 2784 * Needed since strput did not hold sd_lock across 2785 * the canputnext. 
2786 */ 2787 if (canputnext(wqp)) { 2788 /* Try again */ 2789 mutex_exit(&stp->sd_lock); 2790 continue; 2791 } 2792 TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAIT, 2793 "strwrite wait:q %p wait", wqp); 2794 if ((error = strwaitq(stp, waitflag, (ssize_t)0, 2795 tempmode, -1, &done)) != 0 || done) { 2796 mutex_exit(&stp->sd_lock); 2797 if ((vp->v_type == VFIFO) && 2798 (uiop->uio_fmode & FNDELAY) && 2799 (error == EAGAIN)) 2800 error = 0; 2801 goto out; 2802 } 2803 TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAKE, 2804 "strwrite wake:q %p awakes", wqp); 2805 if ((error = i_straccess(stp, JCWRITE)) != 0) { 2806 mutex_exit(&stp->sd_lock); 2807 goto out; 2808 } 2809 mutex_exit(&stp->sd_lock); 2810 } 2811 waitflag |= NOINTR; 2812 TRACE_2(TR_FAC_STREAMS_FR, TR_STRWRITE_RESID, 2813 "strwrite resid:q %p uiop %p", wqp, uiop); 2814 if (uiop->uio_resid) { 2815 /* Recheck for errors - needed for sockets */ 2816 if ((stp->sd_wput_opt & SW_RECHECK_ERR) && 2817 (stp->sd_flag & (STWRERR|STRHUP|STPLEX))) { 2818 mutex_enter(&stp->sd_lock); 2819 error = strwriteable(stp, B_FALSE, B_TRUE); 2820 mutex_exit(&stp->sd_lock); 2821 if (error != 0) 2822 return (error); 2823 } 2824 continue; 2825 } 2826 break; 2827 } 2828 out: 2829 /* 2830 * For historical reasons, applications expect EAGAIN when a data 2831 * mblk_t cannot be allocated, so change ENOMEM back to EAGAIN. 2832 */ 2833 if (error == ENOMEM) 2834 error = EAGAIN; 2835 TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT, 2836 "strwrite out:q %p out %d error %d", wqp, 2, error); 2837 return (error); 2838 } 2839 2840 /* 2841 * Stream head write service routine. 2842 * Its job is to wake up any sleeping writers when a queue 2843 * downstream needs data (part of the flow control in putq and getq). 2844 * It also must wake anyone sleeping on a poll(). 2845 * For stream head right below mux module, it must also invoke put procedure 2846 * of next downstream module. 
2847 */ 2848 int 2849 strwsrv(queue_t *q) 2850 { 2851 struct stdata *stp; 2852 queue_t *tq; 2853 qband_t *qbp; 2854 int i; 2855 qband_t *myqbp; 2856 int isevent; 2857 unsigned char qbf[NBAND]; /* band flushing backenable flags */ 2858 2859 TRACE_1(TR_FAC_STREAMS_FR, 2860 TR_STRWSRV, "strwsrv:q %p", q); 2861 stp = (struct stdata *)q->q_ptr; 2862 ASSERT(qclaimed(q)); 2863 mutex_enter(&stp->sd_lock); 2864 ASSERT(!(stp->sd_flag & STPLEX)); 2865 2866 if (stp->sd_flag & WSLEEP) { 2867 stp->sd_flag &= ~WSLEEP; 2868 cv_broadcast(&q->q_wait); 2869 } 2870 mutex_exit(&stp->sd_lock); 2871 2872 /* The other end of a stream pipe went away. */ 2873 if ((tq = q->q_next) == NULL) { 2874 return (0); 2875 } 2876 2877 /* Find the next module forward that has a service procedure */ 2878 claimstr(q); 2879 tq = q->q_nfsrv; 2880 ASSERT(tq != NULL); 2881 2882 if ((q->q_flag & QBACK)) { 2883 if ((tq->q_flag & QFULL)) { 2884 mutex_enter(QLOCK(tq)); 2885 if (!(tq->q_flag & QFULL)) { 2886 mutex_exit(QLOCK(tq)); 2887 goto wakeup; 2888 } 2889 /* 2890 * The queue must have become full again. Set QWANTW 2891 * again so strwsrv will be back enabled when 2892 * the queue becomes non-full next time. 2893 */ 2894 tq->q_flag |= QWANTW; 2895 mutex_exit(QLOCK(tq)); 2896 } else { 2897 wakeup: 2898 pollwakeup(&stp->sd_pollist, POLLWRNORM); 2899 mutex_enter(&stp->sd_lock); 2900 if (stp->sd_sigflags & S_WRNORM) 2901 strsendsig(stp->sd_siglist, S_WRNORM, 0, 0); 2902 mutex_exit(&stp->sd_lock); 2903 } 2904 } 2905 2906 isevent = 0; 2907 i = 1; 2908 bzero((caddr_t)qbf, NBAND); 2909 mutex_enter(QLOCK(tq)); 2910 if ((myqbp = q->q_bandp) != NULL) 2911 for (qbp = tq->q_bandp; qbp && myqbp; qbp = qbp->qb_next) { 2912 ASSERT(myqbp); 2913 if ((myqbp->qb_flag & QB_BACK)) { 2914 if (qbp->qb_flag & QB_FULL) { 2915 /* 2916 * The band must have become full again. 2917 * Set QB_WANTW again so strwsrv will 2918 * be back enabled when the band becomes 2919 * non-full next time. 
2920 */ 2921 qbp->qb_flag |= QB_WANTW; 2922 } else { 2923 isevent = 1; 2924 qbf[i] = 1; 2925 } 2926 } 2927 myqbp = myqbp->qb_next; 2928 i++; 2929 } 2930 mutex_exit(QLOCK(tq)); 2931 2932 if (isevent) { 2933 for (i = tq->q_nband; i; i--) { 2934 if (qbf[i]) { 2935 pollwakeup(&stp->sd_pollist, POLLWRBAND); 2936 mutex_enter(&stp->sd_lock); 2937 if (stp->sd_sigflags & S_WRBAND) 2938 strsendsig(stp->sd_siglist, S_WRBAND, 2939 (uchar_t)i, 0); 2940 mutex_exit(&stp->sd_lock); 2941 } 2942 } 2943 } 2944 2945 releasestr(q); 2946 return (0); 2947 } 2948 2949 /* 2950 * Special case of strcopyin/strcopyout for copying 2951 * struct strioctl that can deal with both data 2952 * models. 2953 */ 2954 2955 #ifdef _LP64 2956 2957 static int 2958 strcopyin_strioctl(void *from, void *to, int flag, int copyflag) 2959 { 2960 struct strioctl32 strioc32; 2961 struct strioctl *striocp; 2962 2963 if (copyflag & U_TO_K) { 2964 ASSERT((copyflag & K_TO_K) == 0); 2965 2966 if ((flag & FMODELS) == DATAMODEL_ILP32) { 2967 if (copyin(from, &strioc32, sizeof (strioc32))) 2968 return (EFAULT); 2969 2970 striocp = (struct strioctl *)to; 2971 striocp->ic_cmd = strioc32.ic_cmd; 2972 striocp->ic_timout = strioc32.ic_timout; 2973 striocp->ic_len = strioc32.ic_len; 2974 striocp->ic_dp = (char *)(uintptr_t)strioc32.ic_dp; 2975 2976 } else { /* NATIVE data model */ 2977 if (copyin(from, to, sizeof (struct strioctl))) { 2978 return (EFAULT); 2979 } else { 2980 return (0); 2981 } 2982 } 2983 } else { 2984 ASSERT(copyflag & K_TO_K); 2985 bcopy(from, to, sizeof (struct strioctl)); 2986 } 2987 return (0); 2988 } 2989 2990 static int 2991 strcopyout_strioctl(void *from, void *to, int flag, int copyflag) 2992 { 2993 struct strioctl32 strioc32; 2994 struct strioctl *striocp; 2995 2996 if (copyflag & U_TO_K) { 2997 ASSERT((copyflag & K_TO_K) == 0); 2998 2999 if ((flag & FMODELS) == DATAMODEL_ILP32) { 3000 striocp = (struct strioctl *)from; 3001 strioc32.ic_cmd = striocp->ic_cmd; 3002 strioc32.ic_timout = 
striocp->ic_timout; 3003 strioc32.ic_len = striocp->ic_len; 3004 strioc32.ic_dp = (caddr32_t)(uintptr_t)striocp->ic_dp; 3005 ASSERT((char *)(uintptr_t)strioc32.ic_dp == 3006 striocp->ic_dp); 3007 3008 if (copyout(&strioc32, to, sizeof (strioc32))) 3009 return (EFAULT); 3010 3011 } else { /* NATIVE data model */ 3012 if (copyout(from, to, sizeof (struct strioctl))) { 3013 return (EFAULT); 3014 } else { 3015 return (0); 3016 } 3017 } 3018 } else { 3019 ASSERT(copyflag & K_TO_K); 3020 bcopy(from, to, sizeof (struct strioctl)); 3021 } 3022 return (0); 3023 } 3024 3025 #else /* ! _LP64 */ 3026 3027 /* ARGSUSED2 */ 3028 static int 3029 strcopyin_strioctl(void *from, void *to, int flag, int copyflag) 3030 { 3031 return (strcopyin(from, to, sizeof (struct strioctl), copyflag)); 3032 } 3033 3034 /* ARGSUSED2 */ 3035 static int 3036 strcopyout_strioctl(void *from, void *to, int flag, int copyflag) 3037 { 3038 return (strcopyout(from, to, sizeof (struct strioctl), copyflag)); 3039 } 3040 3041 #endif /* _LP64 */ 3042 3043 /* 3044 * Determine type of job control semantics expected by user. The 3045 * possibilities are: 3046 * JCREAD - Behaves like read() on fd; send SIGTTIN 3047 * JCWRITE - Behaves like write() on fd; send SIGTTOU if TOSTOP set 3048 * JCSETP - Sets a value in the stream; send SIGTTOU, ignore TOSTOP 3049 * JCGETP - Gets a value in the stream; no signals. 3050 * See straccess in strsubr.c for usage of these values. 3051 * 3052 * This routine also returns -1 for I_STR as a special case; the 3053 * caller must call again with the real ioctl number for 3054 * classification. 
3055 */ 3056 static int 3057 job_control_type(int cmd) 3058 { 3059 switch (cmd) { 3060 case I_STR: 3061 return (-1); 3062 3063 case I_RECVFD: 3064 case I_E_RECVFD: 3065 return (JCREAD); 3066 3067 case I_FDINSERT: 3068 case I_SENDFD: 3069 return (JCWRITE); 3070 3071 case TCSETA: 3072 case TCSETAW: 3073 case TCSETAF: 3074 case TCSBRK: 3075 case TCXONC: 3076 case TCFLSH: 3077 case TCDSET: /* Obsolete */ 3078 case TIOCSWINSZ: 3079 case TCSETS: 3080 case TCSETSW: 3081 case TCSETSF: 3082 case TIOCSETD: 3083 case TIOCHPCL: 3084 case TIOCSETP: 3085 case TIOCSETN: 3086 case TIOCEXCL: 3087 case TIOCNXCL: 3088 case TIOCFLUSH: 3089 case TIOCSETC: 3090 case TIOCLBIS: 3091 case TIOCLBIC: 3092 case TIOCLSET: 3093 case TIOCSBRK: 3094 case TIOCCBRK: 3095 case TIOCSDTR: 3096 case TIOCCDTR: 3097 case TIOCSLTC: 3098 case TIOCSTOP: 3099 case TIOCSTART: 3100 case TIOCSTI: 3101 case TIOCSPGRP: 3102 case TIOCMSET: 3103 case TIOCMBIS: 3104 case TIOCMBIC: 3105 case TIOCREMOTE: 3106 case TIOCSIGNAL: 3107 case LDSETT: 3108 case LDSMAP: /* Obsolete */ 3109 case DIOCSETP: 3110 case I_FLUSH: 3111 case I_SRDOPT: 3112 case I_SETSIG: 3113 case I_SWROPT: 3114 case I_FLUSHBAND: 3115 case I_SETCLTIME: 3116 case I_SERROPT: 3117 case I_ESETSIG: 3118 case FIONBIO: 3119 case FIOASYNC: 3120 case FIOSETOWN: 3121 case JBOOT: /* Obsolete */ 3122 case JTERM: /* Obsolete */ 3123 case JTIMOM: /* Obsolete */ 3124 case JZOMBOOT: /* Obsolete */ 3125 case JAGENT: /* Obsolete */ 3126 case JTRUN: /* Obsolete */ 3127 case JXTPROTO: /* Obsolete */ 3128 case TIOCSETLD: 3129 return (JCSETP); 3130 } 3131 3132 return (JCGETP); 3133 } 3134 3135 /* 3136 * ioctl for streams 3137 */ 3138 int 3139 strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, 3140 cred_t *crp, int *rvalp) 3141 { 3142 struct stdata *stp; 3143 struct strioctl strioc; 3144 struct uio uio; 3145 struct iovec iov; 3146 int access; 3147 mblk_t *mp; 3148 int error = 0; 3149 int done = 0; 3150 ssize_t rmin, rmax; 3151 queue_t *wrq; 3152 
queue_t *rdq; 3153 boolean_t kioctl = B_FALSE; 3154 3155 if (flag & FKIOCTL) { 3156 copyflag = K_TO_K; 3157 kioctl = B_TRUE; 3158 } 3159 ASSERT(vp->v_stream); 3160 ASSERT(copyflag == U_TO_K || copyflag == K_TO_K); 3161 stp = vp->v_stream; 3162 3163 TRACE_3(TR_FAC_STREAMS_FR, TR_IOCTL_ENTER, 3164 "strioctl:stp %p cmd %X arg %lX", stp, cmd, arg); 3165 3166 if (audit_active) 3167 audit_strioctl(vp, cmd, arg, flag, copyflag, crp, rvalp); 3168 3169 /* 3170 * If the copy is kernel to kernel, make sure that the FNATIVE 3171 * flag is set. After this it would be a serious error to have 3172 * no model flag. 3173 */ 3174 if (copyflag == K_TO_K) 3175 flag = (flag & ~FMODELS) | FNATIVE; 3176 3177 ASSERT((flag & FMODELS) != 0); 3178 3179 wrq = stp->sd_wrq; 3180 rdq = _RD(wrq); 3181 3182 access = job_control_type(cmd); 3183 3184 /* We should never see these here, should be handled by iwscn */ 3185 if (cmd == SRIOCSREDIR || cmd == SRIOCISREDIR) 3186 return (EINVAL); 3187 3188 mutex_enter(&stp->sd_lock); 3189 if ((access != -1) && ((error = i_straccess(stp, access)) != 0)) { 3190 mutex_exit(&stp->sd_lock); 3191 return (error); 3192 } 3193 mutex_exit(&stp->sd_lock); 3194 3195 /* 3196 * Check for sgttyb-related ioctls first, and complain as 3197 * necessary. 
3198 */ 3199 switch (cmd) { 3200 case TIOCGETP: 3201 case TIOCSETP: 3202 case TIOCSETN: 3203 if (sgttyb_handling >= 2 && !sgttyb_complaint) { 3204 sgttyb_complaint = B_TRUE; 3205 cmn_err(CE_NOTE, 3206 "application used obsolete TIOC[GS]ET"); 3207 } 3208 if (sgttyb_handling >= 3) { 3209 tsignal(curthread, SIGSYS); 3210 return (EIO); 3211 } 3212 break; 3213 } 3214 3215 mutex_enter(&stp->sd_lock); 3216 3217 switch (cmd) { 3218 case I_RECVFD: 3219 case I_E_RECVFD: 3220 case I_PEEK: 3221 case I_NREAD: 3222 case FIONREAD: 3223 case FIORDCHK: 3224 case I_ATMARK: 3225 case FIONBIO: 3226 case FIOASYNC: 3227 if (stp->sd_flag & (STRDERR|STPLEX)) { 3228 error = strgeterr(stp, STRDERR|STPLEX, 0); 3229 if (error != 0) { 3230 mutex_exit(&stp->sd_lock); 3231 return (error); 3232 } 3233 } 3234 break; 3235 3236 default: 3237 if (stp->sd_flag & (STRDERR|STWRERR|STPLEX)) { 3238 error = strgeterr(stp, STRDERR|STWRERR|STPLEX, 0); 3239 if (error != 0) { 3240 mutex_exit(&stp->sd_lock); 3241 return (error); 3242 } 3243 } 3244 } 3245 3246 mutex_exit(&stp->sd_lock); 3247 3248 switch (cmd) { 3249 default: 3250 /* 3251 * The stream head has hardcoded knowledge of a 3252 * miscellaneous collection of terminal-, keyboard- and 3253 * mouse-related ioctls, enumerated below. This hardcoded 3254 * knowledge allows the stream head to automatically 3255 * convert transparent ioctl requests made by userland 3256 * programs into I_STR ioctls which many old STREAMS 3257 * modules and drivers require. 3258 * 3259 * No new ioctls should ever be added to this list. 3260 * Instead, the STREAMS module or driver should be written 3261 * to either handle transparent ioctls or require any 3262 * userland programs to use I_STR ioctls (by returning 3263 * EINVAL to any transparent ioctl requests). 
3264 * 3265 * More importantly, removing ioctls from this list should 3266 * be done with the utmost care, since our STREAMS modules 3267 * and drivers *count* on the stream head performing this 3268 * conversion, and thus may panic while processing 3269 * transparent ioctl request for one of these ioctls (keep 3270 * in mind that third party modules and drivers may have 3271 * similar problems). 3272 */ 3273 if (((cmd & IOCTYPE) == LDIOC) || 3274 ((cmd & IOCTYPE) == tIOC) || 3275 ((cmd & IOCTYPE) == TIOC) || 3276 ((cmd & IOCTYPE) == KIOC) || 3277 ((cmd & IOCTYPE) == MSIOC) || 3278 ((cmd & IOCTYPE) == VUIOC)) { 3279 /* 3280 * The ioctl is a tty ioctl - set up strioc buffer 3281 * and call strdoioctl() to do the work. 3282 */ 3283 if (stp->sd_flag & STRHUP) 3284 return (ENXIO); 3285 strioc.ic_cmd = cmd; 3286 strioc.ic_timout = INFTIM; 3287 3288 switch (cmd) { 3289 3290 case TCXONC: 3291 case TCSBRK: 3292 case TCFLSH: 3293 case TCDSET: 3294 { 3295 int native_arg = (int)arg; 3296 strioc.ic_len = sizeof (int); 3297 strioc.ic_dp = (char *)&native_arg; 3298 return (strdoioctl(stp, &strioc, flag, 3299 K_TO_K, crp, rvalp)); 3300 } 3301 3302 case TCSETA: 3303 case TCSETAW: 3304 case TCSETAF: 3305 strioc.ic_len = sizeof (struct termio); 3306 strioc.ic_dp = (char *)arg; 3307 return (strdoioctl(stp, &strioc, flag, 3308 copyflag, crp, rvalp)); 3309 3310 case TCSETS: 3311 case TCSETSW: 3312 case TCSETSF: 3313 strioc.ic_len = sizeof (struct termios); 3314 strioc.ic_dp = (char *)arg; 3315 return (strdoioctl(stp, &strioc, flag, 3316 copyflag, crp, rvalp)); 3317 3318 case LDSETT: 3319 strioc.ic_len = sizeof (struct termcb); 3320 strioc.ic_dp = (char *)arg; 3321 return (strdoioctl(stp, &strioc, flag, 3322 copyflag, crp, rvalp)); 3323 3324 case TIOCSETP: 3325 strioc.ic_len = sizeof (struct sgttyb); 3326 strioc.ic_dp = (char *)arg; 3327 return (strdoioctl(stp, &strioc, flag, 3328 copyflag, crp, rvalp)); 3329 3330 case TIOCSTI: 3331 if ((flag & FREAD) == 0 && 3332 secpolicy_sti(crp) != 
0) { 3333 return (EPERM); 3334 } 3335 mutex_enter(&stp->sd_lock); 3336 mutex_enter(&curproc->p_splock); 3337 if (stp->sd_sidp != curproc->p_sessp->s_sidp && 3338 secpolicy_sti(crp) != 0) { 3339 mutex_exit(&curproc->p_splock); 3340 mutex_exit(&stp->sd_lock); 3341 return (EACCES); 3342 } 3343 mutex_exit(&curproc->p_splock); 3344 mutex_exit(&stp->sd_lock); 3345 3346 strioc.ic_len = sizeof (char); 3347 strioc.ic_dp = (char *)arg; 3348 return (strdoioctl(stp, &strioc, flag, 3349 copyflag, crp, rvalp)); 3350 3351 case TIOCSWINSZ: 3352 strioc.ic_len = sizeof (struct winsize); 3353 strioc.ic_dp = (char *)arg; 3354 return (strdoioctl(stp, &strioc, flag, 3355 copyflag, crp, rvalp)); 3356 3357 case TIOCSSIZE: 3358 strioc.ic_len = sizeof (struct ttysize); 3359 strioc.ic_dp = (char *)arg; 3360 return (strdoioctl(stp, &strioc, flag, 3361 copyflag, crp, rvalp)); 3362 3363 case TIOCSSOFTCAR: 3364 case KIOCTRANS: 3365 case KIOCTRANSABLE: 3366 case KIOCCMD: 3367 case KIOCSDIRECT: 3368 case KIOCSCOMPAT: 3369 case KIOCSKABORTEN: 3370 case KIOCSRPTDELAY: 3371 case KIOCSRPTRATE: 3372 case VUIDSFORMAT: 3373 case TIOCSPPS: 3374 strioc.ic_len = sizeof (int); 3375 strioc.ic_dp = (char *)arg; 3376 return (strdoioctl(stp, &strioc, flag, 3377 copyflag, crp, rvalp)); 3378 3379 case KIOCSETKEY: 3380 case KIOCGETKEY: 3381 strioc.ic_len = sizeof (struct kiockey); 3382 strioc.ic_dp = (char *)arg; 3383 return (strdoioctl(stp, &strioc, flag, 3384 copyflag, crp, rvalp)); 3385 3386 case KIOCSKEY: 3387 case KIOCGKEY: 3388 strioc.ic_len = sizeof (struct kiockeymap); 3389 strioc.ic_dp = (char *)arg; 3390 return (strdoioctl(stp, &strioc, flag, 3391 copyflag, crp, rvalp)); 3392 3393 case KIOCSLED: 3394 /* arg is a pointer to char */ 3395 strioc.ic_len = sizeof (char); 3396 strioc.ic_dp = (char *)arg; 3397 return (strdoioctl(stp, &strioc, flag, 3398 copyflag, crp, rvalp)); 3399 3400 case MSIOSETPARMS: 3401 strioc.ic_len = sizeof (Ms_parms); 3402 strioc.ic_dp = (char *)arg; 3403 return (strdoioctl(stp, 
&strioc, flag, 3404 copyflag, crp, rvalp)); 3405 3406 case VUIDSADDR: 3407 case VUIDGADDR: 3408 strioc.ic_len = sizeof (struct vuid_addr_probe); 3409 strioc.ic_dp = (char *)arg; 3410 return (strdoioctl(stp, &strioc, flag, 3411 copyflag, crp, rvalp)); 3412 3413 /* 3414 * These M_IOCTL's don't require any data to be sent 3415 * downstream, and the driver will allocate and link 3416 * on its own mblk_t upon M_IOCACK -- thus we set 3417 * ic_len to zero and set ic_dp to arg so we know 3418 * where to copyout to later. 3419 */ 3420 case TIOCGSOFTCAR: 3421 case TIOCGWINSZ: 3422 case TIOCGSIZE: 3423 case KIOCGTRANS: 3424 case KIOCGTRANSABLE: 3425 case KIOCTYPE: 3426 case KIOCGDIRECT: 3427 case KIOCGCOMPAT: 3428 case KIOCLAYOUT: 3429 case KIOCGLED: 3430 case MSIOGETPARMS: 3431 case MSIOBUTTONS: 3432 case VUIDGFORMAT: 3433 case TIOCGPPS: 3434 case TIOCGPPSEV: 3435 case TCGETA: 3436 case TCGETS: 3437 case LDGETT: 3438 case TIOCGETP: 3439 case KIOCGRPTDELAY: 3440 case KIOCGRPTRATE: 3441 strioc.ic_len = 0; 3442 strioc.ic_dp = (char *)arg; 3443 return (strdoioctl(stp, &strioc, flag, 3444 copyflag, crp, rvalp)); 3445 } 3446 } 3447 3448 /* 3449 * Unknown cmd - send it down as a transparent ioctl. 3450 */ 3451 strioc.ic_cmd = cmd; 3452 strioc.ic_timout = INFTIM; 3453 strioc.ic_len = TRANSPARENT; 3454 strioc.ic_dp = (char *)&arg; 3455 3456 return (strdoioctl(stp, &strioc, flag, copyflag, crp, rvalp)); 3457 3458 case I_STR: 3459 /* 3460 * Stream ioctl. Read in an strioctl buffer from the user 3461 * along with any data specified and send it downstream. 3462 * Strdoioctl will wait allow only one ioctl message at 3463 * a time, and waits for the acknowledgement. 
3464 */ 3465 3466 if (stp->sd_flag & STRHUP) 3467 return (ENXIO); 3468 3469 error = strcopyin_strioctl((void *)arg, &strioc, flag, 3470 copyflag); 3471 if (error != 0) 3472 return (error); 3473 3474 if ((strioc.ic_len < 0) || (strioc.ic_timout < -1)) 3475 return (EINVAL); 3476 3477 access = job_control_type(strioc.ic_cmd); 3478 mutex_enter(&stp->sd_lock); 3479 if ((access != -1) && 3480 ((error = i_straccess(stp, access)) != 0)) { 3481 mutex_exit(&stp->sd_lock); 3482 return (error); 3483 } 3484 mutex_exit(&stp->sd_lock); 3485 3486 /* 3487 * The I_STR facility provides a trap door for malicious 3488 * code to send down bogus streamio(7I) ioctl commands to 3489 * unsuspecting STREAMS modules and drivers which expect to 3490 * only get these messages from the stream head. 3491 * Explicitly prohibit any streamio ioctls which can be 3492 * passed downstream by the stream head. Note that we do 3493 * not block all streamio ioctls because the ioctl 3494 * numberspace is not well managed and thus it's possible 3495 * that a module or driver's ioctl numbers may accidentally 3496 * collide with them. 3497 */ 3498 switch (strioc.ic_cmd) { 3499 case I_LINK: 3500 case I_PLINK: 3501 case I_UNLINK: 3502 case I_PUNLINK: 3503 case _I_GETPEERCRED: 3504 case _I_PLINK_LH: 3505 return (EINVAL); 3506 } 3507 3508 error = strdoioctl(stp, &strioc, flag, copyflag, crp, rvalp); 3509 if (error == 0) { 3510 error = strcopyout_strioctl(&strioc, (void *)arg, 3511 flag, copyflag); 3512 } 3513 return (error); 3514 3515 case I_NREAD: 3516 /* 3517 * Return number of bytes of data in first message 3518 * in queue in "arg" and return the number of messages 3519 * in queue in return value. 
3520 */ 3521 { 3522 size_t size; 3523 int retval; 3524 int count = 0; 3525 3526 mutex_enter(QLOCK(rdq)); 3527 3528 size = msgdsize(rdq->q_first); 3529 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next) 3530 count++; 3531 3532 mutex_exit(QLOCK(rdq)); 3533 if (stp->sd_struiordq) { 3534 infod_t infod; 3535 3536 infod.d_cmd = INFOD_COUNT; 3537 infod.d_count = 0; 3538 if (count == 0) { 3539 infod.d_cmd |= INFOD_FIRSTBYTES; 3540 infod.d_bytes = 0; 3541 } 3542 infod.d_res = 0; 3543 (void) infonext(rdq, &infod); 3544 count += infod.d_count; 3545 if (infod.d_res & INFOD_FIRSTBYTES) 3546 size = infod.d_bytes; 3547 } 3548 3549 /* 3550 * Drop down from size_t to the "int" required by the 3551 * interface. Cap at INT_MAX. 3552 */ 3553 retval = MIN(size, INT_MAX); 3554 error = strcopyout(&retval, (void *)arg, sizeof (retval), 3555 copyflag); 3556 if (!error) 3557 *rvalp = count; 3558 return (error); 3559 } 3560 3561 case FIONREAD: 3562 /* 3563 * Return number of bytes of data in all data messages 3564 * in queue in "arg". 3565 */ 3566 { 3567 size_t size = 0; 3568 int retval; 3569 3570 mutex_enter(QLOCK(rdq)); 3571 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next) 3572 size += msgdsize(mp); 3573 mutex_exit(QLOCK(rdq)); 3574 3575 if (stp->sd_struiordq) { 3576 infod_t infod; 3577 3578 infod.d_cmd = INFOD_BYTES; 3579 infod.d_res = 0; 3580 infod.d_bytes = 0; 3581 (void) infonext(rdq, &infod); 3582 size += infod.d_bytes; 3583 } 3584 3585 /* 3586 * Drop down from size_t to the "int" required by the 3587 * interface. Cap at INT_MAX. 3588 */ 3589 retval = MIN(size, INT_MAX); 3590 error = strcopyout(&retval, (void *)arg, sizeof (retval), 3591 copyflag); 3592 3593 *rvalp = 0; 3594 return (error); 3595 } 3596 case FIORDCHK: 3597 /* 3598 * FIORDCHK does not use arg value (like FIONREAD), 3599 * instead a count is returned. I_NREAD value may 3600 * not be accurate but safe. The real thing to do is 3601 * to add the msgdsizes of all data messages until 3602 * a non-data message. 
3603 */ 3604 { 3605 size_t size = 0; 3606 3607 mutex_enter(QLOCK(rdq)); 3608 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next) 3609 size += msgdsize(mp); 3610 mutex_exit(QLOCK(rdq)); 3611 3612 if (stp->sd_struiordq) { 3613 infod_t infod; 3614 3615 infod.d_cmd = INFOD_BYTES; 3616 infod.d_res = 0; 3617 infod.d_bytes = 0; 3618 (void) infonext(rdq, &infod); 3619 size += infod.d_bytes; 3620 } 3621 3622 /* 3623 * Since ioctl returns an int, and memory sizes under 3624 * LP64 may not fit, we return INT_MAX if the count was 3625 * actually greater. 3626 */ 3627 *rvalp = MIN(size, INT_MAX); 3628 return (0); 3629 } 3630 3631 case I_FIND: 3632 /* 3633 * Get module name. 3634 */ 3635 { 3636 char mname[FMNAMESZ + 1]; 3637 queue_t *q; 3638 3639 error = (copyflag & U_TO_K ? copyinstr : copystr)((void *)arg, 3640 mname, FMNAMESZ + 1, NULL); 3641 if (error) 3642 return ((error == ENAMETOOLONG) ? EINVAL : EFAULT); 3643 3644 /* 3645 * Return EINVAL if we're handed a bogus module name. 3646 */ 3647 if (fmodsw_find(mname, FMODSW_LOAD) == NULL) { 3648 TRACE_0(TR_FAC_STREAMS_FR, 3649 TR_I_CANT_FIND, "couldn't I_FIND"); 3650 return (EINVAL); 3651 } 3652 3653 *rvalp = 0; 3654 3655 /* Look downstream to see if module is there. */ 3656 claimstr(stp->sd_wrq); 3657 for (q = stp->sd_wrq->q_next; q; q = q->q_next) { 3658 if (q->q_flag&QREADR) { 3659 q = NULL; 3660 break; 3661 } 3662 if (strcmp(mname, q->q_qinfo->qi_minfo->mi_idname) == 0) 3663 break; 3664 } 3665 releasestr(stp->sd_wrq); 3666 3667 *rvalp = (q ? 1 : 0); 3668 return (error); 3669 } 3670 3671 case I_PUSH: 3672 case __I_PUSH_NOCTTY: 3673 /* 3674 * Push a module. 3675 * For the case __I_PUSH_NOCTTY push a module but 3676 * do not allocate controlling tty. See bugid 4025044 3677 */ 3678 3679 { 3680 char mname[FMNAMESZ + 1]; 3681 fmodsw_impl_t *fp; 3682 dev_t dummydev; 3683 3684 if (stp->sd_flag & STRHUP) 3685 return (ENXIO); 3686 3687 /* 3688 * Get module name and look up in fmodsw. 3689 */ 3690 error = (copyflag & U_TO_K ? 
copyinstr : copystr)((void *)arg, 3691 mname, FMNAMESZ + 1, NULL); 3692 if (error) 3693 return ((error == ENAMETOOLONG) ? EINVAL : EFAULT); 3694 3695 if ((fp = fmodsw_find(mname, FMODSW_HOLD | FMODSW_LOAD)) == 3696 NULL) 3697 return (EINVAL); 3698 3699 TRACE_2(TR_FAC_STREAMS_FR, TR_I_PUSH, 3700 "I_PUSH:fp %p stp %p", fp, stp); 3701 3702 if (error = strstartplumb(stp, flag, cmd)) { 3703 fmodsw_rele(fp); 3704 return (error); 3705 } 3706 3707 /* 3708 * See if any more modules can be pushed on this stream. 3709 * Note that this check must be done after strstartplumb() 3710 * since otherwise multiple threads issuing I_PUSHes on 3711 * the same stream will be able to exceed nstrpush. 3712 */ 3713 mutex_enter(&stp->sd_lock); 3714 if (stp->sd_pushcnt >= nstrpush) { 3715 fmodsw_rele(fp); 3716 strendplumb(stp); 3717 mutex_exit(&stp->sd_lock); 3718 return (EINVAL); 3719 } 3720 mutex_exit(&stp->sd_lock); 3721 3722 /* 3723 * Push new module and call its open routine 3724 * via qattach(). Modules don't change device 3725 * numbers, so just ignore dummydev here. 3726 */ 3727 dummydev = vp->v_rdev; 3728 if ((error = qattach(rdq, &dummydev, 0, crp, fp, 3729 B_FALSE)) == 0) { 3730 if (vp->v_type == VCHR && /* sorry, no pipes allowed */ 3731 (cmd == I_PUSH) && (stp->sd_flag & STRISTTY)) { 3732 /* 3733 * try to allocate it as a controlling terminal 3734 */ 3735 (void) strctty(stp); 3736 } 3737 } 3738 3739 mutex_enter(&stp->sd_lock); 3740 3741 /* 3742 * As a performance concern we are caching the values of 3743 * q_minpsz and q_maxpsz of the module below the stream 3744 * head in the stream head. 
3745 */ 3746 mutex_enter(QLOCK(stp->sd_wrq->q_next)); 3747 rmin = stp->sd_wrq->q_next->q_minpsz; 3748 rmax = stp->sd_wrq->q_next->q_maxpsz; 3749 mutex_exit(QLOCK(stp->sd_wrq->q_next)); 3750 3751 /* Do this processing here as a performance concern */ 3752 if (strmsgsz != 0) { 3753 if (rmax == INFPSZ) 3754 rmax = strmsgsz; 3755 else { 3756 if (vp->v_type == VFIFO) 3757 rmax = MIN(PIPE_BUF, rmax); 3758 else rmax = MIN(strmsgsz, rmax); 3759 } 3760 } 3761 3762 mutex_enter(QLOCK(wrq)); 3763 stp->sd_qn_minpsz = rmin; 3764 stp->sd_qn_maxpsz = rmax; 3765 mutex_exit(QLOCK(wrq)); 3766 3767 strendplumb(stp); 3768 mutex_exit(&stp->sd_lock); 3769 return (error); 3770 } 3771 3772 case I_POP: 3773 { 3774 queue_t *q; 3775 3776 if (stp->sd_flag & STRHUP) 3777 return (ENXIO); 3778 if (!wrq->q_next) /* for broken pipes */ 3779 return (EINVAL); 3780 3781 if (error = strstartplumb(stp, flag, cmd)) 3782 return (error); 3783 3784 /* 3785 * If there is an anchor on this stream and popping 3786 * the current module would attempt to pop through the 3787 * anchor, then disallow the pop unless we have sufficient 3788 * privileges; take the cheapest (non-locking) check 3789 * first. 3790 */ 3791 if (secpolicy_ip_config(crp, B_TRUE) != 0 || 3792 (stp->sd_anchorzone != crgetzoneid(crp))) { 3793 mutex_enter(&stp->sd_lock); 3794 /* 3795 * Anchors only apply if there's at least one 3796 * module on the stream (sd_pushcnt > 0). 
3797 */ 3798 if (stp->sd_pushcnt > 0 && 3799 stp->sd_pushcnt == stp->sd_anchor && 3800 stp->sd_vnode->v_type != VFIFO) { 3801 strendplumb(stp); 3802 mutex_exit(&stp->sd_lock); 3803 if (stp->sd_anchorzone != crgetzoneid(crp)) 3804 return (EINVAL); 3805 /* Audit and report error */ 3806 return (secpolicy_ip_config(crp, B_FALSE)); 3807 } 3808 mutex_exit(&stp->sd_lock); 3809 } 3810 3811 q = wrq->q_next; 3812 TRACE_2(TR_FAC_STREAMS_FR, TR_I_POP, 3813 "I_POP:%p from %p", q, stp); 3814 if (q->q_next == NULL || (q->q_flag & (QREADR|QISDRV))) { 3815 error = EINVAL; 3816 } else { 3817 qdetach(_RD(q), 1, flag, crp, B_FALSE); 3818 error = 0; 3819 } 3820 mutex_enter(&stp->sd_lock); 3821 3822 /* 3823 * As a performance concern we are caching the values of 3824 * q_minpsz and q_maxpsz of the module below the stream 3825 * head in the stream head. 3826 */ 3827 mutex_enter(QLOCK(wrq->q_next)); 3828 rmin = wrq->q_next->q_minpsz; 3829 rmax = wrq->q_next->q_maxpsz; 3830 mutex_exit(QLOCK(wrq->q_next)); 3831 3832 /* Do this processing here as a performance concern */ 3833 if (strmsgsz != 0) { 3834 if (rmax == INFPSZ) 3835 rmax = strmsgsz; 3836 else { 3837 if (vp->v_type == VFIFO) 3838 rmax = MIN(PIPE_BUF, rmax); 3839 else rmax = MIN(strmsgsz, rmax); 3840 } 3841 } 3842 3843 mutex_enter(QLOCK(wrq)); 3844 stp->sd_qn_minpsz = rmin; 3845 stp->sd_qn_maxpsz = rmax; 3846 mutex_exit(QLOCK(wrq)); 3847 3848 /* If we popped through the anchor, then reset the anchor. */ 3849 if (stp->sd_pushcnt < stp->sd_anchor) { 3850 stp->sd_anchor = 0; 3851 stp->sd_anchorzone = 0; 3852 } 3853 strendplumb(stp); 3854 mutex_exit(&stp->sd_lock); 3855 return (error); 3856 } 3857 3858 case _I_MUXID2FD: 3859 { 3860 /* 3861 * Create a fd for a I_PLINK'ed lower stream with a given 3862 * muxid. With the fd, application can send down ioctls, 3863 * like I_LIST, to the previously I_PLINK'ed stream. 
Note 3864 * that after getting the fd, the application has to do an 3865 * I_PUNLINK on the muxid before it can do any operation 3866 * on the lower stream. This is required by spec1170. 3867 * 3868 * The fd used to do this ioctl should point to the same 3869 * controlling device used to do the I_PLINK. If it uses 3870 * a different stream or an invalid muxid, I_MUXID2FD will 3871 * fail. The error code is set to EINVAL. 3872 * 3873 * The intended use of this interface is the following. 3874 * An application I_PLINK'ed a stream and exits. The fd 3875 * to the lower stream is gone. Another application 3876 * wants to get a fd to the lower stream, it uses I_MUXID2FD. 3877 */ 3878 int muxid = (int)arg; 3879 int fd; 3880 linkinfo_t *linkp; 3881 struct file *fp; 3882 netstack_t *ns; 3883 str_stack_t *ss; 3884 3885 /* 3886 * Do not allow the wildcard muxid. This ioctl is not 3887 * intended to find arbitrary link. 3888 */ 3889 if (muxid == 0) { 3890 return (EINVAL); 3891 } 3892 3893 ns = netstack_find_by_cred(crp); 3894 ASSERT(ns != NULL); 3895 ss = ns->netstack_str; 3896 ASSERT(ss != NULL); 3897 3898 mutex_enter(&muxifier); 3899 linkp = findlinks(vp->v_stream, muxid, LINKPERSIST, ss); 3900 if (linkp == NULL) { 3901 mutex_exit(&muxifier); 3902 netstack_rele(ss->ss_netstack); 3903 return (EINVAL); 3904 } 3905 3906 if ((fd = ufalloc(0)) == -1) { 3907 mutex_exit(&muxifier); 3908 netstack_rele(ss->ss_netstack); 3909 return (EMFILE); 3910 } 3911 fp = linkp->li_fpdown; 3912 mutex_enter(&fp->f_tlock); 3913 fp->f_count++; 3914 mutex_exit(&fp->f_tlock); 3915 mutex_exit(&muxifier); 3916 setf(fd, fp); 3917 *rvalp = fd; 3918 netstack_rele(ss->ss_netstack); 3919 return (0); 3920 } 3921 3922 case _I_INSERT: 3923 { 3924 /* 3925 * To insert a module to a given position in a stream. 3926 * In the first release, only allow privileged user 3927 * to use this ioctl. 
Furthermore, the insert is only allowed 3928 * below an anchor if the zoneid is the same as the zoneid 3929 * which created the anchor. 3930 * 3931 * Note that we do not plan to support this ioctl 3932 * on pipes in the first release. We want to learn more 3933 * about the implications of these ioctls before extending 3934 * their support. And we do not think these features are 3935 * valuable for pipes. 3936 * 3937 * Neither do we support O/C hot stream. Note that only 3938 * the upper streams of TCP/IP stack are O/C hot streams. 3939 * The lower IP stream is not. 3940 * When there is a O/C cold barrier, we only allow inserts 3941 * above the barrier. 3942 */ 3943 STRUCT_DECL(strmodconf, strmodinsert); 3944 char mod_name[FMNAMESZ + 1]; 3945 fmodsw_impl_t *fp; 3946 dev_t dummydev; 3947 queue_t *tmp_wrq; 3948 int pos; 3949 boolean_t is_insert; 3950 3951 STRUCT_INIT(strmodinsert, flag); 3952 if (stp->sd_flag & STRHUP) 3953 return (ENXIO); 3954 if (STRMATED(stp)) 3955 return (EINVAL); 3956 if ((error = secpolicy_net_config(crp, B_FALSE)) != 0) 3957 return (error); 3958 if (stp->sd_anchor != 0 && 3959 stp->sd_anchorzone != crgetzoneid(crp)) 3960 return (EINVAL); 3961 3962 error = strcopyin((void *)arg, STRUCT_BUF(strmodinsert), 3963 STRUCT_SIZE(strmodinsert), copyflag); 3964 if (error) 3965 return (error); 3966 3967 /* 3968 * Get module name and look up in fmodsw. 3969 */ 3970 error = (copyflag & U_TO_K ? copyinstr : 3971 copystr)(STRUCT_FGETP(strmodinsert, mod_name), 3972 mod_name, FMNAMESZ + 1, NULL); 3973 if (error) 3974 return ((error == ENAMETOOLONG) ? EINVAL : EFAULT); 3975 3976 if ((fp = fmodsw_find(mod_name, FMODSW_HOLD | FMODSW_LOAD)) == 3977 NULL) 3978 return (EINVAL); 3979 3980 if (error = strstartplumb(stp, flag, cmd)) { 3981 fmodsw_rele(fp); 3982 return (error); 3983 } 3984 3985 /* 3986 * Is this _I_INSERT just like an I_PUSH? We need to know 3987 * this because we do some optimizations if this is a 3988 * module being pushed. 
3989 */ 3990 pos = STRUCT_FGET(strmodinsert, pos); 3991 is_insert = (pos != 0); 3992 3993 /* 3994 * Make sure pos is valid. Even though it is not an I_PUSH, 3995 * we impose the same limit on the number of modules in a 3996 * stream. 3997 */ 3998 mutex_enter(&stp->sd_lock); 3999 if (stp->sd_pushcnt >= nstrpush || pos < 0 || 4000 pos > stp->sd_pushcnt) { 4001 fmodsw_rele(fp); 4002 strendplumb(stp); 4003 mutex_exit(&stp->sd_lock); 4004 return (EINVAL); 4005 } 4006 if (stp->sd_anchor != 0) { 4007 /* 4008 * Is this insert below the anchor? 4009 * Pushcnt hasn't been increased yet hence 4010 * we test for greater than here, and greater or 4011 * equal after qattach. 4012 */ 4013 if (pos > (stp->sd_pushcnt - stp->sd_anchor) && 4014 stp->sd_anchorzone != crgetzoneid(crp)) { 4015 fmodsw_rele(fp); 4016 strendplumb(stp); 4017 mutex_exit(&stp->sd_lock); 4018 return (EPERM); 4019 } 4020 } 4021 4022 mutex_exit(&stp->sd_lock); 4023 4024 /* 4025 * First find the correct position this module to 4026 * be inserted. We don't need to call claimstr() 4027 * as the stream should not be changing at this point. 4028 * 4029 * Insert new module and call its open routine 4030 * via qattach(). Modules don't change device 4031 * numbers, so just ignore dummydev here. 4032 */ 4033 for (tmp_wrq = stp->sd_wrq; pos > 0; 4034 tmp_wrq = tmp_wrq->q_next, pos--) { 4035 ASSERT(SAMESTR(tmp_wrq)); 4036 } 4037 dummydev = vp->v_rdev; 4038 if ((error = qattach(_RD(tmp_wrq), &dummydev, 0, crp, 4039 fp, is_insert)) != 0) { 4040 mutex_enter(&stp->sd_lock); 4041 strendplumb(stp); 4042 mutex_exit(&stp->sd_lock); 4043 return (error); 4044 } 4045 4046 mutex_enter(&stp->sd_lock); 4047 4048 /* 4049 * As a performance concern we are caching the values of 4050 * q_minpsz and q_maxpsz of the module below the stream 4051 * head in the stream head. 
4052 */ 4053 if (!is_insert) { 4054 mutex_enter(QLOCK(stp->sd_wrq->q_next)); 4055 rmin = stp->sd_wrq->q_next->q_minpsz; 4056 rmax = stp->sd_wrq->q_next->q_maxpsz; 4057 mutex_exit(QLOCK(stp->sd_wrq->q_next)); 4058 4059 /* Do this processing here as a performance concern */ 4060 if (strmsgsz != 0) { 4061 if (rmax == INFPSZ) { 4062 rmax = strmsgsz; 4063 } else { 4064 rmax = MIN(strmsgsz, rmax); 4065 } 4066 } 4067 4068 mutex_enter(QLOCK(wrq)); 4069 stp->sd_qn_minpsz = rmin; 4070 stp->sd_qn_maxpsz = rmax; 4071 mutex_exit(QLOCK(wrq)); 4072 } 4073 4074 /* 4075 * Need to update the anchor value if this module is 4076 * inserted below the anchor point. 4077 */ 4078 if (stp->sd_anchor != 0) { 4079 pos = STRUCT_FGET(strmodinsert, pos); 4080 if (pos >= (stp->sd_pushcnt - stp->sd_anchor)) 4081 stp->sd_anchor++; 4082 } 4083 4084 strendplumb(stp); 4085 mutex_exit(&stp->sd_lock); 4086 return (0); 4087 } 4088 4089 case _I_REMOVE: 4090 { 4091 /* 4092 * To remove a module with a given name in a stream. The 4093 * caller of this ioctl needs to provide both the name and 4094 * the position of the module to be removed. This eliminates 4095 * the ambiguity of removal if a module is inserted/pushed 4096 * multiple times in a stream. In the first release, only 4097 * allow privileged user to use this ioctl. 4098 * Furthermore, the remove is only allowed 4099 * below an anchor if the zoneid is the same as the zoneid 4100 * which created the anchor. 4101 * 4102 * Note that we do not plan to support this ioctl 4103 * on pipes in the first release. We want to learn more 4104 * about the implications of these ioctls before extending 4105 * their support. And we do not think these features are 4106 * valuable for pipes. 4107 * 4108 * Neither do we support O/C hot stream. Note that only 4109 * the upper streams of TCP/IP stack are O/C hot streams. 4110 * The lower IP stream is not. 4111 * When there is a O/C cold barrier we do not allow removal 4112 * below the barrier. 
4113 * 4114 * Also note that _I_REMOVE cannot be used to remove a 4115 * driver or the stream head. 4116 */ 4117 STRUCT_DECL(strmodconf, strmodremove); 4118 queue_t *q; 4119 int pos; 4120 char mod_name[FMNAMESZ + 1]; 4121 boolean_t is_remove; 4122 4123 STRUCT_INIT(strmodremove, flag); 4124 if (stp->sd_flag & STRHUP) 4125 return (ENXIO); 4126 if (STRMATED(stp)) 4127 return (EINVAL); 4128 if ((error = secpolicy_net_config(crp, B_FALSE)) != 0) 4129 return (error); 4130 if (stp->sd_anchor != 0 && 4131 stp->sd_anchorzone != crgetzoneid(crp)) 4132 return (EINVAL); 4133 4134 error = strcopyin((void *)arg, STRUCT_BUF(strmodremove), 4135 STRUCT_SIZE(strmodremove), copyflag); 4136 if (error) 4137 return (error); 4138 4139 error = (copyflag & U_TO_K ? copyinstr : 4140 copystr)(STRUCT_FGETP(strmodremove, mod_name), 4141 mod_name, FMNAMESZ + 1, NULL); 4142 if (error) 4143 return ((error == ENAMETOOLONG) ? EINVAL : EFAULT); 4144 4145 if ((error = strstartplumb(stp, flag, cmd)) != 0) 4146 return (error); 4147 4148 /* 4149 * Match the name of given module to the name of module at 4150 * the given position. 4151 */ 4152 pos = STRUCT_FGET(strmodremove, pos); 4153 4154 is_remove = (pos != 0); 4155 for (q = stp->sd_wrq->q_next; SAMESTR(q) && pos > 0; 4156 q = q->q_next, pos--) 4157 ; 4158 if (pos > 0 || ! SAMESTR(q) || 4159 strncmp(q->q_qinfo->qi_minfo->mi_idname, mod_name, 4160 strlen(q->q_qinfo->qi_minfo->mi_idname)) != 0) { 4161 mutex_enter(&stp->sd_lock); 4162 strendplumb(stp); 4163 mutex_exit(&stp->sd_lock); 4164 return (EINVAL); 4165 } 4166 4167 /* 4168 * If the position is at or below an anchor, then the zoneid 4169 * must match the zoneid that created the anchor. 
4170 */ 4171 if (stp->sd_anchor != 0) { 4172 pos = STRUCT_FGET(strmodremove, pos); 4173 if (pos >= (stp->sd_pushcnt - stp->sd_anchor) && 4174 stp->sd_anchorzone != crgetzoneid(crp)) { 4175 mutex_enter(&stp->sd_lock); 4176 strendplumb(stp); 4177 mutex_exit(&stp->sd_lock); 4178 return (EPERM); 4179 } 4180 } 4181 4182 4183 ASSERT(!(q->q_flag & QREADR)); 4184 qdetach(_RD(q), 1, flag, crp, is_remove); 4185 4186 mutex_enter(&stp->sd_lock); 4187 4188 /* 4189 * As a performance concern we are caching the values of 4190 * q_minpsz and q_maxpsz of the module below the stream 4191 * head in the stream head. 4192 */ 4193 if (!is_remove) { 4194 mutex_enter(QLOCK(wrq->q_next)); 4195 rmin = wrq->q_next->q_minpsz; 4196 rmax = wrq->q_next->q_maxpsz; 4197 mutex_exit(QLOCK(wrq->q_next)); 4198 4199 /* Do this processing here as a performance concern */ 4200 if (strmsgsz != 0) { 4201 if (rmax == INFPSZ) 4202 rmax = strmsgsz; 4203 else { 4204 if (vp->v_type == VFIFO) 4205 rmax = MIN(PIPE_BUF, rmax); 4206 else rmax = MIN(strmsgsz, rmax); 4207 } 4208 } 4209 4210 mutex_enter(QLOCK(wrq)); 4211 stp->sd_qn_minpsz = rmin; 4212 stp->sd_qn_maxpsz = rmax; 4213 mutex_exit(QLOCK(wrq)); 4214 } 4215 4216 /* 4217 * Need to update the anchor value if this module is removed 4218 * at or below the anchor point. If the removed module is at 4219 * the anchor point, remove the anchor for this stream if 4220 * there is no module above the anchor point. Otherwise, if 4221 * the removed module is below the anchor point, decrement the 4222 * anchor point by 1. 
4223 */ 4224 if (stp->sd_anchor != 0) { 4225 pos = STRUCT_FGET(strmodremove, pos); 4226 if (pos == stp->sd_pushcnt - stp->sd_anchor + 1) 4227 stp->sd_anchor = 0; 4228 else if (pos > (stp->sd_pushcnt - stp->sd_anchor + 1)) 4229 stp->sd_anchor--; 4230 } 4231 4232 strendplumb(stp); 4233 mutex_exit(&stp->sd_lock); 4234 return (0); 4235 } 4236 4237 case I_ANCHOR: 4238 /* 4239 * Set the anchor position on the stream to reside at 4240 * the top module (in other words, the top module 4241 * cannot be popped). Anchors with a FIFO make no 4242 * obvious sense, so they're not allowed. 4243 */ 4244 mutex_enter(&stp->sd_lock); 4245 4246 if (stp->sd_vnode->v_type == VFIFO) { 4247 mutex_exit(&stp->sd_lock); 4248 return (EINVAL); 4249 } 4250 /* Only allow the same zoneid to update the anchor */ 4251 if (stp->sd_anchor != 0 && 4252 stp->sd_anchorzone != crgetzoneid(crp)) { 4253 mutex_exit(&stp->sd_lock); 4254 return (EINVAL); 4255 } 4256 stp->sd_anchor = stp->sd_pushcnt; 4257 stp->sd_anchorzone = crgetzoneid(crp); 4258 mutex_exit(&stp->sd_lock); 4259 return (0); 4260 4261 case I_LOOK: 4262 /* 4263 * Get name of first module downstream. 4264 * If no module, return an error. 4265 */ 4266 { 4267 claimstr(wrq); 4268 if (_SAMESTR(wrq) && wrq->q_next->q_next) { 4269 char *name = wrq->q_next->q_qinfo->qi_minfo->mi_idname; 4270 error = strcopyout(name, (void *)arg, strlen(name) + 1, 4271 copyflag); 4272 releasestr(wrq); 4273 return (error); 4274 } 4275 releasestr(wrq); 4276 return (EINVAL); 4277 } 4278 4279 case I_LINK: 4280 case I_PLINK: 4281 /* 4282 * Link a multiplexor. 4283 */ 4284 error = mlink(vp, cmd, (int)arg, crp, rvalp, 0); 4285 return (error); 4286 4287 case _I_PLINK_LH: 4288 /* 4289 * Link a multiplexor: Call must originate from kernel. 4290 */ 4291 if (kioctl) 4292 return (ldi_mlink_lh(vp, cmd, arg, crp, rvalp)); 4293 4294 return (EINVAL); 4295 case I_UNLINK: 4296 case I_PUNLINK: 4297 /* 4298 * Unlink a multiplexor. 
4299 * If arg is -1, unlink all links for which this is the 4300 * controlling stream. Otherwise, arg is an index number 4301 * for a link to be removed. 4302 */ 4303 { 4304 struct linkinfo *linkp; 4305 int native_arg = (int)arg; 4306 int type; 4307 netstack_t *ns; 4308 str_stack_t *ss; 4309 4310 TRACE_1(TR_FAC_STREAMS_FR, 4311 TR_I_UNLINK, "I_UNLINK/I_PUNLINK:%p", stp); 4312 if (vp->v_type == VFIFO) { 4313 return (EINVAL); 4314 } 4315 if (cmd == I_UNLINK) 4316 type = LINKNORMAL; 4317 else /* I_PUNLINK */ 4318 type = LINKPERSIST; 4319 if (native_arg == 0) { 4320 return (EINVAL); 4321 } 4322 ns = netstack_find_by_cred(crp); 4323 ASSERT(ns != NULL); 4324 ss = ns->netstack_str; 4325 ASSERT(ss != NULL); 4326 4327 if (native_arg == MUXID_ALL) 4328 error = munlinkall(stp, type, crp, rvalp, ss); 4329 else { 4330 mutex_enter(&muxifier); 4331 if (!(linkp = findlinks(stp, (int)arg, type, ss))) { 4332 /* invalid user supplied index number */ 4333 mutex_exit(&muxifier); 4334 netstack_rele(ss->ss_netstack); 4335 return (EINVAL); 4336 } 4337 /* munlink drops the muxifier lock */ 4338 error = munlink(stp, linkp, type, crp, rvalp, ss); 4339 } 4340 netstack_rele(ss->ss_netstack); 4341 return (error); 4342 } 4343 4344 case I_FLUSH: 4345 /* 4346 * send a flush message downstream 4347 * flush message can indicate 4348 * FLUSHR - flush read queue 4349 * FLUSHW - flush write queue 4350 * FLUSHRW - flush read/write queue 4351 */ 4352 if (stp->sd_flag & STRHUP) 4353 return (ENXIO); 4354 if (arg & ~FLUSHRW) 4355 return (EINVAL); 4356 4357 for (;;) { 4358 if (putnextctl1(stp->sd_wrq, M_FLUSH, (int)arg)) { 4359 break; 4360 } 4361 if (error = strwaitbuf(1, BPRI_HI)) { 4362 return (error); 4363 } 4364 } 4365 4366 /* 4367 * Send down an unsupported ioctl and wait for the nack 4368 * in order to allow the M_FLUSH to propagate back 4369 * up to the stream head. 
4370 * Replaces if (qready()) runqueues(); 4371 */ 4372 strioc.ic_cmd = -1; /* The unsupported ioctl */ 4373 strioc.ic_timout = 0; 4374 strioc.ic_len = 0; 4375 strioc.ic_dp = NULL; 4376 (void) strdoioctl(stp, &strioc, flag, K_TO_K, crp, rvalp); 4377 *rvalp = 0; 4378 return (0); 4379 4380 case I_FLUSHBAND: 4381 { 4382 struct bandinfo binfo; 4383 4384 error = strcopyin((void *)arg, &binfo, sizeof (binfo), 4385 copyflag); 4386 if (error) 4387 return (error); 4388 if (stp->sd_flag & STRHUP) 4389 return (ENXIO); 4390 if (binfo.bi_flag & ~FLUSHRW) 4391 return (EINVAL); 4392 while (!(mp = allocb(2, BPRI_HI))) { 4393 if (error = strwaitbuf(2, BPRI_HI)) 4394 return (error); 4395 } 4396 mp->b_datap->db_type = M_FLUSH; 4397 *mp->b_wptr++ = binfo.bi_flag | FLUSHBAND; 4398 *mp->b_wptr++ = binfo.bi_pri; 4399 putnext(stp->sd_wrq, mp); 4400 /* 4401 * Send down an unsupported ioctl and wait for the nack 4402 * in order to allow the M_FLUSH to propagate back 4403 * up to the stream head. 4404 * Replaces if (qready()) runqueues(); 4405 */ 4406 strioc.ic_cmd = -1; /* The unsupported ioctl */ 4407 strioc.ic_timout = 0; 4408 strioc.ic_len = 0; 4409 strioc.ic_dp = NULL; 4410 (void) strdoioctl(stp, &strioc, flag, K_TO_K, crp, rvalp); 4411 *rvalp = 0; 4412 return (0); 4413 } 4414 4415 case I_SRDOPT: 4416 /* 4417 * Set read options 4418 * 4419 * RNORM - default stream mode 4420 * RMSGN - message no discard 4421 * RMSGD - message discard 4422 * RPROTNORM - fail read with EBADMSG for M_[PC]PROTOs 4423 * RPROTDAT - convert M_[PC]PROTOs to M_DATAs 4424 * RPROTDIS - discard M_[PC]PROTOs and retain M_DATAs 4425 */ 4426 if (arg & ~(RMODEMASK | RPROTMASK)) 4427 return (EINVAL); 4428 4429 if ((arg & (RMSGD|RMSGN)) == (RMSGD|RMSGN)) 4430 return (EINVAL); 4431 4432 mutex_enter(&stp->sd_lock); 4433 switch (arg & RMODEMASK) { 4434 case RNORM: 4435 stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS); 4436 break; 4437 case RMSGD: 4438 stp->sd_read_opt = (stp->sd_read_opt & ~RD_MSGNODIS) | 4439 RD_MSGDIS; 4440 
break; 4441 case RMSGN: 4442 stp->sd_read_opt = (stp->sd_read_opt & ~RD_MSGDIS) | 4443 RD_MSGNODIS; 4444 break; 4445 } 4446 4447 switch (arg & RPROTMASK) { 4448 case RPROTNORM: 4449 stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS); 4450 break; 4451 4452 case RPROTDAT: 4453 stp->sd_read_opt = ((stp->sd_read_opt & ~RD_PROTDIS) | 4454 RD_PROTDAT); 4455 break; 4456 4457 case RPROTDIS: 4458 stp->sd_read_opt = ((stp->sd_read_opt & ~RD_PROTDAT) | 4459 RD_PROTDIS); 4460 break; 4461 } 4462 mutex_exit(&stp->sd_lock); 4463 return (0); 4464 4465 case I_GRDOPT: 4466 /* 4467 * Get read option and return the value 4468 * to spot pointed to by arg 4469 */ 4470 { 4471 int rdopt; 4472 4473 rdopt = ((stp->sd_read_opt & RD_MSGDIS) ? RMSGD : 4474 ((stp->sd_read_opt & RD_MSGNODIS) ? RMSGN : RNORM)); 4475 rdopt |= ((stp->sd_read_opt & RD_PROTDAT) ? RPROTDAT : 4476 ((stp->sd_read_opt & RD_PROTDIS) ? RPROTDIS : RPROTNORM)); 4477 4478 return (strcopyout(&rdopt, (void *)arg, sizeof (int), 4479 copyflag)); 4480 } 4481 4482 case I_SERROPT: 4483 /* 4484 * Set error options 4485 * 4486 * RERRNORM - persistent read errors 4487 * RERRNONPERSIST - non-persistent read errors 4488 * WERRNORM - persistent write errors 4489 * WERRNONPERSIST - non-persistent write errors 4490 */ 4491 if (arg & ~(RERRMASK | WERRMASK)) 4492 return (EINVAL); 4493 4494 mutex_enter(&stp->sd_lock); 4495 switch (arg & RERRMASK) { 4496 case RERRNORM: 4497 stp->sd_flag &= ~STRDERRNONPERSIST; 4498 break; 4499 case RERRNONPERSIST: 4500 stp->sd_flag |= STRDERRNONPERSIST; 4501 break; 4502 } 4503 switch (arg & WERRMASK) { 4504 case WERRNORM: 4505 stp->sd_flag &= ~STWRERRNONPERSIST; 4506 break; 4507 case WERRNONPERSIST: 4508 stp->sd_flag |= STWRERRNONPERSIST; 4509 break; 4510 } 4511 mutex_exit(&stp->sd_lock); 4512 return (0); 4513 4514 case I_GERROPT: 4515 /* 4516 * Get error option and return the value 4517 * to spot pointed to by arg 4518 */ 4519 { 4520 int erropt = 0; 4521 4522 erropt |= (stp->sd_flag & STRDERRNONPERSIST) ? 
RERRNONPERSIST : 4523 RERRNORM; 4524 erropt |= (stp->sd_flag & STWRERRNONPERSIST) ? WERRNONPERSIST : 4525 WERRNORM; 4526 return (strcopyout(&erropt, (void *)arg, sizeof (int), 4527 copyflag)); 4528 } 4529 4530 case I_SETSIG: 4531 /* 4532 * Register the calling proc to receive the SIGPOLL 4533 * signal based on the events given in arg. If 4534 * arg is zero, remove the proc from register list. 4535 */ 4536 { 4537 strsig_t *ssp, *pssp; 4538 struct pid *pidp; 4539 4540 pssp = NULL; 4541 pidp = curproc->p_pidp; 4542 /* 4543 * Hold sd_lock to prevent traversal of sd_siglist while 4544 * it is modified. 4545 */ 4546 mutex_enter(&stp->sd_lock); 4547 for (ssp = stp->sd_siglist; ssp && (ssp->ss_pidp != pidp); 4548 pssp = ssp, ssp = ssp->ss_next) 4549 ; 4550 4551 if (arg) { 4552 if (arg & ~(S_INPUT|S_HIPRI|S_MSG|S_HANGUP|S_ERROR| 4553 S_RDNORM|S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)) { 4554 mutex_exit(&stp->sd_lock); 4555 return (EINVAL); 4556 } 4557 if ((arg & S_BANDURG) && !(arg & S_RDBAND)) { 4558 mutex_exit(&stp->sd_lock); 4559 return (EINVAL); 4560 } 4561 4562 /* 4563 * If proc not already registered, add it 4564 * to list. 4565 */ 4566 if (!ssp) { 4567 ssp = kmem_alloc(sizeof (strsig_t), KM_SLEEP); 4568 ssp->ss_pidp = pidp; 4569 ssp->ss_pid = pidp->pid_id; 4570 ssp->ss_next = NULL; 4571 if (pssp) 4572 pssp->ss_next = ssp; 4573 else 4574 stp->sd_siglist = ssp; 4575 mutex_enter(&pidlock); 4576 PID_HOLD(pidp); 4577 mutex_exit(&pidlock); 4578 } 4579 4580 /* 4581 * Set events. 4582 */ 4583 ssp->ss_events = (int)arg; 4584 } else { 4585 /* 4586 * Remove proc from register list. 4587 */ 4588 if (ssp) { 4589 mutex_enter(&pidlock); 4590 PID_RELE(pidp); 4591 mutex_exit(&pidlock); 4592 if (pssp) 4593 pssp->ss_next = ssp->ss_next; 4594 else 4595 stp->sd_siglist = ssp->ss_next; 4596 kmem_free(ssp, sizeof (strsig_t)); 4597 } else { 4598 mutex_exit(&stp->sd_lock); 4599 return (EINVAL); 4600 } 4601 } 4602 4603 /* 4604 * Recalculate OR of sig events. 
4605 */ 4606 stp->sd_sigflags = 0; 4607 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 4608 stp->sd_sigflags |= ssp->ss_events; 4609 mutex_exit(&stp->sd_lock); 4610 return (0); 4611 } 4612 4613 case I_GETSIG: 4614 /* 4615 * Return (in arg) the current registration of events 4616 * for which the calling proc is to be signaled. 4617 */ 4618 { 4619 struct strsig *ssp; 4620 struct pid *pidp; 4621 4622 pidp = curproc->p_pidp; 4623 mutex_enter(&stp->sd_lock); 4624 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 4625 if (ssp->ss_pidp == pidp) { 4626 error = strcopyout(&ssp->ss_events, (void *)arg, 4627 sizeof (int), copyflag); 4628 mutex_exit(&stp->sd_lock); 4629 return (error); 4630 } 4631 mutex_exit(&stp->sd_lock); 4632 return (EINVAL); 4633 } 4634 4635 case I_ESETSIG: 4636 /* 4637 * Register the ss_pid to receive the SIGPOLL 4638 * signal based on the events is ss_events arg. If 4639 * ss_events is zero, remove the proc from register list. 4640 */ 4641 { 4642 struct strsig *ssp, *pssp; 4643 struct proc *proc; 4644 struct pid *pidp; 4645 pid_t pid; 4646 struct strsigset ss; 4647 4648 error = strcopyin((void *)arg, &ss, sizeof (ss), copyflag); 4649 if (error) 4650 return (error); 4651 4652 pid = ss.ss_pid; 4653 4654 if (ss.ss_events != 0) { 4655 /* 4656 * Permissions check by sending signal 0. 4657 * Note that when kill fails it does a set_errno 4658 * causing the system call to fail. 4659 */ 4660 error = kill(pid, 0); 4661 if (error) { 4662 return (error); 4663 } 4664 } 4665 mutex_enter(&pidlock); 4666 if (pid == 0) 4667 proc = curproc; 4668 else if (pid < 0) 4669 proc = pgfind(-pid); 4670 else 4671 proc = prfind(pid); 4672 if (proc == NULL) { 4673 mutex_exit(&pidlock); 4674 return (ESRCH); 4675 } 4676 if (pid < 0) 4677 pidp = proc->p_pgidp; 4678 else 4679 pidp = proc->p_pidp; 4680 ASSERT(pidp); 4681 /* 4682 * Get a hold on the pid structure while referencing it. 4683 * There is a separate PID_HOLD should it be inserted 4684 * in the list below. 
4685 */ 4686 PID_HOLD(pidp); 4687 mutex_exit(&pidlock); 4688 4689 pssp = NULL; 4690 /* 4691 * Hold sd_lock to prevent traversal of sd_siglist while 4692 * it is modified. 4693 */ 4694 mutex_enter(&stp->sd_lock); 4695 for (ssp = stp->sd_siglist; ssp && (ssp->ss_pid != pid); 4696 pssp = ssp, ssp = ssp->ss_next) 4697 ; 4698 4699 if (ss.ss_events) { 4700 if (ss.ss_events & 4701 ~(S_INPUT|S_HIPRI|S_MSG|S_HANGUP|S_ERROR| 4702 S_RDNORM|S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)) { 4703 mutex_exit(&stp->sd_lock); 4704 mutex_enter(&pidlock); 4705 PID_RELE(pidp); 4706 mutex_exit(&pidlock); 4707 return (EINVAL); 4708 } 4709 if ((ss.ss_events & S_BANDURG) && 4710 !(ss.ss_events & S_RDBAND)) { 4711 mutex_exit(&stp->sd_lock); 4712 mutex_enter(&pidlock); 4713 PID_RELE(pidp); 4714 mutex_exit(&pidlock); 4715 return (EINVAL); 4716 } 4717 4718 /* 4719 * If proc not already registered, add it 4720 * to list. 4721 */ 4722 if (!ssp) { 4723 ssp = kmem_alloc(sizeof (strsig_t), KM_SLEEP); 4724 ssp->ss_pidp = pidp; 4725 ssp->ss_pid = pid; 4726 ssp->ss_next = NULL; 4727 if (pssp) 4728 pssp->ss_next = ssp; 4729 else 4730 stp->sd_siglist = ssp; 4731 mutex_enter(&pidlock); 4732 PID_HOLD(pidp); 4733 mutex_exit(&pidlock); 4734 } 4735 4736 /* 4737 * Set events. 4738 */ 4739 ssp->ss_events = ss.ss_events; 4740 } else { 4741 /* 4742 * Remove proc from register list. 4743 */ 4744 if (ssp) { 4745 mutex_enter(&pidlock); 4746 PID_RELE(pidp); 4747 mutex_exit(&pidlock); 4748 if (pssp) 4749 pssp->ss_next = ssp->ss_next; 4750 else 4751 stp->sd_siglist = ssp->ss_next; 4752 kmem_free(ssp, sizeof (strsig_t)); 4753 } else { 4754 mutex_exit(&stp->sd_lock); 4755 mutex_enter(&pidlock); 4756 PID_RELE(pidp); 4757 mutex_exit(&pidlock); 4758 return (EINVAL); 4759 } 4760 } 4761 4762 /* 4763 * Recalculate OR of sig events. 
4764 */ 4765 stp->sd_sigflags = 0; 4766 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 4767 stp->sd_sigflags |= ssp->ss_events; 4768 mutex_exit(&stp->sd_lock); 4769 mutex_enter(&pidlock); 4770 PID_RELE(pidp); 4771 mutex_exit(&pidlock); 4772 return (0); 4773 } 4774 4775 case I_EGETSIG: 4776 /* 4777 * Return (in arg) the current registration of events 4778 * for which the calling proc is to be signaled. 4779 */ 4780 { 4781 struct strsig *ssp; 4782 struct proc *proc; 4783 pid_t pid; 4784 struct pid *pidp; 4785 struct strsigset ss; 4786 4787 error = strcopyin((void *)arg, &ss, sizeof (ss), copyflag); 4788 if (error) 4789 return (error); 4790 4791 pid = ss.ss_pid; 4792 mutex_enter(&pidlock); 4793 if (pid == 0) 4794 proc = curproc; 4795 else if (pid < 0) 4796 proc = pgfind(-pid); 4797 else 4798 proc = prfind(pid); 4799 if (proc == NULL) { 4800 mutex_exit(&pidlock); 4801 return (ESRCH); 4802 } 4803 if (pid < 0) 4804 pidp = proc->p_pgidp; 4805 else 4806 pidp = proc->p_pidp; 4807 4808 /* Prevent the pidp from being reassigned */ 4809 PID_HOLD(pidp); 4810 mutex_exit(&pidlock); 4811 4812 mutex_enter(&stp->sd_lock); 4813 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 4814 if (ssp->ss_pid == pid) { 4815 ss.ss_pid = ssp->ss_pid; 4816 ss.ss_events = ssp->ss_events; 4817 error = strcopyout(&ss, (void *)arg, 4818 sizeof (struct strsigset), copyflag); 4819 mutex_exit(&stp->sd_lock); 4820 mutex_enter(&pidlock); 4821 PID_RELE(pidp); 4822 mutex_exit(&pidlock); 4823 return (error); 4824 } 4825 mutex_exit(&stp->sd_lock); 4826 mutex_enter(&pidlock); 4827 PID_RELE(pidp); 4828 mutex_exit(&pidlock); 4829 return (EINVAL); 4830 } 4831 4832 case I_PEEK: 4833 { 4834 STRUCT_DECL(strpeek, strpeek); 4835 size_t n; 4836 mblk_t *fmp, *tmp_mp = NULL; 4837 4838 STRUCT_INIT(strpeek, flag); 4839 4840 error = strcopyin((void *)arg, STRUCT_BUF(strpeek), 4841 STRUCT_SIZE(strpeek), copyflag); 4842 if (error) 4843 return (error); 4844 4845 mutex_enter(QLOCK(rdq)); 4846 /* 4847 * Skip the invalid 
messages 4848 */ 4849 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next) 4850 if (mp->b_datap->db_type != M_SIG) 4851 break; 4852 4853 /* 4854 * If user has requested to peek at a high priority message 4855 * and first message is not, return 0 4856 */ 4857 if (mp != NULL) { 4858 if ((STRUCT_FGET(strpeek, flags) & RS_HIPRI) && 4859 queclass(mp) == QNORM) { 4860 *rvalp = 0; 4861 mutex_exit(QLOCK(rdq)); 4862 return (0); 4863 } 4864 } else if (stp->sd_struiordq == NULL || 4865 (STRUCT_FGET(strpeek, flags) & RS_HIPRI)) { 4866 /* 4867 * No mblks to look at at the streamhead and 4868 * 1). This isn't a synch stream or 4869 * 2). This is a synch stream but caller wants high 4870 * priority messages which is not supported by 4871 * the synch stream. (it only supports QNORM) 4872 */ 4873 *rvalp = 0; 4874 mutex_exit(QLOCK(rdq)); 4875 return (0); 4876 } 4877 4878 fmp = mp; 4879 4880 if (mp && mp->b_datap->db_type == M_PASSFP) { 4881 mutex_exit(QLOCK(rdq)); 4882 return (EBADMSG); 4883 } 4884 4885 ASSERT(mp == NULL || mp->b_datap->db_type == M_PCPROTO || 4886 mp->b_datap->db_type == M_PROTO || 4887 mp->b_datap->db_type == M_DATA); 4888 4889 if (mp && mp->b_datap->db_type == M_PCPROTO) { 4890 STRUCT_FSET(strpeek, flags, RS_HIPRI); 4891 } else { 4892 STRUCT_FSET(strpeek, flags, 0); 4893 } 4894 4895 4896 if (mp && ((tmp_mp = dupmsg(mp)) == NULL)) { 4897 mutex_exit(QLOCK(rdq)); 4898 return (ENOSR); 4899 } 4900 mutex_exit(QLOCK(rdq)); 4901 4902 /* 4903 * set mp = tmp_mp, so that I_PEEK processing can continue. 4904 * tmp_mp is used to free the dup'd message. 4905 */ 4906 mp = tmp_mp; 4907 4908 uio.uio_fmode = 0; 4909 uio.uio_extflg = UIO_COPY_CACHED; 4910 uio.uio_segflg = (copyflag == U_TO_K) ? UIO_USERSPACE : 4911 UIO_SYSSPACE; 4912 uio.uio_limit = 0; 4913 /* 4914 * First process PROTO blocks, if any. 4915 * If user doesn't want to get ctl info by setting maxlen <= 0, 4916 * then set len to -1/0 and skip control blocks part. 
4917 */ 4918 if (STRUCT_FGET(strpeek, ctlbuf.maxlen) < 0) 4919 STRUCT_FSET(strpeek, ctlbuf.len, -1); 4920 else if (STRUCT_FGET(strpeek, ctlbuf.maxlen) == 0) 4921 STRUCT_FSET(strpeek, ctlbuf.len, 0); 4922 else { 4923 int ctl_part = 0; 4924 4925 iov.iov_base = STRUCT_FGETP(strpeek, ctlbuf.buf); 4926 iov.iov_len = STRUCT_FGET(strpeek, ctlbuf.maxlen); 4927 uio.uio_iov = &iov; 4928 uio.uio_resid = iov.iov_len; 4929 uio.uio_loffset = 0; 4930 uio.uio_iovcnt = 1; 4931 while (mp && mp->b_datap->db_type != M_DATA && 4932 uio.uio_resid >= 0) { 4933 ASSERT(STRUCT_FGET(strpeek, flags) == 0 ? 4934 mp->b_datap->db_type == M_PROTO : 4935 mp->b_datap->db_type == M_PCPROTO); 4936 4937 if ((n = MIN(uio.uio_resid, 4938 mp->b_wptr - mp->b_rptr)) != 0 && 4939 (error = uiomove((char *)mp->b_rptr, n, 4940 UIO_READ, &uio)) != 0) { 4941 freemsg(tmp_mp); 4942 return (error); 4943 } 4944 ctl_part = 1; 4945 mp = mp->b_cont; 4946 } 4947 /* No ctl message */ 4948 if (ctl_part == 0) 4949 STRUCT_FSET(strpeek, ctlbuf.len, -1); 4950 else 4951 STRUCT_FSET(strpeek, ctlbuf.len, 4952 STRUCT_FGET(strpeek, ctlbuf.maxlen) - 4953 uio.uio_resid); 4954 } 4955 4956 /* 4957 * Now process DATA blocks, if any. 4958 * If user doesn't want to get data info by setting maxlen <= 0, 4959 * then set len to -1/0 and skip data blocks part. 
4960 */ 4961 if (STRUCT_FGET(strpeek, databuf.maxlen) < 0) 4962 STRUCT_FSET(strpeek, databuf.len, -1); 4963 else if (STRUCT_FGET(strpeek, databuf.maxlen) == 0) 4964 STRUCT_FSET(strpeek, databuf.len, 0); 4965 else { 4966 int data_part = 0; 4967 4968 iov.iov_base = STRUCT_FGETP(strpeek, databuf.buf); 4969 iov.iov_len = STRUCT_FGET(strpeek, databuf.maxlen); 4970 uio.uio_iov = &iov; 4971 uio.uio_resid = iov.iov_len; 4972 uio.uio_loffset = 0; 4973 uio.uio_iovcnt = 1; 4974 while (mp && uio.uio_resid) { 4975 if (mp->b_datap->db_type == M_DATA) { 4976 if ((n = MIN(uio.uio_resid, 4977 mp->b_wptr - mp->b_rptr)) != 0 && 4978 (error = uiomove((char *)mp->b_rptr, 4979 n, UIO_READ, &uio)) != 0) { 4980 freemsg(tmp_mp); 4981 return (error); 4982 } 4983 data_part = 1; 4984 } 4985 ASSERT(data_part == 0 || 4986 mp->b_datap->db_type == M_DATA); 4987 mp = mp->b_cont; 4988 } 4989 /* No data message */ 4990 if (data_part == 0) 4991 STRUCT_FSET(strpeek, databuf.len, -1); 4992 else 4993 STRUCT_FSET(strpeek, databuf.len, 4994 STRUCT_FGET(strpeek, databuf.maxlen) - 4995 uio.uio_resid); 4996 } 4997 freemsg(tmp_mp); 4998 4999 /* 5000 * It is a synch stream and user wants to get 5001 * data (maxlen > 0). 5002 * uio setup is done by the codes that process DATA 5003 * blocks above. 5004 */ 5005 if ((fmp == NULL) && STRUCT_FGET(strpeek, databuf.maxlen) > 0) { 5006 infod_t infod; 5007 5008 infod.d_cmd = INFOD_COPYOUT; 5009 infod.d_res = 0; 5010 infod.d_uiop = &uio; 5011 error = infonext(rdq, &infod); 5012 if (error == EINVAL || error == EBUSY) 5013 error = 0; 5014 if (error) 5015 return (error); 5016 STRUCT_FSET(strpeek, databuf.len, STRUCT_FGET(strpeek, 5017 databuf.maxlen) - uio.uio_resid); 5018 if (STRUCT_FGET(strpeek, databuf.len) == 0) { 5019 /* 5020 * No data found by the infonext(). 
5021 */ 5022 STRUCT_FSET(strpeek, databuf.len, -1); 5023 } 5024 } 5025 error = strcopyout(STRUCT_BUF(strpeek), (void *)arg, 5026 STRUCT_SIZE(strpeek), copyflag); 5027 if (error) { 5028 return (error); 5029 } 5030 /* 5031 * If there is no message retrieved, set return code to 0 5032 * otherwise, set it to 1. 5033 */ 5034 if (STRUCT_FGET(strpeek, ctlbuf.len) == -1 && 5035 STRUCT_FGET(strpeek, databuf.len) == -1) 5036 *rvalp = 0; 5037 else 5038 *rvalp = 1; 5039 return (0); 5040 } 5041 5042 case I_FDINSERT: 5043 { 5044 STRUCT_DECL(strfdinsert, strfdinsert); 5045 struct file *resftp; 5046 struct stdata *resstp; 5047 t_uscalar_t ival; 5048 ssize_t msgsize; 5049 struct strbuf mctl; 5050 5051 STRUCT_INIT(strfdinsert, flag); 5052 if (stp->sd_flag & STRHUP) 5053 return (ENXIO); 5054 /* 5055 * STRDERR, STWRERR and STPLEX tested above. 5056 */ 5057 error = strcopyin((void *)arg, STRUCT_BUF(strfdinsert), 5058 STRUCT_SIZE(strfdinsert), copyflag); 5059 if (error) 5060 return (error); 5061 5062 if (STRUCT_FGET(strfdinsert, offset) < 0 || 5063 (STRUCT_FGET(strfdinsert, offset) % 5064 sizeof (t_uscalar_t)) != 0) 5065 return (EINVAL); 5066 if ((resftp = getf(STRUCT_FGET(strfdinsert, fildes))) != NULL) { 5067 if ((resstp = resftp->f_vnode->v_stream) == NULL) { 5068 releasef(STRUCT_FGET(strfdinsert, fildes)); 5069 return (EINVAL); 5070 } 5071 } else 5072 return (EINVAL); 5073 5074 mutex_enter(&resstp->sd_lock); 5075 if (resstp->sd_flag & (STRDERR|STWRERR|STRHUP|STPLEX)) { 5076 error = strgeterr(resstp, 5077 STRDERR|STWRERR|STRHUP|STPLEX, 0); 5078 if (error != 0) { 5079 mutex_exit(&resstp->sd_lock); 5080 releasef(STRUCT_FGET(strfdinsert, fildes)); 5081 return (error); 5082 } 5083 } 5084 mutex_exit(&resstp->sd_lock); 5085 5086 #ifdef _ILP32 5087 { 5088 queue_t *q; 5089 queue_t *mate = NULL; 5090 5091 /* get read queue of stream terminus */ 5092 claimstr(resstp->sd_wrq); 5093 for (q = resstp->sd_wrq->q_next; q->q_next != NULL; 5094 q = q->q_next) 5095 if (!STRMATED(resstp) && STREAM(q) != 
resstp && 5096 mate == NULL) { 5097 ASSERT(q->q_qinfo->qi_srvp); 5098 ASSERT(_OTHERQ(q)->q_qinfo->qi_srvp); 5099 claimstr(q); 5100 mate = q; 5101 } 5102 q = _RD(q); 5103 if (mate) 5104 releasestr(mate); 5105 releasestr(resstp->sd_wrq); 5106 ival = (t_uscalar_t)q; 5107 } 5108 #else 5109 ival = (t_uscalar_t)getminor(resftp->f_vnode->v_rdev); 5110 #endif /* _ILP32 */ 5111 5112 if (STRUCT_FGET(strfdinsert, ctlbuf.len) < 5113 STRUCT_FGET(strfdinsert, offset) + sizeof (t_uscalar_t)) { 5114 releasef(STRUCT_FGET(strfdinsert, fildes)); 5115 return (EINVAL); 5116 } 5117 5118 /* 5119 * Check for legal flag value. 5120 */ 5121 if (STRUCT_FGET(strfdinsert, flags) & ~RS_HIPRI) { 5122 releasef(STRUCT_FGET(strfdinsert, fildes)); 5123 return (EINVAL); 5124 } 5125 5126 /* get these values from those cached in the stream head */ 5127 mutex_enter(QLOCK(stp->sd_wrq)); 5128 rmin = stp->sd_qn_minpsz; 5129 rmax = stp->sd_qn_maxpsz; 5130 mutex_exit(QLOCK(stp->sd_wrq)); 5131 5132 /* 5133 * Make sure ctl and data sizes together fall within 5134 * the limits of the max and min receive packet sizes 5135 * and do not exceed system limit. A negative data 5136 * length means that no data part is to be sent. 
5137 */ 5138 ASSERT((rmax >= 0) || (rmax == INFPSZ)); 5139 if (rmax == 0) { 5140 releasef(STRUCT_FGET(strfdinsert, fildes)); 5141 return (ERANGE); 5142 } 5143 if ((msgsize = STRUCT_FGET(strfdinsert, databuf.len)) < 0) 5144 msgsize = 0; 5145 if ((msgsize < rmin) || 5146 ((msgsize > rmax) && (rmax != INFPSZ)) || 5147 (STRUCT_FGET(strfdinsert, ctlbuf.len) > strctlsz)) { 5148 releasef(STRUCT_FGET(strfdinsert, fildes)); 5149 return (ERANGE); 5150 } 5151 5152 mutex_enter(&stp->sd_lock); 5153 while (!(STRUCT_FGET(strfdinsert, flags) & RS_HIPRI) && 5154 !canputnext(stp->sd_wrq)) { 5155 if ((error = strwaitq(stp, WRITEWAIT, (ssize_t)0, 5156 flag, -1, &done)) != 0 || done) { 5157 mutex_exit(&stp->sd_lock); 5158 releasef(STRUCT_FGET(strfdinsert, fildes)); 5159 return (error); 5160 } 5161 if ((error = i_straccess(stp, access)) != 0) { 5162 mutex_exit(&stp->sd_lock); 5163 releasef( 5164 STRUCT_FGET(strfdinsert, fildes)); 5165 return (error); 5166 } 5167 } 5168 mutex_exit(&stp->sd_lock); 5169 5170 /* 5171 * Copy strfdinsert.ctlbuf into native form of 5172 * ctlbuf to pass down into strmakemsg(). 5173 */ 5174 mctl.maxlen = STRUCT_FGET(strfdinsert, ctlbuf.maxlen); 5175 mctl.len = STRUCT_FGET(strfdinsert, ctlbuf.len); 5176 mctl.buf = STRUCT_FGETP(strfdinsert, ctlbuf.buf); 5177 5178 iov.iov_base = STRUCT_FGETP(strfdinsert, databuf.buf); 5179 iov.iov_len = STRUCT_FGET(strfdinsert, databuf.len); 5180 uio.uio_iov = &iov; 5181 uio.uio_iovcnt = 1; 5182 uio.uio_loffset = 0; 5183 uio.uio_segflg = (copyflag == U_TO_K) ? 
UIO_USERSPACE : 5184 UIO_SYSSPACE; 5185 uio.uio_fmode = 0; 5186 uio.uio_extflg = UIO_COPY_CACHED; 5187 uio.uio_resid = iov.iov_len; 5188 if ((error = strmakemsg(&mctl, 5189 &msgsize, &uio, stp, 5190 STRUCT_FGET(strfdinsert, flags), &mp)) != 0 || !mp) { 5191 STRUCT_FSET(strfdinsert, databuf.len, msgsize); 5192 releasef(STRUCT_FGET(strfdinsert, fildes)); 5193 return (error); 5194 } 5195 5196 STRUCT_FSET(strfdinsert, databuf.len, msgsize); 5197 5198 /* 5199 * Place the possibly reencoded queue pointer 'offset' bytes 5200 * from the start of the control portion of the message. 5201 */ 5202 *((t_uscalar_t *)(mp->b_rptr + 5203 STRUCT_FGET(strfdinsert, offset))) = ival; 5204 5205 /* 5206 * Put message downstream. 5207 */ 5208 stream_willservice(stp); 5209 putnext(stp->sd_wrq, mp); 5210 stream_runservice(stp); 5211 releasef(STRUCT_FGET(strfdinsert, fildes)); 5212 return (error); 5213 } 5214 5215 case I_SENDFD: 5216 { 5217 struct file *fp; 5218 5219 if ((fp = getf((int)arg)) == NULL) 5220 return (EBADF); 5221 error = do_sendfp(stp, fp, crp); 5222 if (audit_active) { 5223 audit_fdsend((int)arg, fp, error); 5224 } 5225 releasef((int)arg); 5226 return (error); 5227 } 5228 5229 case I_RECVFD: 5230 case I_E_RECVFD: 5231 { 5232 struct k_strrecvfd *srf; 5233 int i, fd; 5234 5235 mutex_enter(&stp->sd_lock); 5236 while (!(mp = getq(rdq))) { 5237 if (stp->sd_flag & (STRHUP|STREOF)) { 5238 mutex_exit(&stp->sd_lock); 5239 return (ENXIO); 5240 } 5241 if ((error = strwaitq(stp, GETWAIT, (ssize_t)0, 5242 flag, -1, &done)) != 0 || done) { 5243 mutex_exit(&stp->sd_lock); 5244 return (error); 5245 } 5246 if ((error = i_straccess(stp, access)) != 0) { 5247 mutex_exit(&stp->sd_lock); 5248 return (error); 5249 } 5250 } 5251 if (mp->b_datap->db_type != M_PASSFP) { 5252 putback(stp, rdq, mp, mp->b_band); 5253 mutex_exit(&stp->sd_lock); 5254 return (EBADMSG); 5255 } 5256 mutex_exit(&stp->sd_lock); 5257 5258 srf = (struct k_strrecvfd *)mp->b_rptr; 5259 if ((fd = ufalloc(0)) == -1) { 5260 
mutex_enter(&stp->sd_lock); 5261 putback(stp, rdq, mp, mp->b_band); 5262 mutex_exit(&stp->sd_lock); 5263 return (EMFILE); 5264 } 5265 if (cmd == I_RECVFD) { 5266 struct o_strrecvfd ostrfd; 5267 5268 /* check to see if uid/gid values are too large. */ 5269 5270 if (srf->uid > (o_uid_t)USHRT_MAX || 5271 srf->gid > (o_gid_t)USHRT_MAX) { 5272 mutex_enter(&stp->sd_lock); 5273 putback(stp, rdq, mp, mp->b_band); 5274 mutex_exit(&stp->sd_lock); 5275 setf(fd, NULL); /* release fd entry */ 5276 return (EOVERFLOW); 5277 } 5278 5279 ostrfd.fd = fd; 5280 ostrfd.uid = (o_uid_t)srf->uid; 5281 ostrfd.gid = (o_gid_t)srf->gid; 5282 5283 /* Null the filler bits */ 5284 for (i = 0; i < 8; i++) 5285 ostrfd.fill[i] = 0; 5286 5287 error = strcopyout(&ostrfd, (void *)arg, 5288 sizeof (struct o_strrecvfd), copyflag); 5289 } else { /* I_E_RECVFD */ 5290 struct strrecvfd strfd; 5291 5292 strfd.fd = fd; 5293 strfd.uid = srf->uid; 5294 strfd.gid = srf->gid; 5295 5296 /* null the filler bits */ 5297 for (i = 0; i < 8; i++) 5298 strfd.fill[i] = 0; 5299 5300 error = strcopyout(&strfd, (void *)arg, 5301 sizeof (struct strrecvfd), copyflag); 5302 } 5303 5304 if (error) { 5305 setf(fd, NULL); /* release fd entry */ 5306 mutex_enter(&stp->sd_lock); 5307 putback(stp, rdq, mp, mp->b_band); 5308 mutex_exit(&stp->sd_lock); 5309 return (error); 5310 } 5311 if (audit_active) { 5312 audit_fdrecv(fd, srf->fp); 5313 } 5314 5315 /* 5316 * Always increment f_count since the freemsg() below will 5317 * always call free_passfp() which performs a closef(). 5318 */ 5319 mutex_enter(&srf->fp->f_tlock); 5320 srf->fp->f_count++; 5321 mutex_exit(&srf->fp->f_tlock); 5322 setf(fd, srf->fp); 5323 freemsg(mp); 5324 return (0); 5325 } 5326 5327 case I_SWROPT: 5328 /* 5329 * Set/clear the write options. arg is a bit 5330 * mask with any of the following bits set... 5331 * SNDZERO - send zero length message 5332 * SNDPIPE - send sigpipe to process if 5333 * sd_werror is set and process is 5334 * doing a write or putmsg. 
5335 * The new stream head write options should reflect 5336 * what is in arg. 5337 */ 5338 if (arg & ~(SNDZERO|SNDPIPE)) 5339 return (EINVAL); 5340 5341 mutex_enter(&stp->sd_lock); 5342 stp->sd_wput_opt &= ~(SW_SIGPIPE|SW_SNDZERO); 5343 if (arg & SNDZERO) 5344 stp->sd_wput_opt |= SW_SNDZERO; 5345 if (arg & SNDPIPE) 5346 stp->sd_wput_opt |= SW_SIGPIPE; 5347 mutex_exit(&stp->sd_lock); 5348 return (0); 5349 5350 case I_GWROPT: 5351 { 5352 int wropt = 0; 5353 5354 if (stp->sd_wput_opt & SW_SNDZERO) 5355 wropt |= SNDZERO; 5356 if (stp->sd_wput_opt & SW_SIGPIPE) 5357 wropt |= SNDPIPE; 5358 return (strcopyout(&wropt, (void *)arg, sizeof (wropt), 5359 copyflag)); 5360 } 5361 5362 case I_LIST: 5363 /* 5364 * Returns all the modules found on this stream, 5365 * upto the driver. If argument is NULL, return the 5366 * number of modules (including driver). If argument 5367 * is not NULL, copy the names into the structure 5368 * provided. 5369 */ 5370 5371 { 5372 queue_t *q; 5373 int num_modules, space_allocated; 5374 STRUCT_DECL(str_list, strlist); 5375 struct str_mlist *mlist_ptr; 5376 5377 if (arg == NULL) { /* Return number of modules plus driver */ 5378 q = stp->sd_wrq; 5379 if (stp->sd_vnode->v_type == VFIFO) { 5380 *rvalp = stp->sd_pushcnt; 5381 } else { 5382 *rvalp = stp->sd_pushcnt + 1; 5383 } 5384 } else { 5385 STRUCT_INIT(strlist, flag); 5386 5387 error = strcopyin((void *)arg, STRUCT_BUF(strlist), 5388 STRUCT_SIZE(strlist), copyflag); 5389 if (error) 5390 return (error); 5391 5392 space_allocated = STRUCT_FGET(strlist, sl_nmods); 5393 if ((space_allocated) <= 0) 5394 return (EINVAL); 5395 claimstr(stp->sd_wrq); 5396 q = stp->sd_wrq; 5397 num_modules = 0; 5398 while (_SAMESTR(q) && (space_allocated != 0)) { 5399 char *name = 5400 q->q_next->q_qinfo->qi_minfo->mi_idname; 5401 5402 mlist_ptr = STRUCT_FGETP(strlist, sl_modlist); 5403 5404 error = strcopyout(name, mlist_ptr, 5405 strlen(name) + 1, copyflag); 5406 5407 if (error) { 5408 releasestr(stp->sd_wrq); 5409 
return (error); 5410 } 5411 q = q->q_next; 5412 space_allocated--; 5413 num_modules++; 5414 mlist_ptr = 5415 (struct str_mlist *)((uintptr_t)mlist_ptr + 5416 sizeof (struct str_mlist)); 5417 STRUCT_FSETP(strlist, sl_modlist, mlist_ptr); 5418 } 5419 releasestr(stp->sd_wrq); 5420 error = strcopyout(&num_modules, (void *)arg, 5421 sizeof (int), copyflag); 5422 } 5423 return (error); 5424 } 5425 5426 case I_CKBAND: 5427 { 5428 queue_t *q; 5429 qband_t *qbp; 5430 5431 if ((arg < 0) || (arg >= NBAND)) 5432 return (EINVAL); 5433 q = _RD(stp->sd_wrq); 5434 mutex_enter(QLOCK(q)); 5435 if (arg > (int)q->q_nband) { 5436 *rvalp = 0; 5437 } else { 5438 if (arg == 0) { 5439 if (q->q_first) 5440 *rvalp = 1; 5441 else 5442 *rvalp = 0; 5443 } else { 5444 qbp = q->q_bandp; 5445 while (--arg > 0) 5446 qbp = qbp->qb_next; 5447 if (qbp->qb_first) 5448 *rvalp = 1; 5449 else 5450 *rvalp = 0; 5451 } 5452 } 5453 mutex_exit(QLOCK(q)); 5454 return (0); 5455 } 5456 5457 case I_GETBAND: 5458 { 5459 int intpri; 5460 queue_t *q; 5461 5462 q = _RD(stp->sd_wrq); 5463 mutex_enter(QLOCK(q)); 5464 mp = q->q_first; 5465 if (!mp) { 5466 mutex_exit(QLOCK(q)); 5467 return (ENODATA); 5468 } 5469 intpri = (int)mp->b_band; 5470 error = strcopyout(&intpri, (void *)arg, sizeof (int), 5471 copyflag); 5472 mutex_exit(QLOCK(q)); 5473 return (error); 5474 } 5475 5476 case I_ATMARK: 5477 { 5478 queue_t *q; 5479 5480 if (arg & ~(ANYMARK|LASTMARK)) 5481 return (EINVAL); 5482 q = _RD(stp->sd_wrq); 5483 mutex_enter(&stp->sd_lock); 5484 if ((stp->sd_flag & STRATMARK) && (arg == ANYMARK)) { 5485 *rvalp = 1; 5486 } else { 5487 mutex_enter(QLOCK(q)); 5488 mp = q->q_first; 5489 5490 if (mp == NULL) 5491 *rvalp = 0; 5492 else if ((arg == ANYMARK) && (mp->b_flag & MSGMARK)) 5493 *rvalp = 1; 5494 else if ((arg == LASTMARK) && (mp == stp->sd_mark)) 5495 *rvalp = 1; 5496 else 5497 *rvalp = 0; 5498 mutex_exit(QLOCK(q)); 5499 } 5500 mutex_exit(&stp->sd_lock); 5501 return (0); 5502 } 5503 5504 case I_CANPUT: 5505 { 5506 char band; 
5507 5508 if ((arg < 0) || (arg >= NBAND)) 5509 return (EINVAL); 5510 band = (char)arg; 5511 *rvalp = bcanputnext(stp->sd_wrq, band); 5512 return (0); 5513 } 5514 5515 case I_SETCLTIME: 5516 { 5517 int closetime; 5518 5519 error = strcopyin((void *)arg, &closetime, sizeof (int), 5520 copyflag); 5521 if (error) 5522 return (error); 5523 if (closetime < 0) 5524 return (EINVAL); 5525 5526 stp->sd_closetime = closetime; 5527 return (0); 5528 } 5529 5530 case I_GETCLTIME: 5531 { 5532 int closetime; 5533 5534 closetime = stp->sd_closetime; 5535 return (strcopyout(&closetime, (void *)arg, sizeof (int), 5536 copyflag)); 5537 } 5538 5539 case TIOCGSID: 5540 { 5541 pid_t sid; 5542 5543 mutex_enter(&stp->sd_lock); 5544 if (stp->sd_sidp == NULL) { 5545 mutex_exit(&stp->sd_lock); 5546 return (ENOTTY); 5547 } 5548 sid = stp->sd_sidp->pid_id; 5549 mutex_exit(&stp->sd_lock); 5550 return (strcopyout(&sid, (void *)arg, sizeof (pid_t), 5551 copyflag)); 5552 } 5553 5554 case TIOCSPGRP: 5555 { 5556 pid_t pgrp; 5557 proc_t *q; 5558 pid_t sid, fg_pgid, bg_pgid; 5559 5560 if (error = strcopyin((void *)arg, &pgrp, sizeof (pid_t), 5561 copyflag)) 5562 return (error); 5563 mutex_enter(&stp->sd_lock); 5564 mutex_enter(&pidlock); 5565 if (stp->sd_sidp != ttoproc(curthread)->p_sessp->s_sidp) { 5566 mutex_exit(&pidlock); 5567 mutex_exit(&stp->sd_lock); 5568 return (ENOTTY); 5569 } 5570 if (pgrp == stp->sd_pgidp->pid_id) { 5571 mutex_exit(&pidlock); 5572 mutex_exit(&stp->sd_lock); 5573 return (0); 5574 } 5575 if (pgrp <= 0 || pgrp >= maxpid) { 5576 mutex_exit(&pidlock); 5577 mutex_exit(&stp->sd_lock); 5578 return (EINVAL); 5579 } 5580 if ((q = pgfind(pgrp)) == NULL || 5581 q->p_sessp != ttoproc(curthread)->p_sessp) { 5582 mutex_exit(&pidlock); 5583 mutex_exit(&stp->sd_lock); 5584 return (EPERM); 5585 } 5586 sid = stp->sd_sidp->pid_id; 5587 fg_pgid = q->p_pgrp; 5588 bg_pgid = stp->sd_pgidp->pid_id; 5589 CL_SET_PROCESS_GROUP(curthread, sid, bg_pgid, fg_pgid); 5590 PID_RELE(stp->sd_pgidp); 5591 
ctty_clear_sighuped(); 5592 stp->sd_pgidp = q->p_pgidp; 5593 PID_HOLD(stp->sd_pgidp); 5594 mutex_exit(&pidlock); 5595 mutex_exit(&stp->sd_lock); 5596 return (0); 5597 } 5598 5599 case TIOCGPGRP: 5600 { 5601 pid_t pgrp; 5602 5603 mutex_enter(&stp->sd_lock); 5604 if (stp->sd_sidp == NULL) { 5605 mutex_exit(&stp->sd_lock); 5606 return (ENOTTY); 5607 } 5608 pgrp = stp->sd_pgidp->pid_id; 5609 mutex_exit(&stp->sd_lock); 5610 return (strcopyout(&pgrp, (void *)arg, sizeof (pid_t), 5611 copyflag)); 5612 } 5613 5614 case TIOCSCTTY: 5615 { 5616 return (strctty(stp)); 5617 } 5618 5619 case TIOCNOTTY: 5620 { 5621 /* freectty() always assumes curproc. */ 5622 if (freectty(B_FALSE) != 0) 5623 return (0); 5624 return (ENOTTY); 5625 } 5626 5627 case FIONBIO: 5628 case FIOASYNC: 5629 return (0); /* handled by the upper layer */ 5630 } 5631 } 5632 5633 /* 5634 * Custom free routine used for M_PASSFP messages. 5635 */ 5636 static void 5637 free_passfp(struct k_strrecvfd *srf) 5638 { 5639 (void) closef(srf->fp); 5640 kmem_free(srf, sizeof (struct k_strrecvfd) + sizeof (frtn_t)); 5641 } 5642 5643 /* ARGSUSED */ 5644 int 5645 do_sendfp(struct stdata *stp, struct file *fp, struct cred *cr) 5646 { 5647 queue_t *qp, *nextqp; 5648 struct k_strrecvfd *srf; 5649 mblk_t *mp; 5650 frtn_t *frtnp; 5651 size_t bufsize; 5652 queue_t *mate = NULL; 5653 syncq_t *sq = NULL; 5654 int retval = 0; 5655 5656 if (stp->sd_flag & STRHUP) 5657 return (ENXIO); 5658 5659 claimstr(stp->sd_wrq); 5660 5661 /* Fastpath, we have a pipe, and we are already mated, use it. */ 5662 if (STRMATED(stp)) { 5663 qp = _RD(stp->sd_mate->sd_wrq); 5664 claimstr(qp); 5665 mate = qp; 5666 } else { /* Not already mated. */ 5667 5668 /* 5669 * Walk the stream to the end of this one. 5670 * assumes that the claimstr() will prevent 5671 * plumbing between the stream head and the 5672 * driver from changing 5673 */ 5674 qp = stp->sd_wrq; 5675 5676 /* 5677 * Loop until we reach the end of this stream. 
5678 * On completion, qp points to the write queue 5679 * at the end of the stream, or the read queue 5680 * at the stream head if this is a fifo. 5681 */ 5682 while (((qp = qp->q_next) != NULL) && _SAMESTR(qp)) 5683 ; 5684 5685 /* 5686 * Just in case we get a q_next which is NULL, but 5687 * not at the end of the stream. This is actually 5688 * broken, so we set an assert to catch it in 5689 * debug, and set an error and return if not debug. 5690 */ 5691 ASSERT(qp); 5692 if (qp == NULL) { 5693 releasestr(stp->sd_wrq); 5694 return (EINVAL); 5695 } 5696 5697 /* 5698 * Enter the syncq for the driver, so (hopefully) 5699 * the queue values will not change on us. 5700 * XXXX - This will only prevent the race IFF only 5701 * the write side modifies the q_next member, and 5702 * the put procedure is protected by at least 5703 * MT_PERQ. 5704 */ 5705 if ((sq = qp->q_syncq) != NULL) 5706 entersq(sq, SQ_PUT); 5707 5708 /* Now get the q_next value from this qp. */ 5709 nextqp = qp->q_next; 5710 5711 /* 5712 * If nextqp exists and the other stream is different 5713 * from this one claim the stream, set the mate, and 5714 * get the read queue at the stream head of the other 5715 * stream. Assumes that nextqp was at least valid when 5716 * we got it. Hopefully the entersq of the driver 5717 * will prevent it from changing on us. 5718 */ 5719 if ((nextqp != NULL) && (STREAM(nextqp) != stp)) { 5720 ASSERT(qp->q_qinfo->qi_srvp); 5721 ASSERT(_OTHERQ(qp)->q_qinfo->qi_srvp); 5722 ASSERT(_OTHERQ(qp->q_next)->q_qinfo->qi_srvp); 5723 claimstr(nextqp); 5724 5725 /* Make sure we still have a q_next */ 5726 if (nextqp != qp->q_next) { 5727 releasestr(stp->sd_wrq); 5728 releasestr(nextqp); 5729 return (EINVAL); 5730 } 5731 5732 qp = _RD(STREAM(nextqp)->sd_wrq); 5733 mate = qp; 5734 } 5735 /* If we entered the synq above, leave it. 
*/ 5736 if (sq != NULL) 5737 leavesq(sq, SQ_PUT); 5738 } /* STRMATED(STP) */ 5739 5740 /* XXX prevents substitution of the ops vector */ 5741 if (qp->q_qinfo != &strdata && qp->q_qinfo != &fifo_strdata) { 5742 retval = EINVAL; 5743 goto out; 5744 } 5745 5746 if (qp->q_flag & QFULL) { 5747 retval = EAGAIN; 5748 goto out; 5749 } 5750 5751 /* 5752 * Since M_PASSFP messages include a file descriptor, we use 5753 * esballoc() and specify a custom free routine (free_passfp()) that 5754 * will close the descriptor as part of freeing the message. For 5755 * convenience, we stash the frtn_t right after the data block. 5756 */ 5757 bufsize = sizeof (struct k_strrecvfd) + sizeof (frtn_t); 5758 srf = kmem_alloc(bufsize, KM_NOSLEEP); 5759 if (srf == NULL) { 5760 retval = EAGAIN; 5761 goto out; 5762 } 5763 5764 frtnp = (frtn_t *)(srf + 1); 5765 frtnp->free_arg = (caddr_t)srf; 5766 frtnp->free_func = free_passfp; 5767 5768 mp = esballoc((uchar_t *)srf, bufsize, BPRI_MED, frtnp); 5769 if (mp == NULL) { 5770 kmem_free(srf, bufsize); 5771 retval = EAGAIN; 5772 goto out; 5773 } 5774 mp->b_wptr += sizeof (struct k_strrecvfd); 5775 mp->b_datap->db_type = M_PASSFP; 5776 5777 srf->fp = fp; 5778 srf->uid = crgetuid(curthread->t_cred); 5779 srf->gid = crgetgid(curthread->t_cred); 5780 mutex_enter(&fp->f_tlock); 5781 fp->f_count++; 5782 mutex_exit(&fp->f_tlock); 5783 5784 put(qp, mp); 5785 out: 5786 releasestr(stp->sd_wrq); 5787 if (mate) 5788 releasestr(mate); 5789 return (retval); 5790 } 5791 5792 /* 5793 * Send an ioctl message downstream and wait for acknowledgement. 5794 * flags may be set to either U_TO_K or K_TO_K and a combination 5795 * of STR_NOERROR or STR_NOSIG 5796 * STR_NOSIG: Signals are essentially ignored or held and have 5797 * no effect for the duration of the call. 5798 * STR_NOERROR: Ignores stream head read, write and hup errors. 
5799 * Additionally, if an existing ioctl times out, it is assumed 5800 * lost and and this ioctl will continue as if the previous ioctl had 5801 * finished. ETIME may be returned if this ioctl times out (i.e. 5802 * ic_timout is not INFTIM). Non-stream head errors may be returned if 5803 * the ioc_error indicates that the driver/module had problems, 5804 * an EFAULT was found when accessing user data, a lack of 5805 * resources, etc. 5806 */ 5807 int 5808 strdoioctl( 5809 struct stdata *stp, 5810 struct strioctl *strioc, 5811 int fflags, /* file flags with model info */ 5812 int flag, 5813 cred_t *crp, 5814 int *rvalp) 5815 { 5816 mblk_t *bp; 5817 struct iocblk *iocbp; 5818 struct copyreq *reqp; 5819 struct copyresp *resp; 5820 int id; 5821 int transparent = 0; 5822 int error = 0; 5823 int len = 0; 5824 caddr_t taddr; 5825 int copyflag = (flag & (U_TO_K | K_TO_K)); 5826 int sigflag = (flag & STR_NOSIG); 5827 int errs; 5828 uint_t waitflags; 5829 5830 ASSERT(copyflag == U_TO_K || copyflag == K_TO_K); 5831 ASSERT((fflags & FMODELS) != 0); 5832 5833 TRACE_2(TR_FAC_STREAMS_FR, 5834 TR_STRDOIOCTL, 5835 "strdoioctl:stp %p strioc %p", stp, strioc); 5836 if (strioc->ic_len == TRANSPARENT) { /* send arg in M_DATA block */ 5837 transparent = 1; 5838 strioc->ic_len = sizeof (intptr_t); 5839 } 5840 5841 if (strioc->ic_len < 0 || (strmsgsz > 0 && strioc->ic_len > strmsgsz)) 5842 return (EINVAL); 5843 5844 if ((bp = allocb_cred_wait(sizeof (union ioctypes), sigflag, &error, 5845 crp)) == NULL) 5846 return (error); 5847 5848 bzero(bp->b_wptr, sizeof (union ioctypes)); 5849 5850 iocbp = (struct iocblk *)bp->b_wptr; 5851 iocbp->ioc_count = strioc->ic_len; 5852 iocbp->ioc_cmd = strioc->ic_cmd; 5853 iocbp->ioc_flag = (fflags & FMODELS); 5854 5855 crhold(crp); 5856 iocbp->ioc_cr = crp; 5857 DB_TYPE(bp) = M_IOCTL; 5858 DB_CPID(bp) = curproc->p_pid; 5859 bp->b_wptr += sizeof (struct iocblk); 5860 5861 if (flag & STR_NOERROR) 5862 errs = STPLEX; 5863 else 5864 errs = 
STRHUP|STRDERR|STWRERR|STPLEX; 5865 5866 /* 5867 * If there is data to copy into ioctl block, do so. 5868 */ 5869 if (iocbp->ioc_count > 0) { 5870 if (transparent) 5871 /* 5872 * Note: STR_NOERROR does not have an effect 5873 * in putiocd() 5874 */ 5875 id = K_TO_K | sigflag; 5876 else 5877 id = flag; 5878 if ((error = putiocd(bp, strioc->ic_dp, id, crp)) != 0) { 5879 freemsg(bp); 5880 crfree(crp); 5881 return (error); 5882 } 5883 5884 /* 5885 * We could have slept copying in user pages. 5886 * Recheck the stream head state (the other end 5887 * of a pipe could have gone away). 5888 */ 5889 if (stp->sd_flag & errs) { 5890 mutex_enter(&stp->sd_lock); 5891 error = strgeterr(stp, errs, 0); 5892 mutex_exit(&stp->sd_lock); 5893 if (error != 0) { 5894 freemsg(bp); 5895 crfree(crp); 5896 return (error); 5897 } 5898 } 5899 } 5900 if (transparent) 5901 iocbp->ioc_count = TRANSPARENT; 5902 5903 /* 5904 * Block for up to STRTIMOUT milliseconds if there is an outstanding 5905 * ioctl for this stream already running. All processes 5906 * sleeping here will be awakened as a result of an ACK 5907 * or NAK being received for the outstanding ioctl, or 5908 * as a result of the timer expiring on the outstanding 5909 * ioctl (a failure), or as a result of any waiting 5910 * process's timer expiring (also a failure). 5911 */ 5912 5913 error = 0; 5914 mutex_enter(&stp->sd_lock); 5915 while (stp->sd_flag & (IOCWAIT | IOCWAITNE)) { 5916 clock_t cv_rval; 5917 5918 TRACE_0(TR_FAC_STREAMS_FR, 5919 TR_STRDOIOCTL_WAIT, 5920 "strdoioctl sleeps - IOCWAIT"); 5921 cv_rval = str_cv_wait(&stp->sd_iocmonitor, &stp->sd_lock, 5922 STRTIMOUT, sigflag); 5923 if (cv_rval <= 0) { 5924 if (cv_rval == 0) { 5925 error = EINTR; 5926 } else { 5927 if (flag & STR_NOERROR) { 5928 /* 5929 * Terminating current ioctl in 5930 * progress -- assume it got lost and 5931 * wake up the other thread so that the 5932 * operation completes. 
5933 */ 5934 if (!(stp->sd_flag & IOCWAITNE)) { 5935 stp->sd_flag |= IOCWAITNE; 5936 cv_broadcast(&stp->sd_monitor); 5937 } 5938 /* 5939 * Otherwise, there's a running 5940 * STR_NOERROR -- we have no choice 5941 * here but to wait forever (or until 5942 * interrupted). 5943 */ 5944 } else { 5945 /* 5946 * pending ioctl has caused 5947 * us to time out 5948 */ 5949 error = ETIME; 5950 } 5951 } 5952 } else if ((stp->sd_flag & errs)) { 5953 error = strgeterr(stp, errs, 0); 5954 } 5955 if (error) { 5956 mutex_exit(&stp->sd_lock); 5957 freemsg(bp); 5958 crfree(crp); 5959 return (error); 5960 } 5961 } 5962 5963 /* 5964 * Have control of ioctl mechanism. 5965 * Send down ioctl packet and wait for response. 5966 */ 5967 if (stp->sd_iocblk != (mblk_t *)-1) { 5968 freemsg(stp->sd_iocblk); 5969 } 5970 stp->sd_iocblk = NULL; 5971 5972 /* 5973 * If this is marked with 'noerror' (internal; mostly 5974 * I_{P,}{UN,}LINK), then make sure nobody else is able to get 5975 * in here by setting IOCWAITNE. 5976 */ 5977 waitflags = IOCWAIT; 5978 if (flag & STR_NOERROR) 5979 waitflags |= IOCWAITNE; 5980 5981 stp->sd_flag |= waitflags; 5982 5983 /* 5984 * Assign sequence number. 5985 */ 5986 iocbp->ioc_id = stp->sd_iocid = getiocseqno(); 5987 5988 mutex_exit(&stp->sd_lock); 5989 5990 TRACE_1(TR_FAC_STREAMS_FR, 5991 TR_STRDOIOCTL_PUT, "strdoioctl put: stp %p", stp); 5992 stream_willservice(stp); 5993 putnext(stp->sd_wrq, bp); 5994 stream_runservice(stp); 5995 5996 /* 5997 * Timed wait for acknowledgment. The wait time is limited by the 5998 * timeout value, which must be a positive integer (number of 5999 * milliseconds) to wait, or 0 (use default value of STRTIMOUT 6000 * milliseconds), or -1 (wait forever). This will be awakened 6001 * either by an ACK/NAK message arriving, the timer expiring, or 6002 * the timer expiring on another ioctl waiting for control of the 6003 * mechanism. 
6004 */ 6005 waitioc: 6006 mutex_enter(&stp->sd_lock); 6007 6008 6009 /* 6010 * If the reply has already arrived, don't sleep. If awakened from 6011 * the sleep, fail only if the reply has not arrived by then. 6012 * Otherwise, process the reply. 6013 */ 6014 while (!stp->sd_iocblk) { 6015 clock_t cv_rval; 6016 6017 if (stp->sd_flag & errs) { 6018 error = strgeterr(stp, errs, 0); 6019 if (error != 0) { 6020 stp->sd_flag &= ~waitflags; 6021 cv_broadcast(&stp->sd_iocmonitor); 6022 mutex_exit(&stp->sd_lock); 6023 crfree(crp); 6024 return (error); 6025 } 6026 } 6027 6028 TRACE_0(TR_FAC_STREAMS_FR, 6029 TR_STRDOIOCTL_WAIT2, 6030 "strdoioctl sleeps awaiting reply"); 6031 ASSERT(error == 0); 6032 6033 cv_rval = str_cv_wait(&stp->sd_monitor, &stp->sd_lock, 6034 (strioc->ic_timout ? 6035 strioc->ic_timout * 1000 : STRTIMOUT), sigflag); 6036 6037 /* 6038 * There are four possible cases here: interrupt, timeout, 6039 * wakeup by IOCWAITNE (above), or wakeup by strrput_nondata (a 6040 * valid M_IOCTL reply). 6041 * 6042 * If we've been awakened by a STR_NOERROR ioctl on some other 6043 * thread, then sd_iocblk will still be NULL, and IOCWAITNE 6044 * will be set. Pretend as if we just timed out. Note that 6045 * this other thread waited at least STRTIMOUT before trying to 6046 * awaken our thread, so this is indistinguishable (even for 6047 * INFTIM) from the case where we failed with ETIME waiting on 6048 * IOCWAIT in the prior loop. 6049 */ 6050 if (cv_rval > 0 && !(flag & STR_NOERROR) && 6051 stp->sd_iocblk == NULL && (stp->sd_flag & IOCWAITNE)) { 6052 cv_rval = -1; 6053 } 6054 6055 /* 6056 * note: STR_NOERROR does not protect 6057 * us here.. use ic_timout < 0 6058 */ 6059 if (cv_rval <= 0) { 6060 if (cv_rval == 0) { 6061 error = EINTR; 6062 } else { 6063 error = ETIME; 6064 } 6065 /* 6066 * A message could have come in after we were scheduled 6067 * but before we were actually run. 
6068 */ 6069 bp = stp->sd_iocblk; 6070 stp->sd_iocblk = NULL; 6071 if (bp != NULL) { 6072 if ((bp->b_datap->db_type == M_COPYIN) || 6073 (bp->b_datap->db_type == M_COPYOUT)) { 6074 mutex_exit(&stp->sd_lock); 6075 if (bp->b_cont) { 6076 freemsg(bp->b_cont); 6077 bp->b_cont = NULL; 6078 } 6079 bp->b_datap->db_type = M_IOCDATA; 6080 bp->b_wptr = bp->b_rptr + 6081 sizeof (struct copyresp); 6082 resp = (struct copyresp *)bp->b_rptr; 6083 resp->cp_rval = 6084 (caddr_t)1; /* failure */ 6085 stream_willservice(stp); 6086 putnext(stp->sd_wrq, bp); 6087 stream_runservice(stp); 6088 mutex_enter(&stp->sd_lock); 6089 } else { 6090 freemsg(bp); 6091 } 6092 } 6093 stp->sd_flag &= ~waitflags; 6094 cv_broadcast(&stp->sd_iocmonitor); 6095 mutex_exit(&stp->sd_lock); 6096 crfree(crp); 6097 return (error); 6098 } 6099 } 6100 bp = stp->sd_iocblk; 6101 /* 6102 * Note: it is strictly impossible to get here with sd_iocblk set to 6103 * -1. This is because the initial loop above doesn't allow any new 6104 * ioctls into the fray until all others have passed this point. 6105 */ 6106 ASSERT(bp != NULL && bp != (mblk_t *)-1); 6107 TRACE_1(TR_FAC_STREAMS_FR, 6108 TR_STRDOIOCTL_ACK, "strdoioctl got reply: bp %p", bp); 6109 if ((bp->b_datap->db_type == M_IOCACK) || 6110 (bp->b_datap->db_type == M_IOCNAK)) { 6111 /* for detection of duplicate ioctl replies */ 6112 stp->sd_iocblk = (mblk_t *)-1; 6113 stp->sd_flag &= ~waitflags; 6114 cv_broadcast(&stp->sd_iocmonitor); 6115 mutex_exit(&stp->sd_lock); 6116 } else { 6117 /* 6118 * flags not cleared here because we're still doing 6119 * copy in/out for ioctl. 6120 */ 6121 stp->sd_iocblk = NULL; 6122 mutex_exit(&stp->sd_lock); 6123 } 6124 6125 6126 /* 6127 * Have received acknowledgment. 6128 */ 6129 6130 switch (bp->b_datap->db_type) { 6131 case M_IOCACK: 6132 /* 6133 * Positive ack. 6134 */ 6135 iocbp = (struct iocblk *)bp->b_rptr; 6136 6137 /* 6138 * Set error if indicated. 
6139 */ 6140 if (iocbp->ioc_error) { 6141 error = iocbp->ioc_error; 6142 break; 6143 } 6144 6145 /* 6146 * Set return value. 6147 */ 6148 *rvalp = iocbp->ioc_rval; 6149 6150 /* 6151 * Data may have been returned in ACK message (ioc_count > 0). 6152 * If so, copy it out to the user's buffer. 6153 */ 6154 if (iocbp->ioc_count && !transparent) { 6155 if (error = getiocd(bp, strioc->ic_dp, copyflag)) 6156 break; 6157 } 6158 if (!transparent) { 6159 if (len) /* an M_COPYOUT was used with I_STR */ 6160 strioc->ic_len = len; 6161 else 6162 strioc->ic_len = (int)iocbp->ioc_count; 6163 } 6164 break; 6165 6166 case M_IOCNAK: 6167 /* 6168 * Negative ack. 6169 * 6170 * The only thing to do is set error as specified 6171 * in neg ack packet. 6172 */ 6173 iocbp = (struct iocblk *)bp->b_rptr; 6174 6175 error = (iocbp->ioc_error ? iocbp->ioc_error : EINVAL); 6176 break; 6177 6178 case M_COPYIN: 6179 /* 6180 * Driver or module has requested user ioctl data. 6181 */ 6182 reqp = (struct copyreq *)bp->b_rptr; 6183 6184 /* 6185 * M_COPYIN should *never* have a message attached, though 6186 * it's harmless if it does -- thus, panic on a DEBUG 6187 * kernel and just free it on a non-DEBUG build. 
6188 */ 6189 ASSERT(bp->b_cont == NULL); 6190 if (bp->b_cont != NULL) { 6191 freemsg(bp->b_cont); 6192 bp->b_cont = NULL; 6193 } 6194 6195 error = putiocd(bp, reqp->cq_addr, flag, crp); 6196 if (error && bp->b_cont) { 6197 freemsg(bp->b_cont); 6198 bp->b_cont = NULL; 6199 } 6200 6201 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp); 6202 bp->b_datap->db_type = M_IOCDATA; 6203 6204 mblk_setcred(bp, crp); 6205 DB_CPID(bp) = curproc->p_pid; 6206 resp = (struct copyresp *)bp->b_rptr; 6207 resp->cp_rval = (caddr_t)(uintptr_t)error; 6208 resp->cp_flag = (fflags & FMODELS); 6209 6210 stream_willservice(stp); 6211 putnext(stp->sd_wrq, bp); 6212 stream_runservice(stp); 6213 6214 if (error) { 6215 mutex_enter(&stp->sd_lock); 6216 stp->sd_flag &= ~waitflags; 6217 cv_broadcast(&stp->sd_iocmonitor); 6218 mutex_exit(&stp->sd_lock); 6219 crfree(crp); 6220 return (error); 6221 } 6222 6223 goto waitioc; 6224 6225 case M_COPYOUT: 6226 /* 6227 * Driver or module has ioctl data for a user. 6228 */ 6229 reqp = (struct copyreq *)bp->b_rptr; 6230 ASSERT(bp->b_cont != NULL); 6231 6232 /* 6233 * Always (transparent or non-transparent ) 6234 * use the address specified in the request 6235 */ 6236 taddr = reqp->cq_addr; 6237 if (!transparent) 6238 len = (int)reqp->cq_size; 6239 6240 /* copyout data to the provided address */ 6241 error = getiocd(bp, taddr, copyflag); 6242 6243 freemsg(bp->b_cont); 6244 bp->b_cont = NULL; 6245 6246 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp); 6247 bp->b_datap->db_type = M_IOCDATA; 6248 6249 mblk_setcred(bp, crp); 6250 DB_CPID(bp) = curproc->p_pid; 6251 resp = (struct copyresp *)bp->b_rptr; 6252 resp->cp_rval = (caddr_t)(uintptr_t)error; 6253 resp->cp_flag = (fflags & FMODELS); 6254 6255 stream_willservice(stp); 6256 putnext(stp->sd_wrq, bp); 6257 stream_runservice(stp); 6258 6259 if (error) { 6260 mutex_enter(&stp->sd_lock); 6261 stp->sd_flag &= ~waitflags; 6262 cv_broadcast(&stp->sd_iocmonitor); 6263 mutex_exit(&stp->sd_lock); 6264 crfree(crp); 6265 
return (error); 6266 } 6267 goto waitioc; 6268 6269 default: 6270 ASSERT(0); 6271 mutex_enter(&stp->sd_lock); 6272 stp->sd_flag &= ~waitflags; 6273 cv_broadcast(&stp->sd_iocmonitor); 6274 mutex_exit(&stp->sd_lock); 6275 break; 6276 } 6277 6278 freemsg(bp); 6279 crfree(crp); 6280 return (error); 6281 } 6282 6283 /* 6284 * For the SunOS keyboard driver. 6285 * Return the next available "ioctl" sequence number. 6286 * Exported, so that streams modules can send "ioctl" messages 6287 * downstream from their open routine. 6288 */ 6289 int 6290 getiocseqno(void) 6291 { 6292 int i; 6293 6294 mutex_enter(&strresources); 6295 i = ++ioc_id; 6296 mutex_exit(&strresources); 6297 return (i); 6298 } 6299 6300 /* 6301 * Get the next message from the read queue. If the message is 6302 * priority, STRPRI will have been set by strrput(). This flag 6303 * should be reset only when the entire message at the front of the 6304 * queue as been consumed. 6305 * 6306 * NOTE: strgetmsg and kstrgetmsg have much of the logic in common. 
6307 */ 6308 int 6309 strgetmsg( 6310 struct vnode *vp, 6311 struct strbuf *mctl, 6312 struct strbuf *mdata, 6313 unsigned char *prip, 6314 int *flagsp, 6315 int fmode, 6316 rval_t *rvp) 6317 { 6318 struct stdata *stp; 6319 mblk_t *bp, *nbp; 6320 mblk_t *savemp = NULL; 6321 mblk_t *savemptail = NULL; 6322 uint_t old_sd_flag; 6323 int flg; 6324 int more = 0; 6325 int error = 0; 6326 char first = 1; 6327 uint_t mark; /* Contains MSG*MARK and _LASTMARK */ 6328 #define _LASTMARK 0x8000 /* Distinct from MSG*MARK */ 6329 unsigned char pri = 0; 6330 queue_t *q; 6331 int pr = 0; /* Partial read successful */ 6332 struct uio uios; 6333 struct uio *uiop = &uios; 6334 struct iovec iovs; 6335 unsigned char type; 6336 6337 TRACE_1(TR_FAC_STREAMS_FR, TR_STRGETMSG_ENTER, 6338 "strgetmsg:%p", vp); 6339 6340 ASSERT(vp->v_stream); 6341 stp = vp->v_stream; 6342 rvp->r_val1 = 0; 6343 6344 mutex_enter(&stp->sd_lock); 6345 6346 if ((error = i_straccess(stp, JCREAD)) != 0) { 6347 mutex_exit(&stp->sd_lock); 6348 return (error); 6349 } 6350 6351 if (stp->sd_flag & (STRDERR|STPLEX)) { 6352 error = strgeterr(stp, STRDERR|STPLEX, 0); 6353 if (error != 0) { 6354 mutex_exit(&stp->sd_lock); 6355 return (error); 6356 } 6357 } 6358 mutex_exit(&stp->sd_lock); 6359 6360 switch (*flagsp) { 6361 case MSG_HIPRI: 6362 if (*prip != 0) 6363 return (EINVAL); 6364 break; 6365 6366 case MSG_ANY: 6367 case MSG_BAND: 6368 break; 6369 6370 default: 6371 return (EINVAL); 6372 } 6373 /* 6374 * Setup uio and iov for data part 6375 */ 6376 iovs.iov_base = mdata->buf; 6377 iovs.iov_len = mdata->maxlen; 6378 uios.uio_iov = &iovs; 6379 uios.uio_iovcnt = 1; 6380 uios.uio_loffset = 0; 6381 uios.uio_segflg = UIO_USERSPACE; 6382 uios.uio_fmode = 0; 6383 uios.uio_extflg = UIO_COPY_CACHED; 6384 uios.uio_resid = mdata->maxlen; 6385 uios.uio_offset = 0; 6386 6387 q = _RD(stp->sd_wrq); 6388 mutex_enter(&stp->sd_lock); 6389 old_sd_flag = stp->sd_flag; 6390 mark = 0; 6391 for (;;) { 6392 int done = 0; 6393 mblk_t *q_first = 
q->q_first; 6394 6395 /* 6396 * Get the next message of appropriate priority 6397 * from the stream head. If the caller is interested 6398 * in band or hipri messages, then they should already 6399 * be enqueued at the stream head. On the other hand 6400 * if the caller wants normal (band 0) messages, they 6401 * might be deferred in a synchronous stream and they 6402 * will need to be pulled up. 6403 * 6404 * After we have dequeued a message, we might find that 6405 * it was a deferred M_SIG that was enqueued at the 6406 * stream head. It must now be posted as part of the 6407 * read by calling strsignal_nolock(). 6408 * 6409 * Also note that strrput does not enqueue an M_PCSIG, 6410 * and there cannot be more than one hipri message, 6411 * so there was no need to have the M_PCSIG case. 6412 * 6413 * At some time it might be nice to try and wrap the 6414 * functionality of kstrgetmsg() and strgetmsg() into 6415 * a common routine so to reduce the amount of replicated 6416 * code (since they are extremely similar). 6417 */ 6418 if (!(*flagsp & (MSG_HIPRI|MSG_BAND))) { 6419 /* Asking for normal, band0 data */ 6420 bp = strget(stp, q, uiop, first, &error); 6421 ASSERT(MUTEX_HELD(&stp->sd_lock)); 6422 if (bp != NULL) { 6423 if (bp->b_datap->db_type == M_SIG) { 6424 strsignal_nolock(stp, *bp->b_rptr, 6425 (int32_t)bp->b_band); 6426 continue; 6427 } else { 6428 break; 6429 } 6430 } 6431 if (error != 0) { 6432 goto getmout; 6433 } 6434 6435 /* 6436 * We can't depend on the value of STRPRI here because 6437 * the stream head may be in transit. 
Therefore, we 6438 * must look at the type of the first message to 6439 * determine if a high priority messages is waiting 6440 */ 6441 } else if ((*flagsp & MSG_HIPRI) && q_first != NULL && 6442 q_first->b_datap->db_type >= QPCTL && 6443 (bp = getq_noenab(q)) != NULL) { 6444 /* Asked for HIPRI and got one */ 6445 ASSERT(bp->b_datap->db_type >= QPCTL); 6446 break; 6447 } else if ((*flagsp & MSG_BAND) && q_first != NULL && 6448 ((q_first->b_band >= *prip) || 6449 q_first->b_datap->db_type >= QPCTL) && 6450 (bp = getq_noenab(q)) != NULL) { 6451 /* 6452 * Asked for at least band "prip" and got either at 6453 * least that band or a hipri message. 6454 */ 6455 ASSERT(bp->b_band >= *prip || 6456 bp->b_datap->db_type >= QPCTL); 6457 if (bp->b_datap->db_type == M_SIG) { 6458 strsignal_nolock(stp, *bp->b_rptr, 6459 (int32_t)bp->b_band); 6460 continue; 6461 } else { 6462 break; 6463 } 6464 } 6465 6466 /* No data. Time to sleep? */ 6467 qbackenable(q, 0); 6468 6469 /* 6470 * If STRHUP or STREOF, return 0 length control and data. 6471 * If resid is 0, then a read(fd,buf,0) was done. Do not 6472 * sleep to satisfy this request because by default we have 6473 * zero bytes to return. 
6474 */ 6475 if ((stp->sd_flag & (STRHUP|STREOF)) || (mctl->maxlen == 0 && 6476 mdata->maxlen == 0)) { 6477 mctl->len = mdata->len = 0; 6478 *flagsp = 0; 6479 mutex_exit(&stp->sd_lock); 6480 return (0); 6481 } 6482 TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_WAIT, 6483 "strgetmsg calls strwaitq:%p, %p", 6484 vp, uiop); 6485 if (((error = strwaitq(stp, GETWAIT, (ssize_t)0, fmode, -1, 6486 &done)) != 0) || done) { 6487 TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_DONE, 6488 "strgetmsg error or done:%p, %p", 6489 vp, uiop); 6490 mutex_exit(&stp->sd_lock); 6491 return (error); 6492 } 6493 TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_AWAKE, 6494 "strgetmsg awakes:%p, %p", vp, uiop); 6495 if ((error = i_straccess(stp, JCREAD)) != 0) { 6496 mutex_exit(&stp->sd_lock); 6497 return (error); 6498 } 6499 first = 0; 6500 } 6501 ASSERT(bp != NULL); 6502 /* 6503 * Extract any mark information. If the message is not completely 6504 * consumed this information will be put in the mblk 6505 * that is putback. 6506 * If MSGMARKNEXT is set and the message is completely consumed 6507 * the STRATMARK flag will be set below. Likewise, if 6508 * MSGNOTMARKNEXT is set and the message is 6509 * completely consumed STRNOTATMARK will be set. 6510 */ 6511 mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT); 6512 ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) != 6513 (MSGMARKNEXT|MSGNOTMARKNEXT)); 6514 if (mark != 0 && bp == stp->sd_mark) { 6515 mark |= _LASTMARK; 6516 stp->sd_mark = NULL; 6517 } 6518 /* 6519 * keep track of the original message type and priority 6520 */ 6521 pri = bp->b_band; 6522 type = bp->b_datap->db_type; 6523 if (type == M_PASSFP) { 6524 if ((mark & _LASTMARK) && (stp->sd_mark == NULL)) 6525 stp->sd_mark = bp; 6526 bp->b_flag |= mark & ~_LASTMARK; 6527 putback(stp, q, bp, pri); 6528 qbackenable(q, pri); 6529 mutex_exit(&stp->sd_lock); 6530 return (EBADMSG); 6531 } 6532 ASSERT(type != M_SIG); 6533 6534 /* 6535 * Set this flag so strrput will not generate signals. 
Need to 6536 * make sure this flag is cleared before leaving this routine 6537 * else signals will stop being sent. 6538 */ 6539 stp->sd_flag |= STRGETINPROG; 6540 mutex_exit(&stp->sd_lock); 6541 6542 if (STREAM_NEEDSERVICE(stp)) 6543 stream_runservice(stp); 6544 6545 /* 6546 * Set HIPRI flag if message is priority. 6547 */ 6548 if (type >= QPCTL) 6549 flg = MSG_HIPRI; 6550 else 6551 flg = MSG_BAND; 6552 6553 /* 6554 * First process PROTO or PCPROTO blocks, if any. 6555 */ 6556 if (mctl->maxlen >= 0 && type != M_DATA) { 6557 size_t n, bcnt; 6558 char *ubuf; 6559 6560 bcnt = mctl->maxlen; 6561 ubuf = mctl->buf; 6562 while (bp != NULL && bp->b_datap->db_type != M_DATA) { 6563 if ((n = MIN(bcnt, bp->b_wptr - bp->b_rptr)) != 0 && 6564 copyout(bp->b_rptr, ubuf, n)) { 6565 error = EFAULT; 6566 mutex_enter(&stp->sd_lock); 6567 /* 6568 * clear stream head pri flag based on 6569 * first message type 6570 */ 6571 if (type >= QPCTL) { 6572 ASSERT(type == M_PCPROTO); 6573 stp->sd_flag &= ~STRPRI; 6574 } 6575 more = 0; 6576 freemsg(bp); 6577 goto getmout; 6578 } 6579 ubuf += n; 6580 bp->b_rptr += n; 6581 if (bp->b_rptr >= bp->b_wptr) { 6582 nbp = bp; 6583 bp = bp->b_cont; 6584 freeb(nbp); 6585 } 6586 ASSERT(n <= bcnt); 6587 bcnt -= n; 6588 if (bcnt == 0) 6589 break; 6590 } 6591 mctl->len = mctl->maxlen - bcnt; 6592 } else 6593 mctl->len = -1; 6594 6595 if (bp && bp->b_datap->db_type != M_DATA) { 6596 /* 6597 * More PROTO blocks in msg. 6598 */ 6599 more |= MORECTL; 6600 savemp = bp; 6601 while (bp && bp->b_datap->db_type != M_DATA) { 6602 savemptail = bp; 6603 bp = bp->b_cont; 6604 } 6605 savemptail->b_cont = NULL; 6606 } 6607 6608 /* 6609 * Now process DATA blocks, if any. 6610 */ 6611 if (mdata->maxlen >= 0 && bp) { 6612 /* 6613 * struiocopyout will consume a potential zero-length 6614 * M_DATA even if uio_resid is zero. 
6615 */ 6616 size_t oldresid = uiop->uio_resid; 6617 6618 bp = struiocopyout(bp, uiop, &error); 6619 if (error != 0) { 6620 mutex_enter(&stp->sd_lock); 6621 /* 6622 * clear stream head hi pri flag based on 6623 * first message 6624 */ 6625 if (type >= QPCTL) { 6626 ASSERT(type == M_PCPROTO); 6627 stp->sd_flag &= ~STRPRI; 6628 } 6629 more = 0; 6630 freemsg(savemp); 6631 goto getmout; 6632 } 6633 /* 6634 * (pr == 1) indicates a partial read. 6635 */ 6636 if (oldresid > uiop->uio_resid) 6637 pr = 1; 6638 mdata->len = mdata->maxlen - uiop->uio_resid; 6639 } else 6640 mdata->len = -1; 6641 6642 if (bp) { /* more data blocks in msg */ 6643 more |= MOREDATA; 6644 if (savemp) 6645 savemptail->b_cont = bp; 6646 else 6647 savemp = bp; 6648 } 6649 6650 mutex_enter(&stp->sd_lock); 6651 if (savemp) { 6652 if (pr && (savemp->b_datap->db_type == M_DATA) && 6653 msgnodata(savemp)) { 6654 /* 6655 * Avoid queuing a zero-length tail part of 6656 * a message. pr=1 indicates that we read some of 6657 * the message. 6658 */ 6659 freemsg(savemp); 6660 more &= ~MOREDATA; 6661 /* 6662 * clear stream head hi pri flag based on 6663 * first message 6664 */ 6665 if (type >= QPCTL) { 6666 ASSERT(type == M_PCPROTO); 6667 stp->sd_flag &= ~STRPRI; 6668 } 6669 } else { 6670 savemp->b_band = pri; 6671 /* 6672 * If the first message was HIPRI and the one we're 6673 * putting back isn't, then clear STRPRI, otherwise 6674 * set STRPRI again. Note that we must set STRPRI 6675 * again since the flush logic in strrput_nondata() 6676 * may have cleared it while we had sd_lock dropped. 6677 */ 6678 if (type >= QPCTL) { 6679 ASSERT(type == M_PCPROTO); 6680 if (queclass(savemp) < QPCTL) 6681 stp->sd_flag &= ~STRPRI; 6682 else 6683 stp->sd_flag |= STRPRI; 6684 } else if (queclass(savemp) >= QPCTL) { 6685 /* 6686 * The first message was not a HIPRI message, 6687 * but the one we are about to putback is. 
6688 * For simplicitly, we do not allow for HIPRI 6689 * messages to be embedded in the message 6690 * body, so just force it to same type as 6691 * first message. 6692 */ 6693 ASSERT(type == M_DATA || type == M_PROTO); 6694 ASSERT(savemp->b_datap->db_type == M_PCPROTO); 6695 savemp->b_datap->db_type = type; 6696 } 6697 if (mark != 0) { 6698 savemp->b_flag |= mark & ~_LASTMARK; 6699 if ((mark & _LASTMARK) && 6700 (stp->sd_mark == NULL)) { 6701 /* 6702 * If another marked message arrived 6703 * while sd_lock was not held sd_mark 6704 * would be non-NULL. 6705 */ 6706 stp->sd_mark = savemp; 6707 } 6708 } 6709 putback(stp, q, savemp, pri); 6710 } 6711 } else { 6712 /* 6713 * The complete message was consumed. 6714 * 6715 * If another M_PCPROTO arrived while sd_lock was not held 6716 * it would have been discarded since STRPRI was still set. 6717 * 6718 * Move the MSG*MARKNEXT information 6719 * to the stream head just in case 6720 * the read queue becomes empty. 6721 * clear stream head hi pri flag based on 6722 * first message 6723 * 6724 * If the stream head was at the mark 6725 * (STRATMARK) before we dropped sd_lock above 6726 * and some data was consumed then we have 6727 * moved past the mark thus STRATMARK is 6728 * cleared. However, if a message arrived in 6729 * strrput during the copyout above causing 6730 * STRATMARK to be set we can not clear that 6731 * flag. 
6732 */ 6733 if (type >= QPCTL) { 6734 ASSERT(type == M_PCPROTO); 6735 stp->sd_flag &= ~STRPRI; 6736 } 6737 if (mark & (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) { 6738 if (mark & MSGMARKNEXT) { 6739 stp->sd_flag &= ~STRNOTATMARK; 6740 stp->sd_flag |= STRATMARK; 6741 } else if (mark & MSGNOTMARKNEXT) { 6742 stp->sd_flag &= ~STRATMARK; 6743 stp->sd_flag |= STRNOTATMARK; 6744 } else { 6745 stp->sd_flag &= ~(STRATMARK|STRNOTATMARK); 6746 } 6747 } else if (pr && (old_sd_flag & STRATMARK)) { 6748 stp->sd_flag &= ~STRATMARK; 6749 } 6750 } 6751 6752 *flagsp = flg; 6753 *prip = pri; 6754 6755 /* 6756 * Getmsg cleanup processing - if the state of the queue has changed 6757 * some signals may need to be sent and/or poll awakened. 6758 */ 6759 getmout: 6760 qbackenable(q, pri); 6761 6762 /* 6763 * We dropped the stream head lock above. Send all M_SIG messages 6764 * before processing stream head for SIGPOLL messages. 6765 */ 6766 ASSERT(MUTEX_HELD(&stp->sd_lock)); 6767 while ((bp = q->q_first) != NULL && 6768 (bp->b_datap->db_type == M_SIG)) { 6769 /* 6770 * sd_lock is held so the content of the read queue can not 6771 * change. 6772 */ 6773 bp = getq(q); 6774 ASSERT(bp != NULL && bp->b_datap->db_type == M_SIG); 6775 6776 strsignal_nolock(stp, *bp->b_rptr, (int32_t)bp->b_band); 6777 mutex_exit(&stp->sd_lock); 6778 freemsg(bp); 6779 if (STREAM_NEEDSERVICE(stp)) 6780 stream_runservice(stp); 6781 mutex_enter(&stp->sd_lock); 6782 } 6783 6784 /* 6785 * stream head cannot change while we make the determination 6786 * whether or not to send a signal. Drop the flag to allow strrput 6787 * to send firstmsgsigs again. 6788 */ 6789 stp->sd_flag &= ~STRGETINPROG; 6790 6791 /* 6792 * If the type of message at the front of the queue changed 6793 * due to the receive the appropriate signals and pollwakeup events 6794 * are generated. The type of changes are: 6795 * Processed a hipri message, q_first is not hipri. 6796 * Processed a band X message, and q_first is band Y. 
6797 * The generated signals and pollwakeups are identical to what 6798 * strrput() generates should the message that is now on q_first 6799 * arrive to an empty read queue. 6800 * 6801 * Note: only strrput will send a signal for a hipri message. 6802 */ 6803 if ((bp = q->q_first) != NULL && !(stp->sd_flag & STRPRI)) { 6804 strsigset_t signals = 0; 6805 strpollset_t pollwakeups = 0; 6806 6807 if (flg & MSG_HIPRI) { 6808 /* 6809 * Removed a hipri message. Regular data at 6810 * the front of the queue. 6811 */ 6812 if (bp->b_band == 0) { 6813 signals = S_INPUT | S_RDNORM; 6814 pollwakeups = POLLIN | POLLRDNORM; 6815 } else { 6816 signals = S_INPUT | S_RDBAND; 6817 pollwakeups = POLLIN | POLLRDBAND; 6818 } 6819 } else if (pri != bp->b_band) { 6820 /* 6821 * The band is different for the new q_first. 6822 */ 6823 if (bp->b_band == 0) { 6824 signals = S_RDNORM; 6825 pollwakeups = POLLIN | POLLRDNORM; 6826 } else { 6827 signals = S_RDBAND; 6828 pollwakeups = POLLIN | POLLRDBAND; 6829 } 6830 } 6831 6832 if (pollwakeups != 0) { 6833 if (pollwakeups == (POLLIN | POLLRDNORM)) { 6834 if (!(stp->sd_rput_opt & SR_POLLIN)) 6835 goto no_pollwake; 6836 stp->sd_rput_opt &= ~SR_POLLIN; 6837 } 6838 mutex_exit(&stp->sd_lock); 6839 pollwakeup(&stp->sd_pollist, pollwakeups); 6840 mutex_enter(&stp->sd_lock); 6841 } 6842 no_pollwake: 6843 6844 if (stp->sd_sigflags & signals) 6845 strsendsig(stp->sd_siglist, signals, bp->b_band, 0); 6846 } 6847 mutex_exit(&stp->sd_lock); 6848 6849 rvp->r_val1 = more; 6850 return (error); 6851 #undef _LASTMARK 6852 } 6853 6854 /* 6855 * Get the next message from the read queue. If the message is 6856 * priority, STRPRI will have been set by strrput(). This flag 6857 * should be reset only when the entire message at the front of the 6858 * queue as been consumed. 6859 * 6860 * If uiop is NULL all data is returned in mctlp. 6861 * Note that a NULL uiop implies that FNDELAY and FNONBLOCK are assumed 6862 * not enabled. 
6863 * The timeout parameter is in milliseconds; -1 for infinity. 6864 * This routine handles the consolidation private flags: 6865 * MSG_IGNERROR Ignore any stream head error except STPLEX. 6866 * MSG_DELAYERROR Defer the error check until the queue is empty. 6867 * MSG_HOLDSIG Hold signals while waiting for data. 6868 * MSG_IPEEK Only peek at messages. 6869 * MSG_DISCARDTAIL Discard the tail M_DATA part of the message 6870 * that doesn't fit. 6871 * MSG_NOMARK If the message is marked leave it on the queue. 6872 * 6873 * NOTE: strgetmsg and kstrgetmsg have much of the logic in common. 6874 */ 6875 int 6876 kstrgetmsg( 6877 struct vnode *vp, 6878 mblk_t **mctlp, 6879 struct uio *uiop, 6880 unsigned char *prip, 6881 int *flagsp, 6882 clock_t timout, 6883 rval_t *rvp) 6884 { 6885 struct stdata *stp; 6886 mblk_t *bp, *nbp; 6887 mblk_t *savemp = NULL; 6888 mblk_t *savemptail = NULL; 6889 int flags; 6890 uint_t old_sd_flag; 6891 int flg; 6892 int more = 0; 6893 int error = 0; 6894 char first = 1; 6895 uint_t mark; /* Contains MSG*MARK and _LASTMARK */ 6896 #define _LASTMARK 0x8000 /* Distinct from MSG*MARK */ 6897 unsigned char pri = 0; 6898 queue_t *q; 6899 int pr = 0; /* Partial read successful */ 6900 unsigned char type; 6901 6902 TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_ENTER, 6903 "kstrgetmsg:%p", vp); 6904 6905 ASSERT(vp->v_stream); 6906 stp = vp->v_stream; 6907 rvp->r_val1 = 0; 6908 6909 mutex_enter(&stp->sd_lock); 6910 6911 if ((error = i_straccess(stp, JCREAD)) != 0) { 6912 mutex_exit(&stp->sd_lock); 6913 return (error); 6914 } 6915 6916 flags = *flagsp; 6917 if (stp->sd_flag & (STRDERR|STPLEX)) { 6918 if ((stp->sd_flag & STPLEX) || 6919 (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == 0) { 6920 error = strgeterr(stp, STRDERR|STPLEX, 6921 (flags & MSG_IPEEK)); 6922 if (error != 0) { 6923 mutex_exit(&stp->sd_lock); 6924 return (error); 6925 } 6926 } 6927 } 6928 mutex_exit(&stp->sd_lock); 6929 6930 switch (flags & (MSG_HIPRI|MSG_ANY|MSG_BAND)) { 6931 case MSG_HIPRI: 
6932 if (*prip != 0) 6933 return (EINVAL); 6934 break; 6935 6936 case MSG_ANY: 6937 case MSG_BAND: 6938 break; 6939 6940 default: 6941 return (EINVAL); 6942 } 6943 6944 retry: 6945 q = _RD(stp->sd_wrq); 6946 mutex_enter(&stp->sd_lock); 6947 old_sd_flag = stp->sd_flag; 6948 mark = 0; 6949 for (;;) { 6950 int done = 0; 6951 int waitflag; 6952 int fmode; 6953 mblk_t *q_first = q->q_first; 6954 6955 /* 6956 * This section of the code operates just like the code 6957 * in strgetmsg(). There is a comment there about what 6958 * is going on here. 6959 */ 6960 if (!(flags & (MSG_HIPRI|MSG_BAND))) { 6961 /* Asking for normal, band0 data */ 6962 bp = strget(stp, q, uiop, first, &error); 6963 ASSERT(MUTEX_HELD(&stp->sd_lock)); 6964 if (bp != NULL) { 6965 if (bp->b_datap->db_type == M_SIG) { 6966 strsignal_nolock(stp, *bp->b_rptr, 6967 (int32_t)bp->b_band); 6968 continue; 6969 } else { 6970 break; 6971 } 6972 } 6973 if (error != 0) { 6974 goto getmout; 6975 } 6976 /* 6977 * We can't depend on the value of STRPRI here because 6978 * the stream head may be in transit. Therefore, we 6979 * must look at the type of the first message to 6980 * determine if a high priority messages is waiting 6981 */ 6982 } else if ((flags & MSG_HIPRI) && q_first != NULL && 6983 q_first->b_datap->db_type >= QPCTL && 6984 (bp = getq_noenab(q)) != NULL) { 6985 ASSERT(bp->b_datap->db_type >= QPCTL); 6986 break; 6987 } else if ((flags & MSG_BAND) && q_first != NULL && 6988 ((q_first->b_band >= *prip) || 6989 q_first->b_datap->db_type >= QPCTL) && 6990 (bp = getq_noenab(q)) != NULL) { 6991 /* 6992 * Asked for at least band "prip" and got either at 6993 * least that band or a hipri message. 6994 */ 6995 ASSERT(bp->b_band >= *prip || 6996 bp->b_datap->db_type >= QPCTL); 6997 if (bp->b_datap->db_type == M_SIG) { 6998 strsignal_nolock(stp, *bp->b_rptr, 6999 (int32_t)bp->b_band); 7000 continue; 7001 } else { 7002 break; 7003 } 7004 } 7005 7006 /* No data. Time to sleep? 
*/ 7007 qbackenable(q, 0); 7008 7009 /* 7010 * Delayed error notification? 7011 */ 7012 if ((stp->sd_flag & (STRDERR|STPLEX)) && 7013 (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == MSG_DELAYERROR) { 7014 error = strgeterr(stp, STRDERR|STPLEX, 7015 (flags & MSG_IPEEK)); 7016 if (error != 0) { 7017 mutex_exit(&stp->sd_lock); 7018 return (error); 7019 } 7020 } 7021 7022 /* 7023 * If STRHUP or STREOF, return 0 length control and data. 7024 * If a read(fd,buf,0) has been done, do not sleep, just 7025 * return. 7026 * 7027 * If mctlp == NULL and uiop == NULL, then the code will 7028 * do the strwaitq. This is an understood way of saying 7029 * sleep "polling" until a message is received. 7030 */ 7031 if ((stp->sd_flag & (STRHUP|STREOF)) || 7032 (uiop != NULL && uiop->uio_resid == 0)) { 7033 if (mctlp != NULL) 7034 *mctlp = NULL; 7035 *flagsp = 0; 7036 mutex_exit(&stp->sd_lock); 7037 return (0); 7038 } 7039 7040 waitflag = GETWAIT; 7041 if (flags & 7042 (MSG_HOLDSIG|MSG_IGNERROR|MSG_IPEEK|MSG_DELAYERROR)) { 7043 if (flags & MSG_HOLDSIG) 7044 waitflag |= STR_NOSIG; 7045 if (flags & MSG_IGNERROR) 7046 waitflag |= STR_NOERROR; 7047 if (flags & MSG_IPEEK) 7048 waitflag |= STR_PEEK; 7049 if (flags & MSG_DELAYERROR) 7050 waitflag |= STR_DELAYERR; 7051 } 7052 if (uiop != NULL) 7053 fmode = uiop->uio_fmode; 7054 else 7055 fmode = 0; 7056 7057 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_WAIT, 7058 "kstrgetmsg calls strwaitq:%p, %p", 7059 vp, uiop); 7060 if (((error = strwaitq(stp, waitflag, (ssize_t)0, 7061 fmode, timout, &done)) != 0) || done) { 7062 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_DONE, 7063 "kstrgetmsg error or done:%p, %p", 7064 vp, uiop); 7065 mutex_exit(&stp->sd_lock); 7066 return (error); 7067 } 7068 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_AWAKE, 7069 "kstrgetmsg awakes:%p, %p", vp, uiop); 7070 if ((error = i_straccess(stp, JCREAD)) != 0) { 7071 mutex_exit(&stp->sd_lock); 7072 return (error); 7073 } 7074 first = 0; 7075 } 7076 ASSERT(bp != NULL); 7077 /* 7078 * Extract 
any mark information. If the message is not completely 7079 * consumed this information will be put in the mblk 7080 * that is putback. 7081 * If MSGMARKNEXT is set and the message is completely consumed 7082 * the STRATMARK flag will be set below. Likewise, if 7083 * MSGNOTMARKNEXT is set and the message is 7084 * completely consumed STRNOTATMARK will be set. 7085 */ 7086 mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT); 7087 ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) != 7088 (MSGMARKNEXT|MSGNOTMARKNEXT)); 7089 pri = bp->b_band; 7090 if (mark != 0) { 7091 /* 7092 * If the caller doesn't want the mark return. 7093 * Used to implement MSG_WAITALL in sockets. 7094 */ 7095 if (flags & MSG_NOMARK) { 7096 putback(stp, q, bp, pri); 7097 qbackenable(q, pri); 7098 mutex_exit(&stp->sd_lock); 7099 return (EWOULDBLOCK); 7100 } 7101 if (bp == stp->sd_mark) { 7102 mark |= _LASTMARK; 7103 stp->sd_mark = NULL; 7104 } 7105 } 7106 7107 /* 7108 * keep track of the first message type 7109 */ 7110 type = bp->b_datap->db_type; 7111 7112 if (bp->b_datap->db_type == M_PASSFP) { 7113 if ((mark & _LASTMARK) && (stp->sd_mark == NULL)) 7114 stp->sd_mark = bp; 7115 bp->b_flag |= mark & ~_LASTMARK; 7116 putback(stp, q, bp, pri); 7117 qbackenable(q, pri); 7118 mutex_exit(&stp->sd_lock); 7119 return (EBADMSG); 7120 } 7121 ASSERT(type != M_SIG); 7122 7123 if (flags & MSG_IPEEK) { 7124 /* 7125 * Clear any struioflag - we do the uiomove over again 7126 * when peeking since it simplifies the code. 7127 * 7128 * Dup the message and put the original back on the queue. 7129 * If dupmsg() fails, try again with copymsg() to see if 7130 * there is indeed a shortage of memory. dupmsg() may fail 7131 * if db_ref in any of the messages reaches its limit. 7132 */ 7133 if ((nbp = dupmsg(bp)) == NULL && (nbp = copymsg(bp)) == NULL) { 7134 /* 7135 * Restore the state of the stream head since we 7136 * need to drop sd_lock (strwaitbuf is sleeping). 
7137 */ 7138 size_t size = msgdsize(bp); 7139 7140 if ((mark & _LASTMARK) && (stp->sd_mark == NULL)) 7141 stp->sd_mark = bp; 7142 bp->b_flag |= mark & ~_LASTMARK; 7143 putback(stp, q, bp, pri); 7144 mutex_exit(&stp->sd_lock); 7145 error = strwaitbuf(size, BPRI_HI); 7146 if (error) { 7147 /* 7148 * There is no net change to the queue thus 7149 * no need to qbackenable. 7150 */ 7151 return (error); 7152 } 7153 goto retry; 7154 } 7155 7156 if ((mark & _LASTMARK) && (stp->sd_mark == NULL)) 7157 stp->sd_mark = bp; 7158 bp->b_flag |= mark & ~_LASTMARK; 7159 putback(stp, q, bp, pri); 7160 bp = nbp; 7161 } 7162 7163 /* 7164 * Set this flag so strrput will not generate signals. Need to 7165 * make sure this flag is cleared before leaving this routine 7166 * else signals will stop being sent. 7167 */ 7168 stp->sd_flag |= STRGETINPROG; 7169 mutex_exit(&stp->sd_lock); 7170 7171 if ((stp->sd_rputdatafunc != NULL) && (DB_TYPE(bp) == M_DATA) && 7172 (!(DB_FLAGS(bp) & DBLK_COOKED))) { 7173 7174 bp = (stp->sd_rputdatafunc)( 7175 stp->sd_vnode, bp, NULL, 7176 NULL, NULL, NULL); 7177 7178 if (bp == NULL) 7179 goto retry; 7180 7181 DB_FLAGS(bp) |= DBLK_COOKED; 7182 } 7183 7184 if (STREAM_NEEDSERVICE(stp)) 7185 stream_runservice(stp); 7186 7187 /* 7188 * Set HIPRI flag if message is priority. 7189 */ 7190 if (type >= QPCTL) 7191 flg = MSG_HIPRI; 7192 else 7193 flg = MSG_BAND; 7194 7195 /* 7196 * First process PROTO or PCPROTO blocks, if any. 7197 */ 7198 if (mctlp != NULL && type != M_DATA) { 7199 mblk_t *nbp; 7200 7201 *mctlp = bp; 7202 while (bp->b_cont && bp->b_cont->b_datap->db_type != M_DATA) 7203 bp = bp->b_cont; 7204 nbp = bp->b_cont; 7205 bp->b_cont = NULL; 7206 bp = nbp; 7207 } 7208 7209 if (bp && bp->b_datap->db_type != M_DATA) { 7210 /* 7211 * More PROTO blocks in msg. Will only happen if mctlp is NULL. 
7212 */ 7213 more |= MORECTL; 7214 savemp = bp; 7215 while (bp && bp->b_datap->db_type != M_DATA) { 7216 savemptail = bp; 7217 bp = bp->b_cont; 7218 } 7219 savemptail->b_cont = NULL; 7220 } 7221 7222 /* 7223 * Now process DATA blocks, if any. 7224 */ 7225 if (uiop == NULL) { 7226 /* Append data to tail of mctlp */ 7227 if (mctlp != NULL) { 7228 mblk_t **mpp = mctlp; 7229 7230 while (*mpp != NULL) 7231 mpp = &((*mpp)->b_cont); 7232 *mpp = bp; 7233 bp = NULL; 7234 } 7235 } else if (uiop->uio_resid >= 0 && bp) { 7236 size_t oldresid = uiop->uio_resid; 7237 7238 /* 7239 * If a streams message is likely to consist 7240 * of many small mblks, it is pulled up into 7241 * one continuous chunk of memory. 7242 * see longer comment at top of page 7243 * by mblk_pull_len declaration. 7244 */ 7245 7246 if (MBLKL(bp) < mblk_pull_len) { 7247 (void) pullupmsg(bp, -1); 7248 } 7249 7250 bp = struiocopyout(bp, uiop, &error); 7251 if (error != 0) { 7252 if (mctlp != NULL) { 7253 freemsg(*mctlp); 7254 *mctlp = NULL; 7255 } else 7256 freemsg(savemp); 7257 mutex_enter(&stp->sd_lock); 7258 /* 7259 * clear stream head hi pri flag based on 7260 * first message 7261 */ 7262 if (!(flags & MSG_IPEEK) && (type >= QPCTL)) { 7263 ASSERT(type == M_PCPROTO); 7264 stp->sd_flag &= ~STRPRI; 7265 } 7266 more = 0; 7267 goto getmout; 7268 } 7269 /* 7270 * (pr == 1) indicates a partial read. 7271 */ 7272 if (oldresid > uiop->uio_resid) 7273 pr = 1; 7274 } 7275 7276 if (bp) { /* more data blocks in msg */ 7277 more |= MOREDATA; 7278 if (savemp) 7279 savemptail->b_cont = bp; 7280 else 7281 savemp = bp; 7282 } 7283 7284 mutex_enter(&stp->sd_lock); 7285 if (savemp) { 7286 if (flags & (MSG_IPEEK|MSG_DISCARDTAIL)) { 7287 /* 7288 * When MSG_DISCARDTAIL is set or 7289 * when peeking discard any tail. When peeking this 7290 * is the tail of the dup that was copied out - the 7291 * message has already been putback on the queue. 7292 * Return MOREDATA to the caller even though the data 7293 * is discarded. 
This is used by sockets (to 7294 * set MSG_TRUNC). 7295 */ 7296 freemsg(savemp); 7297 if (!(flags & MSG_IPEEK) && (type >= QPCTL)) { 7298 ASSERT(type == M_PCPROTO); 7299 stp->sd_flag &= ~STRPRI; 7300 } 7301 } else if (pr && (savemp->b_datap->db_type == M_DATA) && 7302 msgnodata(savemp)) { 7303 /* 7304 * Avoid queuing a zero-length tail part of 7305 * a message. pr=1 indicates that we read some of 7306 * the message. 7307 */ 7308 freemsg(savemp); 7309 more &= ~MOREDATA; 7310 if (type >= QPCTL) { 7311 ASSERT(type == M_PCPROTO); 7312 stp->sd_flag &= ~STRPRI; 7313 } 7314 } else { 7315 savemp->b_band = pri; 7316 /* 7317 * If the first message was HIPRI and the one we're 7318 * putting back isn't, then clear STRPRI, otherwise 7319 * set STRPRI again. Note that we must set STRPRI 7320 * again since the flush logic in strrput_nondata() 7321 * may have cleared it while we had sd_lock dropped. 7322 */ 7323 if (type >= QPCTL) { 7324 ASSERT(type == M_PCPROTO); 7325 if (queclass(savemp) < QPCTL) 7326 stp->sd_flag &= ~STRPRI; 7327 else 7328 stp->sd_flag |= STRPRI; 7329 } else if (queclass(savemp) >= QPCTL) { 7330 /* 7331 * The first message was not a HIPRI message, 7332 * but the one we are about to putback is. 7333 * For simplicitly, we do not allow for HIPRI 7334 * messages to be embedded in the message 7335 * body, so just force it to same type as 7336 * first message. 7337 */ 7338 ASSERT(type == M_DATA || type == M_PROTO); 7339 ASSERT(savemp->b_datap->db_type == M_PCPROTO); 7340 savemp->b_datap->db_type = type; 7341 } 7342 if (mark != 0) { 7343 if ((mark & _LASTMARK) && 7344 (stp->sd_mark == NULL)) { 7345 /* 7346 * If another marked message arrived 7347 * while sd_lock was not held sd_mark 7348 * would be non-NULL. 7349 */ 7350 stp->sd_mark = savemp; 7351 } 7352 savemp->b_flag |= mark & ~_LASTMARK; 7353 } 7354 putback(stp, q, savemp, pri); 7355 } 7356 } else if (!(flags & MSG_IPEEK)) { 7357 /* 7358 * The complete message was consumed. 
7359 * 7360 * If another M_PCPROTO arrived while sd_lock was not held 7361 * it would have been discarded since STRPRI was still set. 7362 * 7363 * Move the MSG*MARKNEXT information 7364 * to the stream head just in case 7365 * the read queue becomes empty. 7366 * clear stream head hi pri flag based on 7367 * first message 7368 * 7369 * If the stream head was at the mark 7370 * (STRATMARK) before we dropped sd_lock above 7371 * and some data was consumed then we have 7372 * moved past the mark thus STRATMARK is 7373 * cleared. However, if a message arrived in 7374 * strrput during the copyout above causing 7375 * STRATMARK to be set we can not clear that 7376 * flag. 7377 * XXX A "perimeter" would help by single-threading strrput, 7378 * strread, strgetmsg and kstrgetmsg. 7379 */ 7380 if (type >= QPCTL) { 7381 ASSERT(type == M_PCPROTO); 7382 stp->sd_flag &= ~STRPRI; 7383 } 7384 if (mark & (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) { 7385 if (mark & MSGMARKNEXT) { 7386 stp->sd_flag &= ~STRNOTATMARK; 7387 stp->sd_flag |= STRATMARK; 7388 } else if (mark & MSGNOTMARKNEXT) { 7389 stp->sd_flag &= ~STRATMARK; 7390 stp->sd_flag |= STRNOTATMARK; 7391 } else { 7392 stp->sd_flag &= ~(STRATMARK|STRNOTATMARK); 7393 } 7394 } else if (pr && (old_sd_flag & STRATMARK)) { 7395 stp->sd_flag &= ~STRATMARK; 7396 } 7397 } 7398 7399 *flagsp = flg; 7400 *prip = pri; 7401 7402 /* 7403 * Getmsg cleanup processing - if the state of the queue has changed 7404 * some signals may need to be sent and/or poll awakened. 7405 */ 7406 getmout: 7407 qbackenable(q, pri); 7408 7409 /* 7410 * We dropped the stream head lock above. Send all M_SIG messages 7411 * before processing stream head for SIGPOLL messages. 7412 */ 7413 ASSERT(MUTEX_HELD(&stp->sd_lock)); 7414 while ((bp = q->q_first) != NULL && 7415 (bp->b_datap->db_type == M_SIG)) { 7416 /* 7417 * sd_lock is held so the content of the read queue can not 7418 * change. 
7419 */ 7420 bp = getq(q); 7421 ASSERT(bp != NULL && bp->b_datap->db_type == M_SIG); 7422 7423 strsignal_nolock(stp, *bp->b_rptr, (int32_t)bp->b_band); 7424 mutex_exit(&stp->sd_lock); 7425 freemsg(bp); 7426 if (STREAM_NEEDSERVICE(stp)) 7427 stream_runservice(stp); 7428 mutex_enter(&stp->sd_lock); 7429 } 7430 7431 /* 7432 * stream head cannot change while we make the determination 7433 * whether or not to send a signal. Drop the flag to allow strrput 7434 * to send firstmsgsigs again. 7435 */ 7436 stp->sd_flag &= ~STRGETINPROG; 7437 7438 /* 7439 * If the type of message at the front of the queue changed 7440 * due to the receive the appropriate signals and pollwakeup events 7441 * are generated. The type of changes are: 7442 * Processed a hipri message, q_first is not hipri. 7443 * Processed a band X message, and q_first is band Y. 7444 * The generated signals and pollwakeups are identical to what 7445 * strrput() generates should the message that is now on q_first 7446 * arrive to an empty read queue. 7447 * 7448 * Note: only strrput will send a signal for a hipri message. 7449 */ 7450 if ((bp = q->q_first) != NULL && !(stp->sd_flag & STRPRI)) { 7451 strsigset_t signals = 0; 7452 strpollset_t pollwakeups = 0; 7453 7454 if (flg & MSG_HIPRI) { 7455 /* 7456 * Removed a hipri message. Regular data at 7457 * the front of the queue. 7458 */ 7459 if (bp->b_band == 0) { 7460 signals = S_INPUT | S_RDNORM; 7461 pollwakeups = POLLIN | POLLRDNORM; 7462 } else { 7463 signals = S_INPUT | S_RDBAND; 7464 pollwakeups = POLLIN | POLLRDBAND; 7465 } 7466 } else if (pri != bp->b_band) { 7467 /* 7468 * The band is different for the new q_first. 
7469 */ 7470 if (bp->b_band == 0) { 7471 signals = S_RDNORM; 7472 pollwakeups = POLLIN | POLLRDNORM; 7473 } else { 7474 signals = S_RDBAND; 7475 pollwakeups = POLLIN | POLLRDBAND; 7476 } 7477 } 7478 7479 if (pollwakeups != 0) { 7480 if (pollwakeups == (POLLIN | POLLRDNORM)) { 7481 if (!(stp->sd_rput_opt & SR_POLLIN)) 7482 goto no_pollwake; 7483 stp->sd_rput_opt &= ~SR_POLLIN; 7484 } 7485 mutex_exit(&stp->sd_lock); 7486 pollwakeup(&stp->sd_pollist, pollwakeups); 7487 mutex_enter(&stp->sd_lock); 7488 } 7489 no_pollwake: 7490 7491 if (stp->sd_sigflags & signals) 7492 strsendsig(stp->sd_siglist, signals, bp->b_band, 0); 7493 } 7494 mutex_exit(&stp->sd_lock); 7495 7496 rvp->r_val1 = more; 7497 return (error); 7498 #undef _LASTMARK 7499 } 7500 7501 /* 7502 * Put a message downstream. 7503 * 7504 * NOTE: strputmsg and kstrputmsg have much of the logic in common. 7505 */ 7506 int 7507 strputmsg( 7508 struct vnode *vp, 7509 struct strbuf *mctl, 7510 struct strbuf *mdata, 7511 unsigned char pri, 7512 int flag, 7513 int fmode) 7514 { 7515 struct stdata *stp; 7516 queue_t *wqp; 7517 mblk_t *mp; 7518 ssize_t msgsize; 7519 ssize_t rmin, rmax; 7520 int error; 7521 struct uio uios; 7522 struct uio *uiop = &uios; 7523 struct iovec iovs; 7524 int xpg4 = 0; 7525 7526 ASSERT(vp->v_stream); 7527 stp = vp->v_stream; 7528 wqp = stp->sd_wrq; 7529 7530 /* 7531 * If it is an XPG4 application, we need to send 7532 * SIGPIPE below 7533 */ 7534 7535 xpg4 = (flag & MSG_XPG4) ? 
1 : 0; 7536 flag &= ~MSG_XPG4; 7537 7538 if (audit_active) 7539 audit_strputmsg(vp, mctl, mdata, pri, flag, fmode); 7540 7541 mutex_enter(&stp->sd_lock); 7542 7543 if ((error = i_straccess(stp, JCWRITE)) != 0) { 7544 mutex_exit(&stp->sd_lock); 7545 return (error); 7546 } 7547 7548 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { 7549 error = strwriteable(stp, B_FALSE, xpg4); 7550 if (error != 0) { 7551 mutex_exit(&stp->sd_lock); 7552 return (error); 7553 } 7554 } 7555 7556 mutex_exit(&stp->sd_lock); 7557 7558 /* 7559 * Check for legal flag value. 7560 */ 7561 switch (flag) { 7562 case MSG_HIPRI: 7563 if ((mctl->len < 0) || (pri != 0)) 7564 return (EINVAL); 7565 break; 7566 case MSG_BAND: 7567 break; 7568 7569 default: 7570 return (EINVAL); 7571 } 7572 7573 TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_IN, 7574 "strputmsg in:stp %p", stp); 7575 7576 /* get these values from those cached in the stream head */ 7577 rmin = stp->sd_qn_minpsz; 7578 rmax = stp->sd_qn_maxpsz; 7579 7580 /* 7581 * Make sure ctl and data sizes together fall within the 7582 * limits of the max and min receive packet sizes and do 7583 * not exceed system limit. 7584 */ 7585 ASSERT((rmax >= 0) || (rmax == INFPSZ)); 7586 if (rmax == 0) { 7587 return (ERANGE); 7588 } 7589 /* 7590 * Use the MAXIMUM of sd_maxblk and q_maxpsz. 7591 * Needed to prevent partial failures in the strmakedata loop. 
7592 */ 7593 if (stp->sd_maxblk != INFPSZ && rmax != INFPSZ && rmax < stp->sd_maxblk) 7594 rmax = stp->sd_maxblk; 7595 7596 if ((msgsize = mdata->len) < 0) { 7597 msgsize = 0; 7598 rmin = 0; /* no range check for NULL data part */ 7599 } 7600 if ((msgsize < rmin) || 7601 ((msgsize > rmax) && (rmax != INFPSZ)) || 7602 (mctl->len > strctlsz)) { 7603 return (ERANGE); 7604 } 7605 7606 /* 7607 * Setup uio and iov for data part 7608 */ 7609 iovs.iov_base = mdata->buf; 7610 iovs.iov_len = msgsize; 7611 uios.uio_iov = &iovs; 7612 uios.uio_iovcnt = 1; 7613 uios.uio_loffset = 0; 7614 uios.uio_segflg = UIO_USERSPACE; 7615 uios.uio_fmode = fmode; 7616 uios.uio_extflg = UIO_COPY_DEFAULT; 7617 uios.uio_resid = msgsize; 7618 uios.uio_offset = 0; 7619 7620 /* Ignore flow control in strput for HIPRI */ 7621 if (flag & MSG_HIPRI) 7622 flag |= MSG_IGNFLOW; 7623 7624 for (;;) { 7625 int done = 0; 7626 7627 /* 7628 * strput will always free the ctl mblk - even when strput 7629 * fails. 7630 */ 7631 if ((error = strmakectl(mctl, flag, fmode, &mp)) != 0) { 7632 TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT, 7633 "strputmsg out:stp %p out %d error %d", 7634 stp, 1, error); 7635 return (error); 7636 } 7637 /* 7638 * Verify that the whole message can be transferred by 7639 * strput. 7640 */ 7641 ASSERT(stp->sd_maxblk == INFPSZ || 7642 stp->sd_maxblk >= mdata->len); 7643 7644 msgsize = mdata->len; 7645 error = strput(stp, mp, uiop, &msgsize, 0, pri, flag); 7646 mdata->len = msgsize; 7647 7648 if (error == 0) 7649 break; 7650 7651 if (error != EWOULDBLOCK) 7652 goto out; 7653 7654 mutex_enter(&stp->sd_lock); 7655 /* 7656 * Check for a missed wakeup. 7657 * Needed since strput did not hold sd_lock across 7658 * the canputnext. 
7659 */ 7660 if (bcanputnext(wqp, pri)) { 7661 /* Try again */ 7662 mutex_exit(&stp->sd_lock); 7663 continue; 7664 } 7665 TRACE_2(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAIT, 7666 "strputmsg wait:stp %p waits pri %d", stp, pri); 7667 if (((error = strwaitq(stp, WRITEWAIT, (ssize_t)0, fmode, -1, 7668 &done)) != 0) || done) { 7669 mutex_exit(&stp->sd_lock); 7670 TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT, 7671 "strputmsg out:q %p out %d error %d", 7672 stp, 0, error); 7673 return (error); 7674 } 7675 TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAKE, 7676 "strputmsg wake:stp %p wakes", stp); 7677 if ((error = i_straccess(stp, JCWRITE)) != 0) { 7678 mutex_exit(&stp->sd_lock); 7679 return (error); 7680 } 7681 mutex_exit(&stp->sd_lock); 7682 } 7683 out: 7684 /* 7685 * For historic reasons, applications expect EAGAIN 7686 * when data mblk could not be allocated. so change 7687 * ENOMEM back to EAGAIN 7688 */ 7689 if (error == ENOMEM) 7690 error = EAGAIN; 7691 TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT, 7692 "strputmsg out:stp %p out %d error %d", stp, 2, error); 7693 return (error); 7694 } 7695 7696 /* 7697 * Put a message downstream. 7698 * Can send only an M_PROTO/M_PCPROTO by passing in a NULL uiop. 7699 * The fmode flag (NDELAY, NONBLOCK) is the or of the flags in the uio 7700 * and the fmode parameter. 7701 * 7702 * This routine handles the consolidation private flags: 7703 * MSG_IGNERROR Ignore any stream head error except STPLEX. 7704 * MSG_HOLDSIG Hold signals while waiting for data. 7705 * MSG_IGNFLOW Don't check streams flow control. 7706 * 7707 * NOTE: strputmsg and kstrputmsg have much of the logic in common. 
*/
int
kstrputmsg(
	struct vnode *vp,	/* stream's vnode; vp->v_stream must be set */
	mblk_t *mctl,		/* control message; NULL is rejected */
	struct uio *uiop,	/* data part, or NULL for control only */
	ssize_t msgsize,	/* size of data part; ignored if !uiop */
	unsigned char pri,	/* priority band; must be 0 for MSG_HIPRI */
	int flag,		/* MSG_HIPRI or MSG_BAND plus private flags */
	int fmode)		/* file mode flags, or'ed with uio_fmode */
{
	struct stdata *stp;
	queue_t *wqp;
	ssize_t rmin, rmax;
	int error;

	ASSERT(vp->v_stream);
	stp = vp->v_stream;
	wqp = stp->sd_wrq;
	if (audit_active)
		audit_strputmsg(vp, NULL, NULL, pri, flag, fmode);
	/* A control part is mandatory. */
	if (mctl == NULL)
		return (EINVAL);

	/*
	 * From here on every return path calls freemsg(mctl), either
	 * directly or at the "out" label, so the caller never gets the
	 * control message back.
	 */
	mutex_enter(&stp->sd_lock);

	if ((error = i_straccess(stp, JCWRITE)) != 0) {
		mutex_exit(&stp->sd_lock);
		freemsg(mctl);
		return (error);
	}

	/*
	 * MSG_IGNERROR skips the stream head error check unless the
	 * stream is multiplexed (STPLEX), which is always checked.
	 */
	if ((stp->sd_flag & STPLEX) || !(flag & MSG_IGNERROR)) {
		if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
			error = strwriteable(stp, B_FALSE, B_TRUE);
			if (error != 0) {
				mutex_exit(&stp->sd_lock);
				freemsg(mctl);
				return (error);
			}
		}
	}

	mutex_exit(&stp->sd_lock);

	/*
	 * Check for legal flag value.  After masking, only MSG_HIPRI
	 * alone or MSG_BAND alone is accepted; the consolidation
	 * private bits are outside the mask and do not affect the check.
	 */
	switch (flag & (MSG_HIPRI|MSG_BAND|MSG_ANY)) {
	case MSG_HIPRI:
		if (pri != 0) {
			freemsg(mctl);
			return (EINVAL);
		}
		break;
	case MSG_BAND:
		break;
	default:
		freemsg(mctl);
		return (EINVAL);
	}

	TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_IN,
	    "kstrputmsg in:stp %p", stp);

	/* get these values from those cached in the stream head */
	rmin = stp->sd_qn_minpsz;
	rmax = stp->sd_qn_maxpsz;

	/*
	 * Make sure ctl and data sizes together fall within the
	 * limits of the max and min receive packet sizes and do
	 * not exceed system limit.
	 */
	ASSERT((rmax >= 0) || (rmax == INFPSZ));
	if (rmax == 0) {
		freemsg(mctl);
		return (ERANGE);
	}
	/*
	 * Use the MAXIMUM of sd_maxblk and q_maxpsz.
	 * Needed to prevent partial failures in the strmakedata loop.
	 */
	if (stp->sd_maxblk != INFPSZ && rmax != INFPSZ && rmax < stp->sd_maxblk)
		rmax = stp->sd_maxblk;

	if (uiop == NULL) {
		/*
		 * No data part: the caller's msgsize is discarded and -1
		 * is what gets passed to strput() below.
		 */
		msgsize = -1;
		rmin = -1;	/* no range check for NULL data part */
	} else {
		/* Use uio flags as well as the fmode parameter flags */
		fmode |= uiop->uio_fmode;

		if ((msgsize < rmin) ||
		    ((msgsize > rmax) && (rmax != INFPSZ))) {
			freemsg(mctl);
			return (ERANGE);
		}
	}

	/* Ignore flow control in strput for HIPRI */
	if (flag & MSG_HIPRI)
		flag |= MSG_IGNFLOW;

	for (;;) {
		int done = 0;
		int waitflag;
		mblk_t *mp;

		/*
		 * strput will always free the ctl mblk - even when strput
		 * fails. If MSG_IGNFLOW is set then any error returned
		 * will cause us to break the loop, so we don't need a copy
		 * of the message. If MSG_IGNFLOW is not set, then we can
		 * get hit by flow control and be forced to try again. In
		 * this case we need to have a copy of the message. We
		 * do this using copymsg since the message may get modified
		 * by something below us.
		 *
		 * We've observed that many TPI providers do not check db_ref
		 * on the control messages but blindly reuse them for the
		 * T_OK_ACK/T_ERROR_ACK. Thus using copymsg is more
		 * friendly to such providers than using dupmsg. Also, note
		 * that sockfs uses MSG_IGNFLOW for all TPI control messages.
		 * Only data messages are subject to flow control, hence
		 * subject to this copymsg.
		 */
		if (flag & MSG_IGNFLOW) {
			/*
			 * Hand the original to strput and clear mctl so
			 * the freemsg(mctl) calls further down do not
			 * touch a message strput has already freed.
			 */
			mp = mctl;
			mctl = NULL;
		} else {
			/*
			 * Keep mctl for a possible retry and send a
			 * private copy.  Loop until the copy succeeds or
			 * strwaitbuf() fails.
			 */
			do {
				/*
				 * If a message has a free pointer, the message
				 * must be dupmsg to maintain this pointer.
				 * Code using this facility must be sure
				 * that modules below will not change the
				 * contents of the dblk without checking db_ref
				 * first. If db_ref is > 1, then the module
				 * needs to do a copymsg first. Otherwise,
				 * the contents of the dblk may become
				 * inconsistent because the freemsg/freeb below
				 * may end up calling atomic_add_32_nv.
				 * The atomic_add_32_nv in freeb (accessing
				 * all of db_ref, db_type, db_flags, and
				 * db_struioflag) does not prevent other threads
				 * from concurrently trying to modify e.g.
				 * db_type.
				 */
				if (mctl->b_datap->db_frtnp != NULL)
					mp = dupmsg(mctl);
				else
					mp = copymsg(mctl);

				if (mp != NULL)
					break;

				error = strwaitbuf(msgdsize(mctl), BPRI_MED);
				if (error) {
					freemsg(mctl);
					return (error);
				}
			} while (mp == NULL);
		}
		/*
		 * Verify that all of msgsize can be transferred by
		 * strput.
		 */
		ASSERT(stp->sd_maxblk == INFPSZ || stp->sd_maxblk >= msgsize);
		error = strput(stp, mp, uiop, &msgsize, 0, pri, flag);
		if (error == 0)
			break;

		/* Anything but "would block" is final. */
		if (error != EWOULDBLOCK)
			goto out;

		/*
		 * IF MSG_IGNFLOW is set we should have broken out of loop
		 * above.
		 */
		ASSERT(!(flag & MSG_IGNFLOW));
		mutex_enter(&stp->sd_lock);
		/*
		 * Check for a missed wakeup.
		 * Needed since strput did not hold sd_lock across
		 * the canputnext.
		 */
		if (bcanputnext(wqp, pri)) {
			/* Try again */
			mutex_exit(&stp->sd_lock);
			continue;
		}
		TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAIT,
		    "kstrputmsg wait:stp %p waits pri %d", stp, pri);

		/* Translate the private MSG_* flags into strwaitq flags. */
		waitflag = WRITEWAIT;
		if (flag & (MSG_HOLDSIG|MSG_IGNERROR)) {
			if (flag & MSG_HOLDSIG)
				waitflag |= STR_NOSIG;
			if (flag & MSG_IGNERROR)
				waitflag |= STR_NOERROR;
		}
		if (((error = strwaitq(stp, waitflag,
		    (ssize_t)0, fmode, -1, &done)) != 0) || done) {
			mutex_exit(&stp->sd_lock);
			TRACE_3(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_OUT,
			    "kstrputmsg out:stp %p out %d error %d",
			    stp, 0, error);
			freemsg(mctl);
			return (error);
		}
		TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAKE,
		    "kstrputmsg wake:stp %p wakes", stp);
		/* Re-check access after the wait, still under sd_lock. */
		if ((error = i_straccess(stp, JCWRITE)) != 0) {
			mutex_exit(&stp->sd_lock);
			freemsg(mctl);
			return (error);
		}
		mutex_exit(&stp->sd_lock);
	}
out:
	/*
	 * mctl is NULL here when it was handed to strput directly
	 * (MSG_IGNFLOW); otherwise it is the retained original.
	 */
	freemsg(mctl);
	/*
	 * For historic reasons, applications expect EAGAIN
	 * when data mblk could not be allocated, so change
	 * ENOMEM back to EAGAIN.
	 */
	if (error == ENOMEM)
		error = EAGAIN;
	TRACE_3(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_OUT,
	    "kstrputmsg out:stp %p out %d error %d", stp, 2, error);
	return (error);
}

/*
 * Determines whether the necessary conditions are set on a stream
 * for it to be readable, writeable, or have exceptions.
 *
 * strpoll handles the consolidation private events:
 *	POLLNOERR	Do not return POLLERR even if there are stream
 *			head errors.
 *			Used by sockfs.
 *	POLLRDDATA	Do not return POLLIN unless at least one message on
 *			the queue contains one or more M_DATA mblks. Thus
 *			when this flag is set a queue with only
 *			M_PROTO/M_PCPROTO mblks does not return POLLIN.
 *			Used by sockfs to ignore T_EXDATA_IND messages.
*
 * Note: POLLRDDATA assumes that synch streams only return messages with
 * an M_DATA attached (i.e. not messages consisting of only
 * an M_PROTO/M_PCPROTO part).
 */
int
strpoll(
	struct stdata *stp,	/* stream head being polled */
	short events_arg,	/* requested events */
	int anyyet,		/* non-zero: caller already has events */
	short *reventsp,	/* out: events found on this stream */
	struct pollhead **phpp)	/* out: set only if nothing found, !anyyet */
{
	/* Go through ushort_t so the event bits are zero-extended. */
	int events = (ushort_t)events_arg;
	int retevents = 0;
	mblk_t *mp;
	qband_t *qbp;
	/* One sample of sd_flag, taken before any lock is acquired. */
	long sd_flags = stp->sd_flag;
	int headlocked = 0;	/* non-zero while this routine holds sd_lock */

	/*
	 * For performance, a single 'if' tests for most possible edge
	 * conditions in one shot
	 */
	if (sd_flags & (STPLEX | STRDERR | STWRERR)) {
		if (sd_flags & STPLEX) {
			/* The only path that returns non-zero. */
			*reventsp = POLLNVAL;
			return (EINVAL);
		}
		if (((events & (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) &&
		    (sd_flags & STRDERR)) ||
		    ((events & (POLLOUT | POLLWRNORM | POLLWRBAND)) &&
		    (sd_flags & STWRERR))) {
			if (!(events & POLLNOERR)) {
				*reventsp = POLLERR;
				return (0);
			}
		}
	}
	if (sd_flags & STRHUP) {
		/* Hung up: report POLLHUP and skip the write-side checks. */
		retevents |= POLLHUP;
	} else if (events & (POLLWRNORM | POLLWRBAND)) {
		queue_t *tq;
		queue_t	*qp = stp->sd_wrq;

		/* claimstr()/releasestr() bracket the write-side walk. */
		claimstr(qp);
		/* Find next module forward that has a service procedure */
		tq = qp->q_next->q_nfsrv;
		ASSERT(tq != NULL);

		polllock(&stp->sd_pollist, QLOCK(tq));
		if (events & POLLWRNORM) {
			queue_t *sqp;

			if (tq->q_flag & QFULL)
				/* ensure backq svc procedure runs */
				tq->q_flag |= QWANTW;
			else if ((sqp = stp->sd_struiowrq) != NULL) {
				/* Check sync stream barrier write q */
				/* Swap QLOCK(tq) for QLOCK(sqp). */
				mutex_exit(QLOCK(tq));
				polllock(&stp->sd_pollist, QLOCK(sqp));
				if (sqp->q_flag & QFULL)
					/* ensure pollwakeup() is done */
					sqp->q_flag |= QWANTWSYNC;
				else
					retevents |= POLLOUT;
				/* More write events to process ??? */
				if (! (events & POLLWRBAND)) {
					mutex_exit(QLOCK(sqp));
					releasestr(qp);
					goto chkrd;
				}
				/* Back to QLOCK(tq) for the band scan. */
				mutex_exit(QLOCK(sqp));
				polllock(&stp->sd_pollist, QLOCK(tq));
			} else
				retevents |= POLLOUT;
		}
		if (events & POLLWRBAND) {
			/*
			 * POLLWRBAND is reported when the queue has no
			 * bands, or when at least one band is not full.
			 * Every full band gets QB_WANTW set.
			 */
			qbp = tq->q_bandp;
			if (qbp) {
				while (qbp) {
					if (qbp->qb_flag & QB_FULL)
						qbp->qb_flag |= QB_WANTW;
					else
						retevents |= POLLWRBAND;
					qbp = qbp->qb_next;
				}
			} else {
				retevents |= POLLWRBAND;
			}
		}
		mutex_exit(QLOCK(tq));
		releasestr(qp);
	}
chkrd:
	if (sd_flags & STRPRI) {
		/* A priority message is pending: only POLLPRI can fire. */
		retevents |= (events & POLLPRI);
	} else if (events & (POLLRDNORM | POLLRDBAND | POLLIN)) {
		queue_t	*qp = _RD(stp->sd_wrq);
		int normevents = (events & (POLLIN | POLLRDNORM));

		/*
		 * Note: Need to do polllock() here since ps_lock may be
		 * held. See bug 4191544.
		 */
		polllock(&stp->sd_pollist, &stp->sd_lock);
		headlocked = 1;
		/*
		 * The first message that qualifies decides which read
		 * events are reported; the scan stops there.
		 */
		mp = qp->q_first;
		while (mp) {
			/*
			 * For POLLRDDATA we scan b_cont and b_next until we
			 * find an M_DATA.
			 */
			if ((events & POLLRDDATA) &&
			    mp->b_datap->db_type != M_DATA) {
				mblk_t *nmp = mp->b_cont;

				while (nmp != NULL &&
				    nmp->b_datap->db_type != M_DATA)
					nmp = nmp->b_cont;
				if (nmp == NULL) {
					/* No M_DATA in this message. */
					mp = mp->b_next;
					continue;
				}
			}
			if (mp->b_band == 0)
				retevents |= normevents;
			else
				retevents |= (events & (POLLIN | POLLRDBAND));
			break;
		}
		if (! (retevents & normevents) &&
		    (stp->sd_wakeq & RSLEEP)) {
			/*
			 * Sync stream barrier read queue has data.
			 */
			retevents |= normevents;
		}
		/* Treat eof as normal data */
		if (sd_flags & STREOF)
			retevents |= normevents;
	}

	*reventsp = (short)retevents;
	if (retevents) {
		if (headlocked)
			mutex_exit(&stp->sd_lock);
		return (0);
	}

	/*
	 * If poll() has not found any events yet, set up event cell
	 * to wake up the poll if a requested event occurs on this
	 * stream.  Check for collisions with outstanding poll requests.
	 */
	if (!anyyet) {
		*phpp = &stp->sd_pollist;
		/* sd_rput_opt is updated under sd_lock. */
		if (headlocked == 0) {
			polllock(&stp->sd_pollist, &stp->sd_lock);
			headlocked = 1;
		}
		stp->sd_rput_opt |= SR_POLLIN;
	}
	if (headlocked)
		mutex_exit(&stp->sd_lock);
	return (0);
}

/*
 * The purpose of putback() is to assure sleeping polls/reads
 * are awakened when there are no new messages arriving at the
 * stream head, and a message is placed back on the read queue.
 *
 * sd_lock must be held when messages are placed back on stream
 * head.  (getq() holds sd_lock when it removes messages from
 * the queue)
 *
 *	stp	stream head; the caller holds stp->sd_lock.
 *	q	queue the message is put back on.
 *	bp	message being put back.
 *	band	only used to pick the poll events to post.
 *
 * If bp ends up at the front of the queue and a pollwakeup is posted,
 * sd_lock is dropped around the pollwakeup() call and reacquired before
 * returning.
 */

static void
putback(struct stdata *stp, queue_t *q, mblk_t *bp, int band)
{
	mblk_t	*qfirst = q->q_first;
	ASSERT(MUTEX_HELD(&stp->sd_lock));

	if ((stp->sd_rput_opt & SR_CONSOL_DATA) &&
	    (qfirst != NULL) &&
	    (qfirst->b_datap->db_type == M_DATA) &&
	    ((qfirst->b_flag & (MSGMARK|MSGDELIM)) == 0)) {
		/*
		 * We use the same logic as defined in strrput()
		 * but in reverse as we are putting back onto the
		 * queue and want to retain byte ordering.
		 * Consolidate an M_DATA message onto an M_DATA,
		 * M_PROTO, or M_PCPROTO by merging it with q_first.
		 * The consolidation does not take place if the message
		 * we are returning to the queue is marked with either
		 * of the marks or the delim flag or if q_first
		 * is marked with MSGMARK. The MSGMARK check is needed to
		 * handle the odd semantics of MSGMARK where essentially
		 * the whole message is to be treated as marked.
		 * Carry any MSGMARKNEXT and MSGNOTMARKNEXT from q_first
		 * to the front of the b_cont chain.
		 */
		unsigned char db_type = bp->b_datap->db_type;

		if ((db_type == M_DATA || db_type == M_PROTO ||
		    db_type == M_PCPROTO) &&
		    !(bp->b_flag & (MSGMARK|MSGDELIM|MSGMARKNEXT))) {
			/* Take q_first off the queue; it is linked to bp. */
			rmvq_noenab(q, qfirst);
			/*
			 * The first message in the b_cont list
			 * tracks MSGMARKNEXT and MSGNOTMARKNEXT.
			 * We need to handle the case where we
			 * are appending:
			 *
			 * 1) a MSGMARKNEXT to a MSGNOTMARKNEXT.
			 * 2) a MSGMARKNEXT to a plain message.
			 * 3) a MSGNOTMARKNEXT to a plain message
			 * 4) a MSGNOTMARKNEXT to a MSGNOTMARKNEXT
			 *    message.
			 *
			 * Thus we never append a MSGMARKNEXT or
			 * MSGNOTMARKNEXT to a MSGMARKNEXT message.
			 */
			if (qfirst->b_flag & MSGMARKNEXT) {
				bp->b_flag |= MSGMARKNEXT;
				bp->b_flag &= ~MSGNOTMARKNEXT;
				qfirst->b_flag &= ~MSGMARKNEXT;
			} else if (qfirst->b_flag & MSGNOTMARKNEXT) {
				bp->b_flag |= MSGNOTMARKNEXT;
				qfirst->b_flag &= ~MSGNOTMARKNEXT;
			}

			linkb(bp, qfirst);
		}
	}
	(void) putbq(q, bp);

	/*
	 * A message may have come in when the sd_lock was dropped in the
	 * calling routine. If this is the case and STR*ATMARK info was
	 * received, need to move that from the stream head to the q_last
	 * so that SIOCATMARK can return the proper value.
	 */
	if (stp->sd_flag & (STRATMARK | STRNOTATMARK)) {
		unsigned short *flagp = &q->q_last->b_flag;
		/* Work on a wider copy, then store it back truncated. */
		uint_t b_flag = (uint_t)*flagp;

		if (stp->sd_flag & STRATMARK) {
			b_flag &= ~MSGNOTMARKNEXT;
			b_flag |= MSGMARKNEXT;
			stp->sd_flag &= ~STRATMARK;
		} else {
			b_flag &= ~MSGMARKNEXT;
			b_flag |= MSGNOTMARKNEXT;
			stp->sd_flag &= ~STRNOTATMARK;
		}
		*flagp = (unsigned short) b_flag;
	}

#ifdef DEBUG
	/*
	 * Make sure that the flags are not messed up.
	 */
	{
		mblk_t *mp;
		mp = q->q_last;
		while (mp != NULL) {
			ASSERT((mp->b_flag & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
			    (MSGMARKNEXT|MSGNOTMARKNEXT));
			mp = mp->b_cont;
		}
	}
#endif
	/* Wake sleepers and pollers only if bp is now first on the queue. */
	if (q->q_first == bp) {
		short pollevents;

		if (stp->sd_flag & RSLEEP) {
			stp->sd_flag &= ~RSLEEP;
			cv_broadcast(&q->q_wait);
		}
		if (stp->sd_flag & STRPRI) {
			pollevents = POLLPRI;
		} else {
			if (band == 0) {
				/*
				 * SR_POLLIN not set: no wakeup is posted
				 * and we return with sd_lock still held.
				 */
				if (!(stp->sd_rput_opt & SR_POLLIN))
					return;
				stp->sd_rput_opt &= ~SR_POLLIN;
				pollevents = POLLIN | POLLRDNORM;
			} else {
				pollevents = POLLIN | POLLRDBAND;
			}
		}
		/* pollwakeup() is called without sd_lock held. */
		mutex_exit(&stp->sd_lock);
		pollwakeup(&stp->sd_pollist, pollevents);
		mutex_enter(&stp->sd_lock);
	}
}

/*
 * Return the held vnode attached to the stream head of a
 * given queue.  The hold is taken here with VN_HOLD(); the function
 * itself never releases it.
 * It is the responsibility of the calling routine to ensure
 * that the queue does not go away (e.g. pop).
 */
vnode_t *
strq2vp(queue_t *qp)
{
	vnode_t *vp;
	vp = STREAM(qp)->sd_vnode;
	ASSERT(vp != NULL);
	VN_HOLD(vp);
	return (vp);
}

/*
 * return the stream head write queue for the given vp
 * It is the responsibility of the calling routine to ensure
 * that the stream or vnode do not close.
8274 */ 8275 queue_t * 8276 strvp2wq(vnode_t *vp) 8277 { 8278 ASSERT(vp->v_stream != NULL); 8279 return (vp->v_stream->sd_wrq); 8280 } 8281 8282 /* 8283 * pollwakeup stream head 8284 * It is the responsibility of the calling routine to ensure 8285 * that the stream or vnode do not close. 8286 */ 8287 void 8288 strpollwakeup(vnode_t *vp, short event) 8289 { 8290 ASSERT(vp->v_stream); 8291 pollwakeup(&vp->v_stream->sd_pollist, event); 8292 } 8293 8294 /* 8295 * Mate the stream heads of two vnodes together. If the two vnodes are the 8296 * same, we just make the write-side point at the read-side -- otherwise, 8297 * we do a full mate. Only works on vnodes associated with streams that are 8298 * still being built and thus have only a stream head. 8299 */ 8300 void 8301 strmate(vnode_t *vp1, vnode_t *vp2) 8302 { 8303 queue_t *wrq1 = strvp2wq(vp1); 8304 queue_t *wrq2 = strvp2wq(vp2); 8305 8306 /* 8307 * Verify that there are no modules on the stream yet. We also 8308 * rely on the stream head always having a service procedure to 8309 * avoid tweaking q_nfsrv. 8310 */ 8311 ASSERT(wrq1->q_next == NULL && wrq2->q_next == NULL); 8312 ASSERT(wrq1->q_qinfo->qi_srvp != NULL); 8313 ASSERT(wrq2->q_qinfo->qi_srvp != NULL); 8314 8315 /* 8316 * If the queues are the same, just twist; otherwise do a full mate. 8317 */ 8318 if (wrq1 == wrq2) { 8319 wrq1->q_next = _RD(wrq1); 8320 } else { 8321 wrq1->q_next = _RD(wrq2); 8322 wrq2->q_next = _RD(wrq1); 8323 STREAM(wrq1)->sd_mate = STREAM(wrq2); 8324 STREAM(wrq1)->sd_flag |= STRMATE; 8325 STREAM(wrq2)->sd_mate = STREAM(wrq1); 8326 STREAM(wrq2)->sd_flag |= STRMATE; 8327 } 8328 } 8329 8330 /* 8331 * XXX will go away when console is correctly fixed. 8332 * Clean up the console PIDS, from previous I_SETSIG, 8333 * called only for cnopen which never calls strclean(). 
8334 */ 8335 void 8336 str_cn_clean(struct vnode *vp) 8337 { 8338 strsig_t *ssp, *pssp, *tssp; 8339 struct stdata *stp; 8340 struct pid *pidp; 8341 int update = 0; 8342 8343 ASSERT(vp->v_stream); 8344 stp = vp->v_stream; 8345 pssp = NULL; 8346 mutex_enter(&stp->sd_lock); 8347 ssp = stp->sd_siglist; 8348 while (ssp) { 8349 mutex_enter(&pidlock); 8350 pidp = ssp->ss_pidp; 8351 /* 8352 * Get rid of PID if the proc is gone. 8353 */ 8354 if (pidp->pid_prinactive) { 8355 tssp = ssp->ss_next; 8356 if (pssp) 8357 pssp->ss_next = tssp; 8358 else 8359 stp->sd_siglist = tssp; 8360 ASSERT(pidp->pid_ref <= 1); 8361 PID_RELE(ssp->ss_pidp); 8362 mutex_exit(&pidlock); 8363 kmem_free(ssp, sizeof (strsig_t)); 8364 update = 1; 8365 ssp = tssp; 8366 continue; 8367 } else 8368 mutex_exit(&pidlock); 8369 pssp = ssp; 8370 ssp = ssp->ss_next; 8371 } 8372 if (update) { 8373 stp->sd_sigflags = 0; 8374 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next) 8375 stp->sd_sigflags |= ssp->ss_events; 8376 } 8377 mutex_exit(&stp->sd_lock); 8378 } 8379 8380 /* 8381 * Return B_TRUE if there is data in the message, B_FALSE otherwise. 8382 */ 8383 static boolean_t 8384 msghasdata(mblk_t *bp) 8385 { 8386 for (; bp; bp = bp->b_cont) 8387 if (bp->b_datap->db_type == M_DATA) { 8388 ASSERT(bp->b_wptr >= bp->b_rptr); 8389 if (bp->b_wptr > bp->b_rptr) 8390 return (B_TRUE); 8391 } 8392 return (B_FALSE); 8393 } 8394