/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/* Copyright (c) 1990 Mentat Inc. */

/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Kernel RPC filtering module
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/tihdr.h>
#include <sys/timod.h>
#include <sys/tiuser.h>
#include <sys/debug.h>
#include <sys/signal.h>
#include <sys/pcb.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/cred.h>
#include <sys/policy.h>
#include <sys/inline.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/file.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/t_lock.h>
#include <sys/ddi.h>
#include <sys/vtrace.h>
#include <sys/callb.h>

#include <sys/strlog.h>
#include <rpc/rpc_com.h>
#include <inet/common.h>
#include <rpc/types.h>
#include <sys/time.h>
#include <rpc/xdr.h>
#include <rpc/auth.h>
#include <rpc/clnt.h>
#include <rpc/rpc_msg.h>
#include <rpc/svc.h>
#include <rpc/rpcsys.h>
#include <rpc/rpc_rdma.h>

/*
 * This is the loadable module wrapper.
 */
#include <sys/conf.h>
#include <sys/modctl.h>
#include <sys/syscall.h>

extern struct streamtab rpcinfo;

static struct fmodsw fsw = {
	"rpcmod",
	&rpcinfo,
	D_NEW|D_MP,
};

/*
 * Module linkage information for the kernel.
 */

static struct modlstrmod modlstrmod = {
	&mod_strmodops, "rpc interface str mod", &fsw
};

/*
 * For the RPC system call.
 */
static struct sysent rpcsysent = {
	2,
	SE_32RVAL1 | SE_ARGC | SE_NOUNLOAD,
	rpcsys
};

static struct modlsys modlsys = {
	&mod_syscallops,
	"RPC syscall",
	&rpcsysent
};

#ifdef _SYSCALL32_IMPL
static struct modlsys modlsys32 = {
	&mod_syscallops32,
	"32-bit RPC syscall",
	&rpcsysent
};
#endif /* _SYSCALL32_IMPL */

static struct modlinkage modlinkage = {
	MODREV_1,
	{
		&modlsys,
#ifdef _SYSCALL32_IMPL
		&modlsys32,
#endif
		&modlstrmod,
		NULL
	}
};

int
_init(void)
{
	int error = 0;
	callb_id_t cid;
	int status;

	svc_init();
	clnt_init();
	cid = callb_add(connmgr_cpr_reset, 0, CB_CL_CPR_RPC, "rpc");

	if (error = mod_install(&modlinkage)) {
		/*
		 * Could not install module, cleanup previous
		 * initialization work.
		 */
		clnt_fini();
		if (cid != NULL)
			(void) callb_delete(cid);

		return (error);
	}

	/*
	 * Load up the RDMA plugins and initialize the stats.  As long as
	 * rpcmod itself was installed successfully, the counters are
	 * initialized even if the plugin load fails.
	 */
	rw_init(&rdma_lock, NULL, RW_DEFAULT, NULL);
	mutex_init(&rdma_modload_lock, NULL, MUTEX_DEFAULT, NULL);
	mt_kstat_init();

	/*
	 * Get our identification into ldi.  This is used for loading
	 * other modules, e.g. rpcib.
	 */
	status = ldi_ident_from_mod(&modlinkage, &rpcmod_li);
	if (status != 0) {
		cmn_err(CE_WARN, "ldi_ident_from_mod fails with %d", status);
		rpcmod_li = NULL;
	}

	return (error);
}
/*
 * The unload entry point fails, because we advertise entry points into
 * rpcmod from the rest of kRPC: rpcmod_release().
 */
int
_fini(void)
{
	return (EBUSY);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

extern int nulldev();

#define	RPCMOD_ID	2049

int rmm_open(), rmm_close();

/*
 * To save instructions, since STREAMS ignores the return value
 * from these functions, they are defined as void here. Kind of icky, but...
 */
void rmm_rput(queue_t *, mblk_t *);
void rmm_wput(queue_t *, mblk_t *);
void rmm_rsrv(queue_t *);
void rmm_wsrv(queue_t *);

int rpcmodopen(), rpcmodclose();
void rpcmodrput(), rpcmodwput();
void rpcmodrsrv(), rpcmodwsrv();

static void rpcmod_release(queue_t *, mblk_t *);
static void rpcmodwput_other(queue_t *, mblk_t *);
static int mir_close(queue_t *q);
static int mir_open(queue_t *q, dev_t *devp, int flag, int sflag,
    cred_t *credp);
static void mir_rput(queue_t *q, mblk_t *mp);
static void mir_rsrv(queue_t *q);
static void mir_wput(queue_t *q, mblk_t *mp);
static void mir_wsrv(queue_t *q);

static struct module_info rpcmod_info =
	{RPCMOD_ID, "rpcmod", 0, INFPSZ, 256*1024, 1024};

/*
 * Read side has no service procedure.
 */
static struct qinit rpcmodrinit = {
	(int (*)())rmm_rput,
	(int (*)())rmm_rsrv,
	rmm_open,
	rmm_close,
	nulldev,
	&rpcmod_info,
	NULL
};

/*
 * The write put procedure is simply putnext to conserve stack space.
 * The write service procedure is not used to queue data, but instead to
 * synchronize with flow control.
 */
static struct qinit rpcmodwinit = {
	(int (*)())rmm_wput,
	(int (*)())rmm_wsrv,
	rmm_open,
	rmm_close,
	nulldev,
	&rpcmod_info,
	NULL
};
struct streamtab rpcinfo = { &rpcmodrinit, &rpcmodwinit, NULL, NULL };

struct xprt_style_ops {
	int (*xo_open)();
	int (*xo_close)();
	void (*xo_wput)();
	void (*xo_wsrv)();
	void (*xo_rput)();
	void (*xo_rsrv)();
};

static struct xprt_style_ops xprt_clts_ops = {
	rpcmodopen,
	rpcmodclose,
	rpcmodwput,
	rpcmodwsrv,
	rpcmodrput,
	NULL
};

static struct xprt_style_ops xprt_cots_ops = {
	mir_open,
	mir_close,
	mir_wput,
	mir_wsrv,
	mir_rput,
	mir_rsrv
};

/*
 * Per rpcmod "slot" data structure. q->q_ptr points to one of these.
 */
struct rpcm {
	void		*rm_krpc_cell;	/* Reserved for use by KRPC */
	struct		xprt_style_ops	*rm_ops;
	int		rm_type;	/* Client or server side stream */
#define	RM_CLOSING	0x1		/* somebody is trying to close slot */
	uint_t		rm_state;	/* state of the slot, see above */
	uint_t		rm_ref;		/* cnt of external references to slot */
	kmutex_t	rm_lock;	/* mutex protecting above fields */
	kcondvar_t	rm_cwait;	/* condition for closing */
	zoneid_t	rm_zoneid;	/* zone which pushed rpcmod */
};
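/*
 * A note on reference counting, for orientation: rm_ref counts requests
 * that have been handed to the kRPC layer via svc_queuereq() and not
 * yet released.  rpcmodrput() raises it before dispatching a request,
 * and rpcmod_release() -- installed as the kRPC rpc_rele hook in
 * rpcmodopen() -- drops it, waking any closer sleeping on rm_cwait once
 * the count drains while RM_CLOSING is set.
 */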
struct temp_slot {
	void *cell;
	struct xprt_style_ops *ops;
	int type;
	mblk_t *info_ack;
	kmutex_t lock;
	kcondvar_t wait;
};

void tmp_rput(queue_t *q, mblk_t *mp);

struct xprt_style_ops tmpops = {
	NULL,
	NULL,
	putnext,
	NULL,
	tmp_rput,
	NULL
};

void
tmp_rput(queue_t *q, mblk_t *mp)
{
	struct temp_slot *t = (struct temp_slot *)(q->q_ptr);
	struct T_info_ack *pptr;

	switch (mp->b_datap->db_type) {
	case M_PCPROTO:
		pptr = (struct T_info_ack *)mp->b_rptr;
		switch (pptr->PRIM_type) {
		case T_INFO_ACK:
			mutex_enter(&t->lock);
			t->info_ack = mp;
			cv_signal(&t->wait);
			mutex_exit(&t->lock);
			return;
		default:
			break;
		}
	default:
		break;
	}

	/*
	 * Not an info-ack, so free it. This is ok because we should
	 * not be receiving data until the open finishes: rpcmod
	 * is pushed well before the end-point is bound to an address.
	 */
	freemsg(mp);
}
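/*
 * Transport-type autodetection, in brief: rmm_open() below installs the
 * temporary ops vector above, sends a T_INFO_REQ down the stream, and
 * sleeps until tmp_rput() captures the T_INFO_ACK.  If the transport
 * reports SERV_type T_CLTS, the slot is wired to xprt_clts_ops (the
 * datagram code that follows); any connection-oriented answer selects
 * xprt_cots_ops (the mir_* record-marking code later in this file).
 */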
int
rmm_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *crp)
{
	mblk_t *bp;
	struct temp_slot ts, *t;
	struct T_info_ack *pptr;
	int error = 0;
	int procson = 0;

	ASSERT(q != NULL);
	/*
	 * Check for re-opens.
	 */
	if (q->q_ptr) {
		TRACE_1(TR_FAC_KRPC, TR_RPCMODOPEN_END,
		    "rpcmodopen_end:(%s)", "q->qptr");
		return (0);
	}

	t = &ts;
	bzero(t, sizeof (*t));
	q->q_ptr = (void *)t;
	/* WR(q)->q_ptr = (void *)t; */

	/*
	 * Allocate the required messages upfront.
	 */
	if ((bp = allocb(sizeof (struct T_info_req) +
	    sizeof (struct T_info_ack), BPRI_LO)) == (mblk_t *)NULL) {
		/* Don't leave q_ptr pointing at the on-stack temp_slot. */
		q->q_ptr = NULL;
		return (ENOBUFS);
	}

	mutex_init(&t->lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&t->wait, NULL, CV_DEFAULT, NULL);

	t->ops = &tmpops;

	qprocson(q);
	procson = 1;
	bp->b_datap->db_type = M_PCPROTO;
	*(int32_t *)bp->b_wptr = (int32_t)T_INFO_REQ;
	bp->b_wptr += sizeof (struct T_info_req);
	putnext(WR(q), bp);

	mutex_enter(&t->lock);
	while ((bp = t->info_ack) == NULL) {
		if (cv_wait_sig(&t->wait, &t->lock) == 0) {
			error = EINTR;
			break;
		}
	}
	mutex_exit(&t->lock);
	mutex_destroy(&t->lock);
	cv_destroy(&t->wait);
	if (error)
		goto out;

	pptr = (struct T_info_ack *)t->info_ack->b_rptr;

	if (pptr->SERV_type == T_CLTS) {
		error = rpcmodopen(q, devp, flag, sflag, crp);
		if (error == 0) {
			t = (struct temp_slot *)q->q_ptr;
			t->ops = &xprt_clts_ops;
		}
	} else {
		error = mir_open(q, devp, flag, sflag, crp);
		if (error == 0) {
			t = (struct temp_slot *)q->q_ptr;
			t->ops = &xprt_cots_ops;
		}
	}

out:
	freemsg(bp);

	if (error && procson)
		qprocsoff(q);

	/*
	 * On failure q_ptr may still refer to the on-stack temp_slot;
	 * clear it (after qprocsoff) so nothing dereferences it later.
	 */
	if (error)
		q->q_ptr = NULL;

	return (error);
}

void
rmm_rput(queue_t *q, mblk_t *mp)
{
	(*((struct temp_slot *)q->q_ptr)->ops->xo_rput)(q, mp);
}

void
rmm_rsrv(queue_t *q)
{
	(*((struct temp_slot *)q->q_ptr)->ops->xo_rsrv)(q);
}

void
rmm_wput(queue_t *q, mblk_t *mp)
{
	(*((struct temp_slot *)q->q_ptr)->ops->xo_wput)(q, mp);
}

void
rmm_wsrv(queue_t *q)
{
	(*((struct temp_slot *)q->q_ptr)->ops->xo_wsrv)(q);
}

int
rmm_close(queue_t *q, int flag, cred_t *crp)
{
	return ((*((struct temp_slot *)q->q_ptr)->ops->xo_close)(q, flag, crp));
}
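/*
 * The rmm_* entry points above dispatch through the ops vector hanging
 * off q_ptr.  This works across all three "slot" layouts -- struct
 * temp_slot, struct rpcm, and the mir_t defined later -- because each
 * begins with the same two fields: a cell reserved for kRPC and the
 * xprt_style_ops pointer.
 */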
/*
 * rpcmodopen -	open routine gets called when the module gets pushed
 *		onto the stream.
 */
/*ARGSUSED*/
int
rpcmodopen(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *crp)
{
	struct rpcm *rmp;

	extern void (*rpc_rele)(queue_t *, mblk_t *);

	TRACE_0(TR_FAC_KRPC, TR_RPCMODOPEN_START, "rpcmodopen_start:");

	/*
	 * Initialize entry points to release a rpcmod slot (and an input
	 * message if supplied) and to send an output message to the module
	 * below rpcmod.
	 */
	if (rpc_rele == NULL)
		rpc_rele = rpcmod_release;

	/*
	 * Only sufficiently privileged users can use this module, and it
	 * is assumed that they will use this module properly, and NOT send
	 * bulk data from downstream.
	 */
	if (secpolicy_rpcmod_open(crp) != 0)
		return (EPERM);

	/*
	 * Allocate slot data structure.
	 */
	rmp = kmem_zalloc(sizeof (*rmp), KM_SLEEP);

	mutex_init(&rmp->rm_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&rmp->rm_cwait, NULL, CV_DEFAULT, NULL);
	rmp->rm_zoneid = rpc_zoneid();
	/*
	 * slot type will be set by kRPC client and server ioctl's
	 */
	rmp->rm_type = 0;

	q->q_ptr = (void *)rmp;
	WR(q)->q_ptr = (void *)rmp;

	TRACE_1(TR_FAC_KRPC, TR_RPCMODOPEN_END, "rpcmodopen_end:(%s)", "end");
	return (0);
}

/*
 * rpcmodclose - This routine gets called when the module gets popped
 * off of the stream.
 */
/*ARGSUSED*/
int
rpcmodclose(queue_t *q, int flag, cred_t *crp)
{
	struct rpcm *rmp;

	ASSERT(q != NULL);
	rmp = (struct rpcm *)q->q_ptr;

	/*
	 * Mark our state as closing.
	 */
	mutex_enter(&rmp->rm_lock);
	rmp->rm_state |= RM_CLOSING;

	/*
	 * Check and see if there are any messages on the queue.  If so, send
	 * the messages, regardless of whether the downstream module is ready
	 * to accept data.
	 */
	if (rmp->rm_type == RPC_SERVER) {
		flushq(q, FLUSHDATA);

		qenable(WR(q));

		if (rmp->rm_ref) {
			mutex_exit(&rmp->rm_lock);
			/*
			 * call into SVC to clean the queue
			 */
			svc_queueclean(q);
			mutex_enter(&rmp->rm_lock);

			/*
			 * Block while there are kRPC threads with a reference
			 * to this message.
			 */
			while (rmp->rm_ref)
				cv_wait(&rmp->rm_cwait, &rmp->rm_lock);
		}

		mutex_exit(&rmp->rm_lock);

		/*
		 * It is now safe to remove this queue from the stream. No kRPC
		 * threads have a reference to the stream, and none ever will,
		 * because RM_CLOSING is set.
		 */
		qprocsoff(q);

		/* Notify kRPC that this stream is going away. */
		svc_queueclose(q);
	} else {
		mutex_exit(&rmp->rm_lock);
		qprocsoff(q);
	}

	q->q_ptr = NULL;
	WR(q)->q_ptr = NULL;
	mutex_destroy(&rmp->rm_lock);
	cv_destroy(&rmp->rm_cwait);
	kmem_free(rmp, sizeof (*rmp));
	return (0);
}
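/*
 * Server-side close, step by step: rpcmodclose() sets RM_CLOSING under
 * rm_lock, flushes queued data, and asks kRPC (svc_queueclean) to drop
 * any requests it still holds; it then sleeps on rm_cwait until
 * rpcmod_release() retires the last outstanding reference.  Only after
 * rm_ref drains does it call qprocsoff() and svc_queueclose(), so no
 * kRPC thread can ever touch a freed slot.
 */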
#ifdef DEBUG
int	rpcmod_send_msg_up = 0;
int	rpcmod_send_uderr = 0;
int	rpcmod_send_dup = 0;
int	rpcmod_send_dup_cnt = 0;
#endif

/*
 * rpcmodrput -	Module read put procedure.  This is called from
 *		the module, driver, or stream head downstream.
 */
void
rpcmodrput(queue_t *q, mblk_t *mp)
{
	struct rpcm *rmp;
	union T_primitives *pptr;
	int hdrsz;

	TRACE_0(TR_FAC_KRPC, TR_RPCMODRPUT_START, "rpcmodrput_start:");

	ASSERT(q != NULL);
	rmp = (struct rpcm *)q->q_ptr;

	if (rmp->rm_type == 0) {
		freemsg(mp);
		return;
	}

#ifdef DEBUG
	if (rpcmod_send_msg_up > 0) {
		mblk_t *nmp = copymsg(mp);
		if (nmp) {
			putnext(q, nmp);
			rpcmod_send_msg_up--;
		}
	}
	if ((rpcmod_send_uderr > 0) && mp->b_datap->db_type == M_PROTO) {
		mblk_t *nmp;
		struct T_unitdata_ind *data;
		struct T_uderror_ind *ud;
		int d;

		data = (struct T_unitdata_ind *)mp->b_rptr;
		if (data->PRIM_type == T_UNITDATA_IND) {
			d = sizeof (*ud) - sizeof (*data);
			nmp = allocb(mp->b_wptr - mp->b_rptr + d, BPRI_HI);
			if (nmp) {
				ud = (struct T_uderror_ind *)nmp->b_rptr;
				ud->PRIM_type = T_UDERROR_IND;
				ud->DEST_length = data->SRC_length;
				ud->DEST_offset = data->SRC_offset + d;
				ud->OPT_length = data->OPT_length;
				ud->OPT_offset = data->OPT_offset + d;
				ud->ERROR_type = ENETDOWN;
				if (data->SRC_length) {
					bcopy(mp->b_rptr +
					    data->SRC_offset,
					    nmp->b_rptr +
					    ud->DEST_offset,
					    data->SRC_length);
				}
				if (data->OPT_length) {
					bcopy(mp->b_rptr +
					    data->OPT_offset,
					    nmp->b_rptr +
					    ud->OPT_offset,
					    data->OPT_length);
				}
				nmp->b_wptr += d;
				nmp->b_wptr += (mp->b_wptr - mp->b_rptr);
				nmp->b_datap->db_type = M_PROTO;
				putnext(q, nmp);
				rpcmod_send_uderr--;
			}
		}
	}
#endif
	switch (mp->b_datap->db_type) {
	default:
		putnext(q, mp);
		break;

	case M_PROTO:
	case M_PCPROTO:
		ASSERT((mp->b_wptr - mp->b_rptr) >= sizeof (int32_t));
		pptr = (union T_primitives *)mp->b_rptr;

		/*
		 * Forward this message to krpc if it is data.
		 */
		if (pptr->type == T_UNITDATA_IND) {
			mblk_t *nmp;

			/*
			 * Check if the module is being popped.
			 */
			mutex_enter(&rmp->rm_lock);
			if (rmp->rm_state & RM_CLOSING) {
				mutex_exit(&rmp->rm_lock);
				putnext(q, mp);
				break;
			}

			switch (rmp->rm_type) {
			case RPC_CLIENT:
				mutex_exit(&rmp->rm_lock);
				hdrsz = mp->b_wptr - mp->b_rptr;

				/*
				 * Make sure the header is sane.
				 */
				if (hdrsz < TUNITDATAINDSZ ||
				    hdrsz < (pptr->unitdata_ind.OPT_length +
				    pptr->unitdata_ind.OPT_offset) ||
				    hdrsz < (pptr->unitdata_ind.SRC_length +
				    pptr->unitdata_ind.SRC_offset)) {
					freemsg(mp);
					return;
				}

				/*
				 * Call clnt_clts_dispatch_notify, so that it
				 * can pass the message to the proper caller.
				 * Don't discard the header just yet since the
				 * client may need the sender's address.
				 */
				clnt_clts_dispatch_notify(mp, hdrsz,
				    rmp->rm_zoneid);
				return;
			case RPC_SERVER:
				/*
				 * rm_krpc_cell is exclusively used by the kRPC
				 * CLTS server
				 */
				if (rmp->rm_krpc_cell) {
#ifdef DEBUG
					/*
					 * Test duplicate request cache and
					 * rm_ref count handling by sending a
					 * duplicate every so often, if
					 * desired.
					 */
					if (rpcmod_send_dup &&
					    rpcmod_send_dup_cnt++ %
					    rpcmod_send_dup)
						nmp = copymsg(mp);
					else
						nmp = NULL;
#endif
					/*
					 * Raise the reference count on this
					 * module to prevent it from being
					 * popped before krpc generates the
					 * reply.
					 */
					rmp->rm_ref++;
					mutex_exit(&rmp->rm_lock);

					/*
					 * Submit the message to krpc.
					 */
					svc_queuereq(q, mp);
#ifdef DEBUG
					/*
					 * Send duplicate if we created one.
					 */
					if (nmp) {
						mutex_enter(&rmp->rm_lock);
						rmp->rm_ref++;
						mutex_exit(&rmp->rm_lock);
						svc_queuereq(q, nmp);
					}
#endif
				} else {
					mutex_exit(&rmp->rm_lock);
					freemsg(mp);
				}
				return;
			default:
				mutex_exit(&rmp->rm_lock);
				freemsg(mp);
				return;
			} /* end switch(rmp->rm_type) */
		} else if (pptr->type == T_UDERROR_IND) {
			mutex_enter(&rmp->rm_lock);
			hdrsz = mp->b_wptr - mp->b_rptr;

			/*
			 * Make sure the header is sane
			 */
			if (hdrsz < TUDERRORINDSZ ||
			    hdrsz < (pptr->uderror_ind.OPT_length +
			    pptr->uderror_ind.OPT_offset) ||
			    hdrsz < (pptr->uderror_ind.DEST_length +
			    pptr->uderror_ind.DEST_offset)) {
				mutex_exit(&rmp->rm_lock);
				freemsg(mp);
				return;
			}

			/*
			 * In the case where a unit data error has been
			 * received, all we need to do is clear the message from
			 * the queue.
			 */
			mutex_exit(&rmp->rm_lock);
			freemsg(mp);
			RPCLOG(32, "rpcmodrput: unitdata error received at "
			    "%ld\n", gethrestime_sec());
			return;
		} /* end else if (pptr->type == T_UDERROR_IND) */

		putnext(q, mp);
		break;
	} /* end switch (mp->b_datap->db_type) */

	TRACE_0(TR_FAC_KRPC, TR_RPCMODRPUT_END,
	    "rpcmodrput_end:");
	/*
	 * Return codes are not looked at by the STREAMS framework.
	 */
}
/*
 * write put procedure
 */
void
rpcmodwput(queue_t *q, mblk_t *mp)
{
	struct rpcm	*rmp;

	ASSERT(q != NULL);

	switch (mp->b_datap->db_type) {
	case M_PROTO:
	case M_PCPROTO:
		break;
	default:
		rpcmodwput_other(q, mp);
		return;
	}

	/*
	 * Check to see if we can send the message downstream.
	 */
	if (canputnext(q)) {
		putnext(q, mp);
		return;
	}

	rmp = (struct rpcm *)q->q_ptr;
	ASSERT(rmp != NULL);

	/*
	 * The first canputnext failed.  Try again except this time with the
	 * lock held, so that we can check the state of the stream to see if
	 * it is closing.  If either of these conditions evaluates to true,
	 * then send the message.
	 */
	mutex_enter(&rmp->rm_lock);
	if (canputnext(q) || (rmp->rm_state & RM_CLOSING)) {
		mutex_exit(&rmp->rm_lock);
		putnext(q, mp);
	} else {
		/*
		 * canputnext failed again and the stream is not closing.
		 * Place the message on the queue and let the service
		 * procedure handle the message.
		 */
		mutex_exit(&rmp->rm_lock);
		(void) putq(q, mp);
	}
}

static void
rpcmodwput_other(queue_t *q, mblk_t *mp)
{
	struct rpcm	*rmp;
	struct iocblk	*iocp;

	rmp = (struct rpcm *)q->q_ptr;
	ASSERT(rmp != NULL);

	switch (mp->b_datap->db_type) {
	case M_IOCTL:
		iocp = (struct iocblk *)mp->b_rptr;
		ASSERT(iocp != NULL);
		switch (iocp->ioc_cmd) {
		case RPC_CLIENT:
		case RPC_SERVER:
			mutex_enter(&rmp->rm_lock);
			rmp->rm_type = iocp->ioc_cmd;
			mutex_exit(&rmp->rm_lock);
			mp->b_datap->db_type = M_IOCACK;
			qreply(q, mp);
			return;
		default:
			/*
			 * pass the ioctl downstream and hope someone
			 * down there knows how to handle it.
			 */
			putnext(q, mp);
			return;
		}
	default:
		break;
	}
	/*
	 * This is something we definitely do not know how to handle, just
	 * pass the message downstream
	 */
	putnext(q, mp);
}

/*
 * Module write service procedure. This is called by downstream modules
 * for back enabling during flow control.
 */
void
rpcmodwsrv(queue_t *q)
{
	struct rpcm	*rmp;
	mblk_t		*mp = NULL;

	rmp = (struct rpcm *)q->q_ptr;
	ASSERT(rmp != NULL);

	/*
	 * Get messages that may be queued and send them down stream
	 */
	while ((mp = getq(q)) != NULL) {
		/*
		 * Optimize the service procedure for the server-side, by
		 * avoiding a call to canputnext().
		 */
		if (rmp->rm_type == RPC_SERVER || canputnext(q)) {
			putnext(q, mp);
			continue;
		}
		(void) putbq(q, mp);
		return;
	}
}

static void
rpcmod_release(queue_t *q, mblk_t *bp)
{
	struct rpcm *rmp;

	/*
	 * For now, just free the message.
	 */
	if (bp)
		freemsg(bp);
	rmp = (struct rpcm *)q->q_ptr;

	mutex_enter(&rmp->rm_lock);
	rmp->rm_ref--;

	if (rmp->rm_ref == 0 && (rmp->rm_state & RM_CLOSING)) {
		cv_broadcast(&rmp->rm_cwait);
	}

	mutex_exit(&rmp->rm_lock);
}

/*
 * This part of rpcmod is pushed on a connection-oriented transport for use
 * by RPC.  It serves to bypass the Stream head, implements
 * the record marking protocol, and dispatches incoming RPC messages.
 */

/* Default idle timer values */
#define	MIR_CLNT_IDLE_TIMEOUT	(5 * (60 * 1000L))	/* 5 minutes */
#define	MIR_SVC_IDLE_TIMEOUT	(6 * (60 * 1000L))	/* 6 minutes */
#define	MIR_SVC_ORDREL_TIMEOUT	(10 * (60 * 1000L))	/* 10 minutes */
#define	MIR_LASTFRAG	0x80000000	/* Record marker */

#define	DLEN(mp) (mp->b_cont ? msgdsize(mp) : (mp->b_wptr - mp->b_rptr))
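/*
 * For reference, the record marking used here is the one RPC defines
 * for stream transports (RFC 1831): each fragment of a record is
 * preceded by a 4-byte big-endian word whose low 31 bits give the
 * fragment length and whose high bit -- MIR_LASTFRAG above -- flags
 * the final fragment of a record.  For example, a 100-byte message
 * sent as a single fragment travels as the marker 0x80000064 followed
 * by the 100 bytes of data.
 */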
typedef struct mir_s {
	void	*mir_krpc_cell;	/* Reserved for KRPC use. This field */
				/* must be first in the structure. */
	struct xprt_style_ops	*rm_ops;
	int	mir_type;		/* Client or server side stream */

	mblk_t	*mir_head_mp;		/* RPC msg in progress */
		/*
		 * mir_head_mp points to the first mblk being collected in
		 * the current RPC message.  Record headers are removed
		 * before data is linked into mir_head_mp.
		 */
	mblk_t	*mir_tail_mp;		/* Last mblk in mir_head_mp */
		/*
		 * mir_tail_mp points to the last mblk in the message
		 * chain starting at mir_head_mp.  It is only valid
		 * if mir_head_mp is non-NULL and is used to add new
		 * data blocks to the end of chain quickly.
		 */

	int32_t	mir_frag_len;		/* Bytes seen in the current frag */
		/*
		 * mir_frag_len starts at -4 for beginning of each fragment.
		 * When this length is negative, it indicates the number of
		 * bytes that rpcmod needs to complete the record marker
		 * header.  When it is positive or zero, it holds the number
		 * of bytes that have arrived for the current fragment and
		 * are held in mir_head_mp.
		 */

	int32_t	mir_frag_header;
		/*
		 * Fragment header as collected for the current fragment.
		 * It holds the last-fragment indicator and the number
		 * of bytes in the fragment.
		 */

	unsigned int
		mir_ordrel_pending : 1,	/* Sent T_ORDREL_REQ */
		mir_hold_inbound : 1,	/* Hold inbound messages on server */
					/* side until outbound flow control */
					/* is relieved. */
		mir_closing : 1,	/* The stream is being closed */
		mir_inrservice : 1,	/* data queued or rd srv proc running */
		mir_inwservice : 1,	/* data queued or wr srv proc running */
		mir_inwflushdata : 1,	/* flush M_DATAs when srv runs */
		/*
		 * On client streams, mir_clntreq is 0 or 1; it is set
		 * to 1 whenever a new request is sent out (mir_wput)
		 * and cleared when the timer fires (mir_timer).  If
		 * the timer fires with this value equal to 0, then the
		 * stream is considered idle and KRPC is notified.
		 */
		mir_clntreq : 1,
		/*
		 * On server streams, stop accepting messages
		 */
		mir_svc_no_more_msgs : 1,
		mir_listen_stream : 1,	/* listen end point */
		mir_unused : 1,		/* no longer used */
		mir_timer_call : 1,
		mir_junk_fill_thru_bit_31 : 21;

	int	mir_setup_complete;	/* server has initialized everything */
	timeout_id_t mir_timer_id;	/* Timer for idle checks */
	clock_t	mir_idle_timeout;	/* Allowed idle time before shutdown */
		/*
		 * This value is copied from clnt_idle_timeout or
		 * svc_idle_timeout during the appropriate ioctl.
		 * Kept in milliseconds
		 */
	clock_t	mir_use_timestamp;	/* updated on client with each use */
		/*
		 * This value is set to lbolt
		 * every time a client stream sends or receives data.
		 * Even if the timer message arrives, we don't shutdown
		 * client unless:
		 *    lbolt >= MSEC_TO_TICK(mir_idle_timeout)+mir_use_timestamp.
		 * This value is kept in HZ.
		 */

	uint_t	*mir_max_msg_sizep;	/* Reference to sanity check size */
		/*
		 * This pointer is set to &clnt_max_msg_size or
		 * &svc_max_msg_size during the appropriate ioctl.
		 */
	zoneid_t mir_zoneid;		/* zone which pushed rpcmod */
	/* Server-side fields. */
	int	mir_ref_cnt;		/* Reference count: server side only */
					/* counts the number of references */
					/* that a kernel RPC server thread */
					/* (see svc_run()) has on this rpcmod */
					/* slot. Effectively, it is the */
					/* number of unprocessed messages */
					/* that have been passed up to the */
					/* KRPC layer */

	mblk_t	*mir_svc_pend_mp;	/* Pending T_ORDREL_IND or */
					/* T_DISCON_IND */

	/*
	 * these fields are for both client and server, but for debugging,
	 * it is easier to have these last in the structure.
	 */
	kmutex_t	mir_mutex;	/* Mutex and condvar for close */
	kcondvar_t	mir_condvar;	/* synchronization. */
	kcondvar_t	mir_timer_cv;	/* Timer routine sync. */
} mir_t;
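/*
 * For illustration, consider a 1432-byte fragment arriving on the
 * stream: mir_frag_len starts at -4; consuming the four record-mark
 * bytes advances it to 0 and leaves the mark in mir_frag_header; each
 * data byte then counts up until mir_frag_len reaches
 * (mir_frag_header & ~MIR_LASTFRAG) == 1432, which completes the
 * fragment.  If MIR_LASTFRAG is set in the mark, the chain assembled
 * at mir_head_mp is a complete RPC message.
 */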
#define	MIR_SVC_QUIESCED(mir)	\
	(mir->mir_ref_cnt == 0 && mir->mir_inrservice == 0)

#define	MIR_CLEAR_INRSRV(mir_ptr)	{	\
	(mir_ptr)->mir_inrservice = 0;	\
	if ((mir_ptr)->mir_type == RPC_SERVER &&	\
	    (mir_ptr)->mir_closing)	\
		cv_signal(&(mir_ptr)->mir_condvar);	\
}

/*
 * Don't block service procedure (and mir_close) if
 * we are in the process of closing.
 */
#define	MIR_WCANPUTNEXT(mir_ptr, write_q)	\
	(canputnext(write_q) || ((mir_ptr)->mir_svc_no_more_msgs == 1))

static int	mir_clnt_dup_request(queue_t *q, mblk_t *mp);
static void	mir_rput_proto(queue_t *q, mblk_t *mp);
static int	mir_svc_policy_notify(queue_t *q, int event);
static void	mir_svc_release(queue_t *wq, mblk_t *mp);
static void	mir_svc_start(queue_t *wq);
static void	mir_svc_idle_start(queue_t *, mir_t *);
static void	mir_svc_idle_stop(queue_t *, mir_t *);
static void	mir_svc_start_close(queue_t *, mir_t *);
static void	mir_clnt_idle_do_stop(queue_t *);
static void	mir_clnt_idle_stop(queue_t *, mir_t *);
static void	mir_clnt_idle_start(queue_t *, mir_t *);
static void	mir_wput(queue_t *q, mblk_t *mp);
static void	mir_wput_other(queue_t *q, mblk_t *mp);
static void	mir_wsrv(queue_t *q);
static void	mir_disconnect(queue_t *, mir_t *ir);
static int	mir_check_len(queue_t *, int32_t, mblk_t *);
static void	mir_timer(void *);

extern void	(*mir_rele)(queue_t *, mblk_t *);
extern void	(*mir_start)(queue_t *);
extern void	(*clnt_stop_idle)(queue_t *);

clock_t	clnt_idle_timeout = MIR_CLNT_IDLE_TIMEOUT;
clock_t	svc_idle_timeout = MIR_SVC_IDLE_TIMEOUT;

/*
 * Timeout for subsequent notifications of idle connection.  This is
 * typically used to clean up after a wedged orderly release.
 */
clock_t	svc_ordrel_timeout = MIR_SVC_ORDREL_TIMEOUT;	/* milliseconds */

extern	uint_t	*clnt_max_msg_sizep;
extern	uint_t	*svc_max_msg_sizep;
uint_t	clnt_max_msg_size = RPC_MAXDATASIZE;
uint_t	svc_max_msg_size = RPC_MAXDATASIZE;
uint_t	mir_krpc_cell_null;
static void
mir_timer_stop(mir_t *mir)
{
	timeout_id_t tid;

	ASSERT(MUTEX_HELD(&mir->mir_mutex));

	/*
	 * Since the mir_mutex lock needs to be released to call
	 * untimeout(), we need to make sure that no other thread
	 * can start/stop the timer (changing mir_timer_id) during
	 * that time.  The mir_timer_call bit and the mir_timer_cv
	 * condition variable are used to synchronize this.  Setting
	 * mir_timer_call also tells mir_timer() (refer to the comments
	 * in mir_timer()) that it does not need to do anything.
	 */
	while (mir->mir_timer_call)
		cv_wait(&mir->mir_timer_cv, &mir->mir_mutex);
	mir->mir_timer_call = B_TRUE;

	if ((tid = mir->mir_timer_id) != 0) {
		mir->mir_timer_id = 0;
		mutex_exit(&mir->mir_mutex);
		(void) untimeout(tid);
		mutex_enter(&mir->mir_mutex);
	}
	mir->mir_timer_call = B_FALSE;
	cv_broadcast(&mir->mir_timer_cv);
}

static void
mir_timer_start(queue_t *q, mir_t *mir, clock_t intrvl)
{
	timeout_id_t tid;

	ASSERT(MUTEX_HELD(&mir->mir_mutex));

	while (mir->mir_timer_call)
		cv_wait(&mir->mir_timer_cv, &mir->mir_mutex);
	mir->mir_timer_call = B_TRUE;

	if ((tid = mir->mir_timer_id) != 0) {
		mutex_exit(&mir->mir_mutex);
		(void) untimeout(tid);
		mutex_enter(&mir->mir_mutex);
	}
	/* Only start the timer when it is not closing. */
	if (!mir->mir_closing) {
		mir->mir_timer_id = timeout(mir_timer, q,
		    MSEC_TO_TICK(intrvl));
	}
	mir->mir_timer_call = B_FALSE;
	cv_broadcast(&mir->mir_timer_cv);
}

static int
mir_clnt_dup_request(queue_t *q, mblk_t *mp)
{
	mblk_t		*mp1;
	uint32_t	new_xid;
	uint32_t	old_xid;

	ASSERT(MUTEX_HELD(&((mir_t *)q->q_ptr)->mir_mutex));
	new_xid = BE32_TO_U32(&mp->b_rptr[4]);
	/*
	 * This loop is a bit tacky -- it walks the STREAMS list of
	 * flow-controlled messages.
	 */
	if ((mp1 = q->q_first) != NULL) {
		do {
			old_xid = BE32_TO_U32(&mp1->b_rptr[4]);
			if (new_xid == old_xid)
				return (1);
		} while ((mp1 = mp1->b_next) != NULL);
	}
	return (0);
}
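/*
 * A note on the byte offsets above (added for clarity): requests queued
 * on the client write side carry the 4-byte record mark first, so the
 * RPC transaction ID -- the first word of the RPC call header -- lives
 * at byte offset 4 of the message, which is what
 * BE32_TO_U32(&mp->b_rptr[4]) extracts.
 */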
static int
mir_close(queue_t *q)
{
	mir_t	*mir;
	mblk_t	*mp;
	bool_t queue_cleaned = FALSE;

	RPCLOG(32, "rpcmod: mir_close of q 0x%p\n", (void *)q);
	mir = (mir_t *)q->q_ptr;
	ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));
	mutex_enter(&mir->mir_mutex);
	if ((mp = mir->mir_head_mp) != NULL) {
		mir->mir_head_mp = (mblk_t *)0;
		freemsg(mp);
	}
	/*
	 * Set mir_closing so that we get notified when MIR_SVC_QUIESCED()
	 * becomes TRUE, and so that mir_timer_start() won't restart the
	 * timer.
	 */
	mir->mir_closing = B_TRUE;
	mir_timer_stop(mir);

	if (mir->mir_type == RPC_SERVER) {
		flushq(q, FLUSHDATA);	/* Ditch anything waiting on read q */

		/*
		 * This will prevent more requests from arriving and
		 * will force rpcmod to ignore flow control.
		 */
		mir_svc_start_close(WR(q), mir);

		while ((!MIR_SVC_QUIESCED(mir)) || mir->mir_inwservice == 1) {

			if (mir->mir_ref_cnt && !mir->mir_inrservice &&
			    (queue_cleaned == FALSE)) {
				/*
				 * call into SVC to clean the queue
				 */
				mutex_exit(&mir->mir_mutex);
				svc_queueclean(q);
				queue_cleaned = TRUE;
				mutex_enter(&mir->mir_mutex);
				continue;
			}

			/*
			 * Bugid 1253810 - Force the write service
			 * procedure to send its messages, regardless
			 * of whether the downstream module is ready
			 * to accept data.
			 */
			if (mir->mir_inwservice == 1)
				qenable(WR(q));

			cv_wait(&mir->mir_condvar, &mir->mir_mutex);
		}

		mutex_exit(&mir->mir_mutex);
		qprocsoff(q);

		/* Notify KRPC that this stream is going away. */
		svc_queueclose(q);
	} else {
		mutex_exit(&mir->mir_mutex);
		qprocsoff(q);
	}

	mutex_destroy(&mir->mir_mutex);
	cv_destroy(&mir->mir_condvar);
	cv_destroy(&mir->mir_timer_cv);
	kmem_free(mir, sizeof (mir_t));
	return (0);
}

/*
 * This is server side only (RPC_SERVER).
 *
 * Exit idle mode.
 */
static void
mir_svc_idle_stop(queue_t *q, mir_t *mir)
{
	ASSERT(MUTEX_HELD(&mir->mir_mutex));
	ASSERT((q->q_flag & QREADR) == 0);
	ASSERT(mir->mir_type == RPC_SERVER);
	RPCLOG(16, "rpcmod: mir_svc_idle_stop of q 0x%p\n", (void *)q);

	mir_timer_stop(mir);
}

/*
 * This is server side only (RPC_SERVER).
 *
 * Start idle processing, which will include setting idle timer if the
 * stream is not being closed.
 */
static void
mir_svc_idle_start(queue_t *q, mir_t *mir)
{
	ASSERT(MUTEX_HELD(&mir->mir_mutex));
	ASSERT((q->q_flag & QREADR) == 0);
	ASSERT(mir->mir_type == RPC_SERVER);
	RPCLOG(16, "rpcmod: mir_svc_idle_start q 0x%p\n", (void *)q);

	/*
	 * Don't re-start idle timer if we are closing queues.
	 */
	if (mir->mir_closing) {
		RPCLOG(16, "mir_svc_idle_start - closing: 0x%p\n",
		    (void *)q);

		/*
		 * We will call mir_svc_idle_start() whenever MIR_SVC_QUIESCED()
		 * is true.  When it is true, and we are in the process of
		 * closing the stream, signal any thread waiting in
		 * mir_close().
		 */
		if (mir->mir_inwservice == 0)
			cv_signal(&mir->mir_condvar);

	} else {
		RPCLOG(16, "mir_svc_idle_start - reset %s timer\n",
		    mir->mir_ordrel_pending ? "ordrel" : "normal");
		/*
		 * Normal condition, start the idle timer.  If an orderly
		 * release has been sent, set the timeout to wait for the
		 * client to close its side of the connection.  Otherwise,
		 * use the normal idle timeout.
		 */
		mir_timer_start(q, mir, mir->mir_ordrel_pending ?
		    svc_ordrel_timeout : mir->mir_idle_timeout);
	}
}
/* ARGSUSED */
static int
mir_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
{
	mir_t	*mir;

	RPCLOG(32, "rpcmod: mir_open of q 0x%p\n", (void *)q);
	/* Set variables used directly by KRPC. */
	if (!mir_rele)
		mir_rele = mir_svc_release;
	if (!mir_start)
		mir_start = mir_svc_start;
	if (!clnt_stop_idle)
		clnt_stop_idle = mir_clnt_idle_do_stop;
	if (!clnt_max_msg_sizep)
		clnt_max_msg_sizep = &clnt_max_msg_size;
	if (!svc_max_msg_sizep)
		svc_max_msg_sizep = &svc_max_msg_size;

	/* Allocate a zero'ed out mir structure for this stream. */
	mir = kmem_zalloc(sizeof (mir_t), KM_SLEEP);

	/*
	 * We set hold inbound here so that incoming messages will
	 * be held on the read-side queue until the stream is completely
	 * initialized with a RPC_CLIENT or RPC_SERVER ioctl.  During
	 * the ioctl processing, the flag is cleared and any messages that
	 * arrived between the open and the ioctl are delivered to KRPC.
	 *
	 * Early data should never arrive on a client stream since
	 * servers only respond to our requests and we do not send any
	 * until after the stream is initialized.  Early data is
	 * very common on a server stream where the client will start
	 * sending data as soon as the connection is made (and this
	 * is especially true with TCP where the protocol accepts the
	 * connection before nfsd or KRPC is notified about it).
	 */

	mir->mir_hold_inbound = 1;

	/*
	 * Start the record marker looking for a 4-byte header.  When
	 * this length is negative, it indicates that rpcmod is looking
	 * for bytes to consume for the record marker header.  When it
	 * is positive, it holds the number of bytes that have arrived
	 * for the current fragment and are being held in mir_head_mp.
	 */

	mir->mir_frag_len = -(int32_t)sizeof (uint32_t);

	mir->mir_zoneid = rpc_zoneid();
	mutex_init(&mir->mir_mutex, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&mir->mir_condvar, NULL, CV_DRIVER, NULL);
	cv_init(&mir->mir_timer_cv, NULL, CV_DRIVER, NULL);

	q->q_ptr = (char *)mir;
	WR(q)->q_ptr = (char *)mir;

	/*
	 * We noenable the read-side queue because we don't want it
	 * automatically enabled by putq.  We enable it explicitly
	 * in mir_wsrv when appropriate.  (See additional comments on
	 * flow control at the beginning of mir_rsrv.)
	 */
	noenable(q);

	qprocson(q);
	return (0);
}
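/*
 * Overview of the read-side path below (a roadmap; the details live in
 * mir_do_rput): while mir_frag_len is negative, incoming bytes are
 * shifted into mir_frag_header to rebuild the record mark; once the
 * mark is complete, data blocks are linked onto mir_head_mp until the
 * fragment length is satisfied.  Bytes beyond the fragment boundary are
 * split off with dupb()/copyb() and re-processed as the start of the
 * next fragment.  A fragment with MIR_LASTFRAG set completes the
 * message, which goes to clnt_dispatch_notify() on client streams or
 * svc_queuereq() on server streams.
 */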
/*
 * Read-side put routine for both the client and server side.  Does the
 * record marking for incoming RPC messages, and when complete, dispatches
 * the message to either the client or server.
 */
static void
mir_do_rput(queue_t *q, mblk_t *mp, int srv)
{
	mblk_t	*cont_mp;
	int	excess;
	int32_t	frag_len;
	int32_t	frag_header;
	mblk_t	*head_mp;
	int	len;
	mir_t	*mir;
	mblk_t	*mp1;
	unsigned char	*rptr;
	mblk_t	*tail_mp;
	unsigned char	*wptr;
	boolean_t	stop_timer = B_FALSE;

	mir = (mir_t *)q->q_ptr;
	ASSERT(mir != NULL);

	/*
	 * If the stream has not been set up as an RPC_CLIENT or RPC_SERVER
	 * with the corresponding ioctl, then don't accept
	 * any inbound data.  This should never happen for streams
	 * created by nfsd or client-side KRPC because they are careful
	 * to set the mode of the stream before doing anything else.
	 */
	if (mir->mir_type == 0) {
		freemsg(mp);
		return;
	}

	ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));

	switch (mp->b_datap->db_type) {
	case M_DATA:
		break;
	case M_PROTO:
	case M_PCPROTO:
		rptr = mp->b_rptr;
		if (mp->b_wptr - rptr < sizeof (uint32_t)) {
			RPCLOG(1, "mir_rput: runt TPI message (%d bytes)\n",
			    (int)(mp->b_wptr - rptr));
			freemsg(mp);
			return;
		}
		if (((union T_primitives *)rptr)->type != T_DATA_IND) {
			mir_rput_proto(q, mp);
			return;
		}

		/* Throw away the T_DATA_IND block and continue with data. */
		mp1 = mp;
		mp = mp->b_cont;
		freeb(mp1);
		break;
	case M_SETOPTS:
		/*
		 * If a module on the stream is trying to set the Stream head's
		 * high water mark, then set our hiwater to the requested
		 * value.  We are the "stream head" for all inbound
		 * data messages since messages are passed directly to KRPC.
		 */
		if ((mp->b_wptr - mp->b_rptr) >= sizeof (struct stroptions)) {
			struct stroptions	*stropts;

			stropts = (struct stroptions *)mp->b_rptr;
			if ((stropts->so_flags & SO_HIWAT) &&
			    !(stropts->so_flags & SO_BAND)) {
				(void) strqset(q, QHIWAT, 0, stropts->so_hiwat);
			}
		}
		putnext(q, mp);
		return;
	case M_FLUSH:
		RPCLOG(32, "mir_do_rput: ignoring M_FLUSH on q 0x%p. ",
		    (void *)q);
		RPCLOG(32, "M_FLUSH is %x\n", (uint_t)*mp->b_rptr);

		putnext(q, mp);
		return;
	default:
		putnext(q, mp);
		return;
	}

	mutex_enter(&mir->mir_mutex);

	/*
	 * If this connection is closing, don't accept any new messages.
	 */
	if (mir->mir_svc_no_more_msgs) {
		ASSERT(mir->mir_type == RPC_SERVER);
		mutex_exit(&mir->mir_mutex);
		freemsg(mp);
		return;
	}

	/* Get local copies for quicker access. */
	frag_len = mir->mir_frag_len;
	frag_header = mir->mir_frag_header;
	head_mp = mir->mir_head_mp;
	tail_mp = mir->mir_tail_mp;

	/* Loop, processing each message block in the mp chain separately. */
	do {
		/*
		 * cont_mp is used in the do/while condition below to
		 * walk to the next block in the STREAMS message.
		 * mp->b_cont may be nil'ed during processing so we
		 * can't rely on it to find the next block.
		 */
		cont_mp = mp->b_cont;

		/*
		 * Get local copies of rptr and wptr for our processing.
		 * These always point into "mp" (the current block being
		 * processed), but rptr is updated as we consume any
		 * record header in this message, and wptr is updated to
		 * point to the end of the data for the current fragment,
		 * if it ends in this block.  The main point is that
		 * they are not always the same as b_rptr and b_wptr.
		 * b_rptr and b_wptr will be updated when appropriate.
		 */
		rptr = mp->b_rptr;
		wptr = mp->b_wptr;
same_mblk:;
		len = (int)(wptr - rptr);
		if (len <= 0) {
			/*
			 * If we have processed all of the data in the message
			 * or the block is empty to begin with, then we're
			 * done with this block and can go on to cont_mp,
			 * if there is one.
			 *
			 * First, we check to see if the current block is
			 * now zero-length and, if so, we free it.
			 * This happens when either the block was empty
			 * to begin with or we consumed all of the data
			 * for the record marking header.
			 */
			if (rptr <= mp->b_rptr) {
				/*
				 * If head_mp is non-NULL, add cont_mp to the
				 * mblk list. XXX But there is a possibility
				 * that tail_mp = mp or even head_mp = mp XXX
				 */
				if (head_mp) {
					if (head_mp == mp)
						head_mp = NULL;
					else if (tail_mp != mp) {
						ASSERT((tail_mp->b_cont ==
						    NULL) ||
						    (tail_mp->b_cont == mp));
						tail_mp->b_cont = cont_mp;
						/*
						 * It's possible that, because
						 * of a very short mblk (0-3
						 * bytes), we've ended up here
						 * and that cont_mp could be
						 * NULL (if we're at the end
						 * of an mblk chain). If so,
						 * don't set tail_mp to
						 * cont_mp, because the next
						 * time we access it, we'll
						 * dereference a NULL pointer
						 * and crash.  Just leave
						 * tail_mp pointing at the
						 * current end of chain.
						 */
						if (cont_mp)
							tail_mp = cont_mp;
					} else {
						mblk_t *smp = head_mp;

						while ((smp->b_cont != NULL) &&
						    (smp->b_cont != mp))
							smp = smp->b_cont;
						smp->b_cont = cont_mp;
						/*
						 * Don't set tail_mp to cont_mp
						 * if it's NULL. Instead, set
						 * tail_mp to smp, which is the
						 * end of the chain starting
						 * at head_mp.
						 */
						if (cont_mp)
							tail_mp = cont_mp;
						else
							tail_mp = smp;
					}
				}
				freeb(mp);
			}
			continue;
		}
1574 */ 1575 if (cont_mp) 1576 tail_mp = cont_mp; 1577 } else { 1578 mblk_t *smp = head_mp; 1579 1580 while ((smp->b_cont != NULL) && 1581 (smp->b_cont != mp)) 1582 smp = smp->b_cont; 1583 smp->b_cont = cont_mp; 1584 /* 1585 * Don't set tail_mp to cont_mp 1586 * if it's NULL. Instead, set 1587 * tail_mp to smp, which is the 1588 * end of the chain starting 1589 * at head_mp. 1590 */ 1591 if (cont_mp) 1592 tail_mp = cont_mp; 1593 else 1594 tail_mp = smp; 1595 } 1596 } 1597 freeb(mp); 1598 } 1599 continue; 1600 } 1601 1602 /* 1603 * frag_len starts at -4 and is incremented past the record 1604 * marking header to 0, and then becomes positive as real data 1605 * bytes are received for the message. While frag_len is less 1606 * than zero, we need more bytes for the record marking 1607 * header. 1608 */ 1609 if (frag_len < 0) { 1610 uchar_t *up = rptr; 1611 /* 1612 * Collect as many bytes as we need for the record 1613 * marking header and that are available in this block. 1614 */ 1615 do { 1616 --len; 1617 frag_len++; 1618 frag_header <<= 8; 1619 frag_header += (*up++ & 0xFF); 1620 } while (len > 0 && frag_len < 0); 1621 1622 if (rptr == mp->b_rptr) { 1623 /* 1624 * The record header is located at the 1625 * beginning of the block, so just walk 1626 * b_rptr past it. 1627 */ 1628 mp->b_rptr = rptr = up; 1629 } else { 1630 /* 1631 * The record header is located in the middle 1632 * of a block, so copy any remaining data up. 1633 * This happens when an RPC message is 1634 * fragmented into multiple pieces and 1635 * a middle (or end) fragment immediately 1636 * follows a previous fragment in the same 1637 * message block. 1638 */ 1639 wptr = &rptr[len]; 1640 mp->b_wptr = wptr; 1641 if (len) { 1642 RPCLOG(32, "mir_do_rput: copying %d " 1643 "bytes of data up", len); 1644 RPCLOG(32, " db_ref %d\n", 1645 (uint_t)mp->b_datap->db_ref); 1646 bcopy(up, rptr, len); 1647 } 1648 } 1649 1650 /* 1651 * If we haven't received the complete record header 1652 * yet, then loop around to get the next block in the 1653 * STREAMS message. The logic at same_mblk label will 1654 * free the current block if it has become empty. 1655 */ 1656 if (frag_len < 0) { 1657 RPCLOG(32, "mir_do_rput: frag_len is still < 0 " 1658 "(%d)", len); 1659 goto same_mblk; 1660 } 1661 1662 #ifdef RPCDEBUG 1663 if ((frag_header & MIR_LASTFRAG) == 0) { 1664 RPCLOG0(32, "mir_do_rput: multi-fragment " 1665 "record\n"); 1666 } 1667 { 1668 uint_t l = frag_header & ~MIR_LASTFRAG; 1669 1670 if (l != 0 && mir->mir_max_msg_sizep && 1671 l >= *mir->mir_max_msg_sizep) { 1672 RPCLOG(32, "mir_do_rput: fragment size" 1673 " (%d) > maximum", l); 1674 RPCLOG(32, " (%u)\n", 1675 *mir->mir_max_msg_sizep); 1676 } 1677 } 1678 #endif 1679 /* 1680 * At this point we have retrieved the complete record 1681 * header for this fragment. If the current block is 1682 * empty, then we need to free it and walk to the next 1683 * block. 1684 */ 1685 if (mp->b_rptr >= wptr) { 1686 /* 1687 * If this is not the last fragment or if we 1688 * have not received all the data for this 1689 * RPC message, then loop around to the next 1690 * block. 1691 */ 1692 if (!(frag_header & MIR_LASTFRAG) || 1693 (frag_len - 1694 (frag_header & ~MIR_LASTFRAG)) || 1695 !head_mp) 1696 goto same_mblk; 1697 1698 /* 1699 * Quick walk to next block in the 1700 * STREAMS message. 1701 */ 1702 freeb(mp); 1703 continue; 1704 } 1705 } 1706 1707 /* 1708 * We've collected the complete record header. The data 1709 * in the current block is added to the end of the RPC 1710 * message. 
		/*
		 * We've collected the complete record header.  The data
		 * in the current block is added to the end of the RPC
		 * message.  Note that tail_mp is the same as mp after
		 * this linkage.
		 */
		if (!head_mp)
			head_mp = mp;
		else if (tail_mp != mp) {
			ASSERT((tail_mp->b_cont == NULL) ||
			    (tail_mp->b_cont == mp));
			tail_mp->b_cont = mp;
		}
		tail_mp = mp;

		/*
		 * Add the length of this block to the accumulated
		 * fragment length.
		 */
		frag_len += len;
		excess = frag_len - (frag_header & ~MIR_LASTFRAG);
		/*
		 * If we have not received all the data for this fragment,
		 * then walk to the next block.
		 */
		if (excess < 0)
			continue;

		/*
		 * We've received a complete fragment, so reset frag_len
		 * for the next one.
		 */
		frag_len = -(int32_t)sizeof (uint32_t);

		/*
		 * Update rptr to point to the beginning of the next
		 * fragment in this block.  If there are no more bytes
		 * in the block (excess is 0), then rptr will be equal
		 * to wptr.
		 */
		rptr = wptr - excess;

		/*
		 * Now we check to see if this fragment is the last one in
		 * the RPC message.
		 */
		if (!(frag_header & MIR_LASTFRAG)) {
			/*
			 * This isn't the last one, so start processing the
			 * next fragment.
			 */
			frag_header = 0;

			/*
			 * If excess is 0, the next fragment
			 * starts at the beginning of the next block --
			 * we "continue" to the end of the while loop and
			 * walk to cont_mp.
			 */
			if (excess == 0)
				continue;
			RPCLOG0(32, "mir_do_rput: multi-fragment message with "
			    "two or more fragments in one mblk\n");

			/*
			 * If excess is non-0, then the next fragment starts
			 * in this block.  rptr points to the beginning
			 * of the next fragment and we "goto same_mblk"
			 * to continue processing.
			 */
			goto same_mblk;
		}

		/*
		 * We've got a complete RPC message.  Before passing it
		 * upstream, check to see if there is extra data in this
		 * message block.  If so, then we separate the excess
		 * from the complete message.  The excess data is processed
		 * after the current message goes upstream.
		 */
		if (excess > 0) {
			RPCLOG(32, "mir_do_rput: end of record, but excess "
			    "data (%d bytes) in this mblk. dupb/copyb "
			    "needed\n", excess);

			/* Duplicate only the overlapping block. */
			mp1 = dupb(tail_mp);

			/*
			 * dupb() might have failed due to ref count wrap around
			 * so try a copyb().
			 */
			if (mp1 == NULL)
				mp1 = copyb(tail_mp);

			/*
			 * Do not use bufcall() to schedule a "buffer
			 * availability event."  The reason is that
			 * bufcall() has problems.  For example, if memory
			 * runs out, bufcall() itself will fail since it
			 * needs to allocate memory.  The most appropriate
			 * action right now is to disconnect this connection
			 * as the system is under stress.  We should try to
			 * free up resources.
			 */
			if (mp1 == NULL) {
				freemsg(head_mp);
				RPCLOG0(1, "mir_do_rput: dupb/copyb failed\n");
				mir->mir_frag_header = 0;
				mir->mir_frag_len = -(int)sizeof (uint32_t);
				mir->mir_head_mp = NULL;
				mir->mir_tail_mp = NULL;

				mir_disconnect(q, mir);
				return;
			}

			/*
			 * The new message block is linked with the
			 * continuation block in cont_mp.  We then point
			 * cont_mp to the new block so that we will
			 * process it next.
			 */
1829 */ 1830 mp1->b_cont = cont_mp; 1831 cont_mp = mp1; 1832 /* 1833 * Data in the new block begins at the 1834 * next fragment (rptr). 1835 */ 1836 cont_mp->b_rptr += (rptr - tail_mp->b_rptr); 1837 ASSERT(cont_mp->b_rptr >= cont_mp->b_datap->db_base); 1838 ASSERT(cont_mp->b_rptr <= cont_mp->b_wptr); 1839 1840 /* Data in the current fragment ends at rptr. */ 1841 tail_mp->b_wptr = rptr; 1842 ASSERT(tail_mp->b_wptr <= tail_mp->b_datap->db_lim); 1843 ASSERT(tail_mp->b_wptr >= tail_mp->b_rptr); 1844 1845 } 1846 1847 /* tail_mp is the last block with data for this RPC message. */ 1848 tail_mp->b_cont = NULL; 1849 1850 /* Pass the RPC message to the current consumer. */ 1851 switch (mir->mir_type) { 1852 case RPC_CLIENT: 1853 if (clnt_dispatch_notify(head_mp, mir->mir_zoneid)) { 1854 /* 1855 * Mark this stream as active. This marker 1856 * is used in mir_timer(). 1857 */ 1858 1859 mir->mir_clntreq = 1; 1860 mir->mir_use_timestamp = lbolt; 1861 } else 1862 freemsg(head_mp); 1863 break; 1864 1865 case RPC_SERVER: 1866 /* 1867 * Check for flow control before passing the 1868 * message to KRPC. 1869 */ 1870 1871 if (!mir->mir_hold_inbound) { 1872 if (mir->mir_krpc_cell) { 1873 /* 1874 * If the reference count is 0 1875 * (not including this request), 1876 * then the stream is transitioning 1877 * from idle to non-idle. In this case, 1878 * we cancel the idle timer. 1879 */ 1880 if (mir->mir_ref_cnt++ == 0) 1881 stop_timer = B_TRUE; 1882 if (mir_check_len(q, 1883 (int32_t)msgdsize(mp), mp)) 1884 return; 1885 svc_queuereq(q, head_mp); /* to KRPC */ 1886 } else { 1887 /* 1888 * Count # of times this happens. Should be 1889 * never, but experience shows otherwise. 1890 */ 1891 mir_krpc_cell_null++; 1892 freemsg(head_mp); 1893 } 1894 1895 } else { 1896 /* 1897 * If the outbound side of the stream is 1898 * flow controlled, then hold this message 1899 * until client catches up. mir_hold_inbound 1900 * is set in mir_wput and cleared in mir_wsrv. 1901 */ 1902 if (srv) 1903 (void) putbq(q, head_mp); 1904 else 1905 (void) putq(q, head_mp); 1906 mir->mir_inrservice = B_TRUE; 1907 } 1908 break; 1909 default: 1910 RPCLOG(1, "mir_rput: unknown mir_type %d\n", 1911 mir->mir_type); 1912 freemsg(head_mp); 1913 break; 1914 } 1915 1916 /* 1917 * Reset head_mp and frag_header since we're starting on a 1918 * new RPC fragment and message. 1919 */ 1920 head_mp = NULL; 1921 tail_mp = NULL; 1922 frag_header = 0; 1923 } while ((mp = cont_mp) != NULL); 1924 1925 /* 1926 * Do a sanity check on the message length. If this message is 1927 * getting excessively large, shut down the connection. 1928 */ 1929 if (head_mp != NULL && mir->mir_setup_complete && 1930 mir_check_len(q, frag_len, head_mp)) 1931 return; 1932 1933 /* Save our local copies back in the mir structure. */ 1934 mir->mir_frag_header = frag_header; 1935 mir->mir_frag_len = frag_len; 1936 mir->mir_head_mp = head_mp; 1937 mir->mir_tail_mp = tail_mp; 1938 1939 /* 1940 * The timer is stopped after the whole message chain is processed. 1941 * The reason is that stopping the timer releases the mir_mutex 1942 * lock temporarily. This means that the request can be serviced 1943 * while we are still processing the message chain. This is not 1944 * good. So we stop the timer here instead. 
static void
mir_rput(queue_t *q, mblk_t *mp)
{
	mir_do_rput(q, mp, 0);
}

static void
mir_rput_proto(queue_t *q, mblk_t *mp)
{
	mir_t	*mir = (mir_t *)q->q_ptr;
	uint32_t	type;
	uint32_t reason = 0;

	ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));

	type = ((union T_primitives *)mp->b_rptr)->type;
	switch (mir->mir_type) {
	case RPC_CLIENT:
		switch (type) {
		case T_DISCON_IND:
			reason = ((struct T_discon_ind *)
			    (mp->b_rptr))->DISCON_reason;
			/*FALLTHROUGH*/
		case T_ORDREL_IND:
			mutex_enter(&mir->mir_mutex);
			if (mir->mir_head_mp) {
				freemsg(mir->mir_head_mp);
				mir->mir_head_mp = (mblk_t *)0;
				mir->mir_tail_mp = (mblk_t *)0;
			}
			/*
			 * We are disconnecting, but not necessarily
			 * closing. By not closing, we will fail to
			 * pick up a possibly changed global timeout value,
			 * unless we store it now.
			 */
			mir->mir_idle_timeout = clnt_idle_timeout;
			mir_clnt_idle_stop(WR(q), mir);

			/*
			 * Even though we are unconnected, we still
			 * leave the idle timer going on the client.  The
			 * reason for this is that if we've disconnected due
			 * to a server-side disconnect, reset, or connection
			 * timeout, there is a possibility the client may
			 * retry the RPC request.  This retry needs to be done
			 * on the same bound address for the server to
			 * interpret it as such.  However, we don't want
			 * to wait forever for that possibility.  If the
			 * end-point stays unconnected for mir_idle_timeout
			 * units of time, then that is a signal to the
			 * connection manager to give up waiting for the
			 * application (eg. NFS) to send a retry.
			 */
			mir_clnt_idle_start(WR(q), mir);
			mutex_exit(&mir->mir_mutex);
			clnt_dispatch_notifyall(WR(q), type, reason);
			freemsg(mp);
			return;
		case T_ERROR_ACK:
		{
			struct T_error_ack	*terror;

			terror = (struct T_error_ack *)mp->b_rptr;
			RPCLOG(1, "mir_rput_proto T_ERROR_ACK for queue 0x%p",
			    (void *)q);
			RPCLOG(1, " ERROR_prim: %s,",
			    rpc_tpiprim2name(terror->ERROR_prim));
			RPCLOG(1, " TLI_error: %s,",
			    rpc_tpierr2name(terror->TLI_error));
			RPCLOG(1, " UNIX_error: %d\n", terror->UNIX_error);
			if (terror->ERROR_prim == T_DISCON_REQ) {
				clnt_dispatch_notifyall(WR(q), type, reason);
				freemsg(mp);
				return;
			} else {
				if (clnt_dispatch_notifyconn(WR(q), mp))
					return;
			}
			break;
		}
		case T_OK_ACK:
		{
			struct T_ok_ack	*tok = (struct T_ok_ack *)mp->b_rptr;

			if (tok->CORRECT_prim == T_DISCON_REQ) {
				clnt_dispatch_notifyall(WR(q), type, reason);
				freemsg(mp);
				return;
			} else {
				if (clnt_dispatch_notifyconn(WR(q), mp))
					return;
			}
			break;
		}
		case T_CONN_CON:
		case T_INFO_ACK:
		case T_OPTMGMT_ACK:
			if (clnt_dispatch_notifyconn(WR(q), mp))
				return;
			break;
		case T_BIND_ACK:
			break;
		default:
			RPCLOG(1, "mir_rput: unexpected message %d "
			    "for KRPC client\n",
			    ((union T_primitives *)mp->b_rptr)->type);
			break;
		}
		break;

	case RPC_SERVER:
		switch (type) {
		case T_BIND_ACK:
		{
			struct T_bind_ack	*tbind;

			/*
			 * If this is a listening stream, then shut
			 * off the idle timer.
			 */
			tbind = (struct T_bind_ack *)mp->b_rptr;
			if (tbind->CONIND_number > 0) {
				mutex_enter(&mir->mir_mutex);
				mir_svc_idle_stop(WR(q), mir);

				/*
				 * mark this as a listen endpoint
				 * for special handling.
				 */

				mir->mir_listen_stream = 1;
				mutex_exit(&mir->mir_mutex);
			}
			break;
		}
		case T_DISCON_IND:
		case T_ORDREL_IND:
			RPCLOG(16, "mir_rput_proto: got %s indication\n",
			    type == T_DISCON_IND ? "disconnect"
			    : "orderly release");

			/*
			 * For listen endpoint just pass
			 * on the message.
			 */

			if (mir->mir_listen_stream)
				break;

			mutex_enter(&mir->mir_mutex);

			/*
			 * If client wants to break off connection, record
			 * that fact.
			 */
			mir_svc_start_close(WR(q), mir);

			/*
			 * If we are idle, then send the orderly release
			 * or disconnect indication to nfsd.
			 */
			if (MIR_SVC_QUIESCED(mir)) {
				mutex_exit(&mir->mir_mutex);
				break;
			}

			RPCLOG(16, "mir_rput_proto: not idle, so "
			    "disconnect/ord rel indication not passed "
			    "upstream on 0x%p\n", (void *)q);

			/*
			 * Hold the indication until we get idle.
			 * If there already is an indication stored,
			 * replace it if the new one is a disconnect.  The
			 * reasoning is that disconnection takes less time
			 * to process, and once a client decides to
			 * disconnect, we should do that.
			 */
			if (mir->mir_svc_pend_mp) {
				if (type == T_DISCON_IND) {
					RPCLOG(16, "mir_rput_proto: replacing"
					    " held disconnect/ord rel"
					    " indication with disconnect on"
					    " 0x%p\n", (void *)q);

					freemsg(mir->mir_svc_pend_mp);
					mir->mir_svc_pend_mp = mp;
				} else {
					RPCLOG(16, "mir_rput_proto: already "
					    "held a disconnect/ord rel "
					    "indication. freeing ord rel "
					    "ind on 0x%p\n", (void *)q);
					freemsg(mp);
				}
			} else
				mir->mir_svc_pend_mp = mp;

			mutex_exit(&mir->mir_mutex);
			return;

		default:
			/* nfsd handles server-side non-data messages. */
			break;
		}
		break;

	default:
		break;
	}

	putnext(q, mp);
}
2202 static void
2203 mir_rsrv(queue_t *q)
2204 {
2205	mir_t	*mir;
2206	mblk_t	*mp;
2207	mblk_t	*cmp = NULL;
2208	boolean_t stop_timer = B_FALSE;
2209
2210	mir = (mir_t *)q->q_ptr;
2211	mutex_enter(&mir->mir_mutex);
2212
2213	mp = NULL;
2214	switch (mir->mir_type) {
2215	case RPC_SERVER:
2216		if (mir->mir_ref_cnt == 0)
2217			mir->mir_hold_inbound = 0;
2218		if (mir->mir_hold_inbound) {
2219
2220			ASSERT(cmp == NULL);
2221			if (q->q_first == NULL) {
2222
2223				MIR_CLEAR_INRSRV(mir);
2224
2225				if (MIR_SVC_QUIESCED(mir)) {
2226					cmp = mir->mir_svc_pend_mp;
2227					mir->mir_svc_pend_mp = NULL;
2228				}
2229			}
2230
2231			mutex_exit(&mir->mir_mutex);
2232
2233			if (cmp != NULL) {
2234				RPCLOG(16, "mir_rsrv: line %d: sending a held "
2235				    "disconnect/ord rel indication upstream\n",
2236				    __LINE__);
2237				putnext(q, cmp);
2238			}
2239
2240			return;
2241		}
2242		while (mp = getq(q)) {
2243			if (mir->mir_krpc_cell) {
2244				/*
2245				 * If we were idle, turn off the idle timer
2246				 * since we aren't idle any more.
2247				 */
2248				if (mir->mir_ref_cnt++ == 0)
2249					stop_timer = B_TRUE;
2250				if (mir_check_len(q,
2251				    (int32_t)msgdsize(mp), mp))
2252					return;
2253				svc_queuereq(q, mp);
2254			} else {
2255				/*
2256				 * Count the number of times this happens.
2257				 * It should never happen, but experience
2258				 * shows otherwise.
2259				 */
2260				mir_krpc_cell_null++;
2261				freemsg(mp);
2262			}
2263		}
2264		break;
2265	case RPC_CLIENT:
2266		break;
2267	default:
2268		RPCLOG(1, "mir_rsrv: unexpected mir_type %d\n", mir->mir_type);
2269
2270		if (q->q_first == NULL)
2271			MIR_CLEAR_INRSRV(mir);
2272
2273		mutex_exit(&mir->mir_mutex);
2274
2275		return;
2276	}
2277
2278	/*
2279	 * The timer is stopped only after all the messages are processed.
2279	 * The reason is that stopping the timer releases the mir_mutex
2280	 * lock temporarily, which means that a request could be serviced
2281	 * while we are still working through the message queue. This is
2282	 * not good, so we defer stopping the timer until here.
2283	 */
2284	if (stop_timer) {
2285		RPCLOG(16, "mir_rsrv stopping idle timer on 0x%p because ref "
2286		    "cnt going to non zero\n", (void *)WR(q));
2287		mir_svc_idle_stop(WR(q), mir);
2288	}
2289
2290	if (q->q_first == NULL) {
2291
2292		MIR_CLEAR_INRSRV(mir);
2293
2294		ASSERT(cmp == NULL);
2295		if (mir->mir_type == RPC_SERVER && MIR_SVC_QUIESCED(mir)) {
2296			cmp = mir->mir_svc_pend_mp;
2297			mir->mir_svc_pend_mp = NULL;
2298		}
2299
2300		mutex_exit(&mir->mir_mutex);
2301
2302		if (cmp != NULL) {
2303			RPCLOG(16, "mir_rsrv: line %d: sending a held "
2304			    "disconnect/ord rel indication upstream\n",
2305			    __LINE__);
2306			putnext(q, cmp);
2307		}
2308
2309		return;
2310	}
2311	mutex_exit(&mir->mir_mutex);
2312 }
2313
2314 static int mir_svc_policy_fails;
2315
2316 /*
2317  * Called to send an event code to nfsd/lockd so that it initiates
2318  * connection close.
2319  */
2320 static int
2321 mir_svc_policy_notify(queue_t *q, int event)
2322 {
2323	mblk_t	*mp;
2324 #ifdef DEBUG
2325	mir_t *mir = (mir_t *)q->q_ptr;
2326	ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));
2327 #endif
2328	ASSERT(q->q_flag & QREADR);
2329
2330	/*
2331	 * Create an M_DATA message with the event code and pass it to the
2332	 * Stream head (nfsd or whoever created the stream will consume it).
2333	 */
2334	mp = allocb(sizeof (int), BPRI_HI);
2335
2336	if (!mp) {
2337
2338		mir_svc_policy_fails++;
2339		RPCLOG(16, "mir_svc_policy_notify: could not allocate event "
2340		    "%d\n", event);
2341		return (ENOMEM);
2342	}
2343
2344	U32_TO_BE32(event, mp->b_rptr);
2345	mp->b_wptr = mp->b_rptr + sizeof (int);
2346	putnext(q, mp);
2347	return (0);
2348 }
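
/*
 * Editorial sketch, not part of the original source: the daemon that
 * created the stream consumes the event roughly as below. The
 * descriptor name and the handling are hypothetical; the two event
 * codes come from the callers in this file -- 1 from mir_timer() for
 * an idle stream, 2 from mir_disconnect() on an error. The code is
 * stored big-endian by U32_TO_BE32, hence the ntohl().
 *
 *	uint32_t event;
 *
 *	if (read(fd, &event, sizeof (event)) == sizeof (event)) {
 *		switch (ntohl(event)) {
 *		case 1:	... stream went idle: close it ...
 *		case 2:	... error: disconnect ...
 *		}
 *	}
 */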
2349
2350 /*
2351  * Server side: start the close phase. We want to get this rpcmod slot in an
2352  * idle state before mir_close() is called.
2353  */
2354 static void
2355 mir_svc_start_close(queue_t *wq, mir_t *mir)
2356 {
2357	ASSERT(MUTEX_HELD(&mir->mir_mutex));
2358	ASSERT((wq->q_flag & QREADR) == 0);
2359	ASSERT(mir->mir_type == RPC_SERVER);
2360
2361
2362	/*
2363	 * Do not accept any more messages.
2364	 */
2365	mir->mir_svc_no_more_msgs = 1;
2366
2367	/*
2368	 * The next two statements will make the read service procedure
2369	 * invoke svc_queuereq() on everything stuck in the stream's read
2370	 * queue. This is not strictly necessary, because enabling the write
2371	 * queue will have the same effect, but why not speed the process along?
2372	 */
2373	mir->mir_hold_inbound = 0;
2374	qenable(RD(wq));
2375
2376	/*
2377	 * Meanwhile force the write service procedure to send the
2378	 * responses downstream, regardless of flow control.
2379	 */
2380	qenable(wq);
2381 }
2382
2383 /*
2384  * This routine is called directly by KRPC after a request is completed,
2385  * whether a reply was sent or the request was dropped.
2386  */
2387 static void
2388 mir_svc_release(queue_t *wq, mblk_t *mp)
2389 {
2390	mir_t	*mir = (mir_t *)wq->q_ptr;
2391	mblk_t	*cmp = NULL;
2392
2393	ASSERT((wq->q_flag & QREADR) == 0);
2394	if (mp)
2395		freemsg(mp);
2396
2397	mutex_enter(&mir->mir_mutex);
2398	mir->mir_ref_cnt--;
2399	ASSERT(mir->mir_ref_cnt >= 0);
2400
2401	/*
2402	 * Start idle processing if this is the last reference.
2403	 */
2404	if (MIR_SVC_QUIESCED(mir)) {
2405
2406		RPCLOG(16, "mir_svc_release starting idle timer on 0x%p "
2407		    "because ref cnt is zero\n", (void *) wq);
2408
2409		cmp = mir->mir_svc_pend_mp;
2410		mir->mir_svc_pend_mp = NULL;
2411		mir_svc_idle_start(wq, mir);
2412	}
2413
2414	mutex_exit(&mir->mir_mutex);
2415
2416	if (cmp) {
2417		RPCLOG(16, "mir_svc_release: sending a held "
2418		    "disconnect/ord rel indication upstream on queue 0x%p\n",
2419		    (void *)RD(wq));
2420
2421		putnext(RD(wq), cmp);
2422	}
2423 }
2424
2425 /*
2426  * This routine is called by server-side KRPC when it is ready to
2427  * handle inbound messages on the stream.
2428  */
2429 static void
2430 mir_svc_start(queue_t *wq)
2431 {
2432	mir_t	*mir = (mir_t *)wq->q_ptr;
2433
2434	/*
2435	 * We no longer need to take the mir_mutex here because the
2436	 * mir_setup_complete field has been moved out of
2437	 * the bit fields protected by the mir_mutex.
2438	 */
2439
2440	mir->mir_setup_complete = 1;
2441	qenable(RD(wq));
2442 }
2443
2444 /*
2445  * Client side wrapper for stopping the timer with the normal idle timeout.
2446  */
2447 static void
2448 mir_clnt_idle_stop(queue_t *wq, mir_t *mir)
2449 {
2450	ASSERT(MUTEX_HELD(&mir->mir_mutex));
2451	ASSERT((wq->q_flag & QREADR) == 0);
2452	ASSERT(mir->mir_type == RPC_CLIENT);
2453
2454	mir_timer_stop(mir);
2455 }
2456
2457 /*
2458  * Client side wrapper for starting the timer with the normal idle timeout.
2459  */
2460 static void
2461 mir_clnt_idle_start(queue_t *wq, mir_t *mir)
2462 {
2463	ASSERT(MUTEX_HELD(&mir->mir_mutex));
2464	ASSERT((wq->q_flag & QREADR) == 0);
2465	ASSERT(mir->mir_type == RPC_CLIENT);
2466
2467	mir_timer_start(wq, mir, mir->mir_idle_timeout);
2468 }
2469
2470 /*
2471  * Client side only. Forces rpcmod to stop sending T_ORDREL_REQs on
2472  * end-points that aren't connected.
2473  */
2474 static void
2475 mir_clnt_idle_do_stop(queue_t *wq)
2476 {
2477	mir_t	*mir = (mir_t *)wq->q_ptr;
2478
2479	RPCLOG(1, "mir_clnt_idle_do_stop: wq 0x%p\n", (void *)wq);
2480	ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));
2481	mutex_enter(&mir->mir_mutex);
2482	mir_clnt_idle_stop(wq, mir);
2483	mutex_exit(&mir->mir_mutex);
2484 }
2485
2486 /*
2487  * Timer handler. It handles idle timeouts and memory shortage problems.
2488  */
2489 static void
2490 mir_timer(void *arg)
2491 {
2492	queue_t *wq = (queue_t *)arg;
2493	mir_t	*mir = (mir_t *)wq->q_ptr;
2494	boolean_t notify;
2495
2496	mutex_enter(&mir->mir_mutex);
2497
2498	/*
2499	 * mir_timer_call is set only while either mir_timer_[start|stop]
2500	 * is in progress, and mir_timer() can only run concurrently with
2501	 * them if the timer is being stopped. So just
2502	 * return.
2503	 */
2504	if (mir->mir_timer_call) {
2505		mutex_exit(&mir->mir_mutex);
2506		return;
2507	}
2508	mir->mir_timer_id = 0;
2509
2510	switch (mir->mir_type) {
2511	case RPC_CLIENT:
2512
2513		/*
2514		 * For clients, the timer fires at clnt_idle_timeout
2515		 * intervals. If the activity marker (mir_clntreq) is
2516		 * zero, then the stream has been idle since the last
2517		 * timer event and we notify KRPC. If mir_clntreq is
2518		 * non-zero, then the stream is active and we just
2519		 * restart the timer for another interval. mir_clntreq
2520		 * is set to 1 in mir_wput for every request passed
2521		 * downstream.
2522		 *
2523		 * If this was a memory shortage timer, reset the idle
2524		 * timeout regardless; mir_clntreq will not be a
2525		 * valid indicator.
2526		 *
2527		 * The timer is initially started in mir_wput during
2528		 * RPC_CLIENT ioctl processing.
2529		 *
2530		 * The timer interval can be changed for individual
2531		 * streams with the ND variable "mir_idle_timeout".
2532		 */
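		/*
		 * Editorial illustration, not in the original source:
		 * with mir_idle_timeout = 300000 ms and a request last
		 * passed downstream 100000 ms ago, the test below
		 * succeeds and the timer is re-armed for
		 *
		 *	tout = 300000 - 100000 = 200000 ms.
		 *
		 * The "tout < 0" clamp guards against tick-to-millisecond
		 * rounding pushing the remainder negative; the values
		 * here are examples only.
		 */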
2533		if (mir->mir_clntreq > 0 && mir->mir_use_timestamp +
2534		    MSEC_TO_TICK(mir->mir_idle_timeout) - lbolt >= 0) {
2535			clock_t tout;
2536
2537			tout = mir->mir_idle_timeout -
2538			    TICK_TO_MSEC(lbolt - mir->mir_use_timestamp);
2539			if (tout < 0)
2540				tout = 1000;
2541 #if 0
2542 printf("mir_timer[%d < %d + %d]: reset client timer to %d (ms)\n",
2543 TICK_TO_MSEC(lbolt), TICK_TO_MSEC(mir->mir_use_timestamp),
2544 mir->mir_idle_timeout, tout);
2545 #endif
2546			mir->mir_clntreq = 0;
2547			mir_timer_start(wq, mir, tout);
2548			mutex_exit(&mir->mir_mutex);
2549			return;
2550		}
2551 #if 0
2552 printf("mir_timer[%d]: doing client timeout\n", lbolt / hz);
2553 #endif
2554		/*
2555		 * We are disconnecting, but not necessarily
2556		 * closing. By not closing, we will fail to
2557		 * pick up a possibly changed global timeout value,
2558		 * unless we store it now.
2559		 */
2560		mir->mir_idle_timeout = clnt_idle_timeout;
2561		mir_clnt_idle_start(wq, mir);
2562
2563		mutex_exit(&mir->mir_mutex);
2564		/*
2565		 * We pass T_ORDREL_REQ as an integer value
2566		 * to KRPC as the indication that the stream
2567		 * is idle. This is not a T_ORDREL_REQ message,
2568		 * it is just a convenient value since we call
2569		 * the same KRPC routine for T_ORDREL_INDs and
2570		 * T_DISCON_INDs.
2571		 */
2572		clnt_dispatch_notifyall(wq, T_ORDREL_REQ, 0);
2573		return;
2574
2575	case RPC_SERVER:
2576
2577		/*
2578		 * For servers, the timer is only running when the stream
2579		 * is really idle or memory is short. The timer is started
2580		 * by mir_wput when mir_type is set to RPC_SERVER and
2581		 * by mir_svc_idle_start whenever the stream goes idle
2582		 * (mir_ref_cnt == 0). The timer is cancelled in
2583		 * mir_rput whenever a new inbound request is passed to KRPC
2584		 * and the stream was previously idle.
2585		 *
2586		 * The timer interval can be changed for individual
2587		 * streams with the ND variable "mir_idle_timeout".
2588		 *
2589		 * If the stream is not idle, do nothing.
2590		 */
2591		if (!MIR_SVC_QUIESCED(mir)) {
2592			mutex_exit(&mir->mir_mutex);
2593			return;
2594		}
2595
2596		notify = !mir->mir_inrservice;
2597		mutex_exit(&mir->mir_mutex);
2598
2599		/*
2600		 * If there is no packet queued up in the read queue, the
2601		 * stream is really idle so notify nfsd to close it.
2602		 */
2603		if (notify) {
2604			RPCLOG(16, "mir_timer: telling stream head listener "
2605			    "to close stream (0x%p)\n", (void *) RD(wq));
2606			(void) mir_svc_policy_notify(RD(wq), 1);
2607		}
2608		return;
2609	default:
2610		RPCLOG(1, "mir_timer: unexpected mir_type %d\n",
2611		    mir->mir_type);
2612		mutex_exit(&mir->mir_mutex);
2613		return;
2614	}
2615 }
2616
2617 /*
2618  * Called by the RPC package to send either a call or a return, or a
2619  * transport connection request. Adds the record marking header.
2620  */
2621 static void
2622 mir_wput(queue_t *q, mblk_t *mp)
2623 {
2624	uint_t	frag_header;
2625	mir_t	*mir = (mir_t *)q->q_ptr;
2626	uchar_t	*rptr = mp->b_rptr;
2627
2628	if (!mir) {
2629		freemsg(mp);
2630		return;
2631	}
2632
2633	if (mp->b_datap->db_type != M_DATA) {
2634		mir_wput_other(q, mp);
2635		return;
2636	}
2637
2638	if (mir->mir_ordrel_pending == 1) {
2639		freemsg(mp);
2640		RPCLOG(16, "mir_wput wq 0x%p: got data after T_ORDREL_REQ\n",
2641		    (void *)q);
2642		return;
2643	}
2644
2645	frag_header = (uint_t)DLEN(mp);
2646	frag_header |= MIR_LASTFRAG;
2647
2648	/* Stick in the 4 byte record marking header. */
2649	if ((rptr - mp->b_datap->db_base) < sizeof (uint32_t) ||
2650	    !IS_P2ALIGNED(mp->b_rptr, sizeof (uint32_t))) {
2651		/*
2652		 * Since we know that M_DATA messages are created exclusively
2653		 * by KRPC, we expect that KRPC will leave room for our header
2654		 * and keep the data 4-byte aligned, which is normal for XDR.
2655		 * If KRPC (or someone else) does not cooperate, then we
2656		 * just throw away the message.
2657		 */
2658		RPCLOG(1, "mir_wput: KRPC did not leave space for record "
2659		    "fragment header (%d bytes left)\n",
2660		    (int)(rptr - mp->b_datap->db_base));
2661		freemsg(mp);
2662		return;
2663	}
2664	rptr -= sizeof (uint32_t);
2665	*(uint32_t *)rptr = htonl(frag_header);
2666	mp->b_rptr = rptr;
2667
2668	mutex_enter(&mir->mir_mutex);
2669	if (mir->mir_type == RPC_CLIENT) {
2670		/*
2671		 * For the client, set mir_clntreq to indicate that the
2672		 * connection is active.
2673		 */
2674		mir->mir_clntreq = 1;
2675		mir->mir_use_timestamp = lbolt;
2676	}
2677
2678	/*
2679	 * If we haven't already queued some data and the downstream module
2680	 * can accept more data, send it on, otherwise we queue the message
2681	 * and take other actions depending on mir_type.
2682	 */
2683	if (!mir->mir_inwservice && MIR_WCANPUTNEXT(mir, q)) {
2684		mutex_exit(&mir->mir_mutex);
2685
2686		/*
2687		 * Now we pass the RPC message downstream.
2688		 */
2689		putnext(q, mp);
2690		return;
2691	}
2692
2693	switch (mir->mir_type) {
2694	case RPC_CLIENT:
2695		/*
2696		 * Check for a previous duplicate request on the
2697		 * queue. If there is one, then we throw away
2698		 * the current message and let the previous one
2699		 * go through. If we can't find a duplicate, then
2700		 * send this one. This tap dance is an effort
2701		 * to reduce traffic and processing requirements
2702		 * under load conditions.
2703		 */
2704		if (mir_clnt_dup_request(q, mp)) {
2705			mutex_exit(&mir->mir_mutex);
2706			freemsg(mp);
2707			return;
2708		}
2709		break;
2710	case RPC_SERVER:
2711		/*
2712		 * Set mir_hold_inbound so that new inbound RPC
2713		 * messages will be held until the client catches
2714		 * up on the earlier replies. This flag is cleared
2715		 * in mir_wsrv after flow control is relieved;
2716		 * the read-side queue is also enabled at that time.
2717		 */
2718		mir->mir_hold_inbound = 1;
2719		break;
2720	default:
2721		RPCLOG(1, "mir_wput: unexpected mir_type %d\n", mir->mir_type);
2722		break;
2723	}
2724	mir->mir_inwservice = 1;
2725	(void) putq(q, mp);
2726	mutex_exit(&mir->mir_mutex);
2727 }
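
/*
 * Editorial illustration, not in the original source: the record mark
 * written above is the standard RPC-over-TCP record marking (RFC 1831),
 * a 4-byte big-endian word whose high bit flags the final fragment and
 * whose low 31 bits carry the fragment length. MIR_LASTFRAG is
 * presumably that high bit (0x80000000). For example, a 100-byte
 * request sent as a single, final fragment is preceded by
 *
 *	htonl(100 | MIR_LASTFRAG) == htonl(0x80000064)
 *
 * so the receiver knows to read exactly 100 more bytes and that the
 * record is then complete.
 */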
2728
2729 static void
2730 mir_wput_other(queue_t *q, mblk_t *mp)
2731 {
2732	mir_t	*mir = (mir_t *)q->q_ptr;
2733	struct iocblk	*iocp;
2734	uchar_t	*rptr = mp->b_rptr;
2735	bool_t	flush_in_svc = FALSE;
2736
2737	ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));
2738	switch (mp->b_datap->db_type) {
2739	case M_IOCTL:
2740		iocp = (struct iocblk *)rptr;
2741		switch (iocp->ioc_cmd) {
2742		case RPC_CLIENT:
2743			mutex_enter(&mir->mir_mutex);
2744			if (mir->mir_type != 0 &&
2745			    mir->mir_type != iocp->ioc_cmd) {
2746 ioc_eperm:
2747				mutex_exit(&mir->mir_mutex);
2748				iocp->ioc_error = EPERM;
2749				iocp->ioc_count = 0;
2750				mp->b_datap->db_type = M_IOCACK;
2751				qreply(q, mp);
2752				return;
2753			}
2754
2755			mir->mir_type = iocp->ioc_cmd;
2756
2757			/*
2758			 * Clear mir_hold_inbound, which was set to 1 by
2759			 * mir_open. This flag is not used on client
2760			 * streams.
2761			 */
2762			mir->mir_hold_inbound = 0;
2763			mir->mir_max_msg_sizep = &clnt_max_msg_size;
2764
2765			/*
2766			 * Start the idle timer. See mir_timer() for more
2767			 * information on how client timers work.
2768			 */
2769			mir->mir_idle_timeout = clnt_idle_timeout;
2770			mir_clnt_idle_start(q, mir);
2771			mutex_exit(&mir->mir_mutex);
2772
2773			mp->b_datap->db_type = M_IOCACK;
2774			qreply(q, mp);
2775			return;
2776		case RPC_SERVER:
2777			mutex_enter(&mir->mir_mutex);
2778			if (mir->mir_type != 0 &&
2779			    mir->mir_type != iocp->ioc_cmd)
2780				goto ioc_eperm;
2781
2782			/*
2783			 * We don't clear mir_hold_inbound here because
2784			 * mir_hold_inbound is used in the flow control
2785			 * model. If we cleared it here, then we'd commit
2786			 * a small violation of the model, since the transport
2787			 * might immediately block downstream flow.
2788			 */
2789
2790			mir->mir_type = iocp->ioc_cmd;
2791			mir->mir_max_msg_sizep = &svc_max_msg_size;
2792
2793			/*
2794			 * Start the idle timer. See mir_timer() for more
2795			 * information on how server timers work.
2796			 *
2797			 * Note that it is important to start the idle timer
2798			 * here so that connections time out even if we
2799			 * never receive any data on them.
2800			 */
2801			mir->mir_idle_timeout = svc_idle_timeout;
2802			RPCLOG(16, "mir_wput_other starting idle timer on 0x%p "
2803			    "because we got RPC_SERVER ioctl\n", (void *)q);
2804			mir_svc_idle_start(q, mir);
2805			mutex_exit(&mir->mir_mutex);
2806
2807			mp->b_datap->db_type = M_IOCACK;
2808			qreply(q, mp);
2809			return;
2810		default:
2811			break;
2812		}
2813		break;
2814
2815	case M_PROTO:
2816		if (mir->mir_type == RPC_CLIENT) {
2817			/*
2818			 * We are likely being called from the context of a
2819			 * service procedure, so we need to enqueue. However,
2820			 * enqueueing may put our message behind data
2821			 * messages, so flush the data first.
2822			 */
2823			flush_in_svc = TRUE;
2824		}
2825		if ((mp->b_wptr - rptr) < sizeof (uint32_t) ||
2826		    !IS_P2ALIGNED(rptr, sizeof (uint32_t)))
2827			break;
2828
2829		switch (((union T_primitives *)rptr)->type) {
2830		case T_DATA_REQ:
2831			/* Don't pass T_DATA_REQ messages downstream. */
2832			freemsg(mp);
2833			return;
2834		case T_ORDREL_REQ:
2835			RPCLOG(8, "mir_wput_other wq 0x%p: got T_ORDREL_REQ\n",
2836			    (void *)q);
2837			mutex_enter(&mir->mir_mutex);
2838			if (mir->mir_type != RPC_SERVER) {
2839				/*
2840				 * We are likely being called from
2841				 * clnt_dispatch_notifyall(). Sending
2842				 * a T_ORDREL_REQ will result in some kind
2843				 * of _IND message being sent back up, which
2844				 * will trigger another call to
2845				 * clnt_dispatch_notifyall(). To keep the stack
2846				 * lean, queue this message.
2847				 */
2848				mir->mir_inwservice = 1;
2849				(void) putq(q, mp);
2850				mutex_exit(&mir->mir_mutex);
2851				return;
2852			}
2853
2854			/*
2855			 * Mark the structure such that we don't accept any
2856			 * more requests from the client. We could defer this
2857			 * until we actually send the orderly release
2858			 * request downstream, but all that does is delay
2859			 * the closing of this stream.
2860			 */
2861			RPCLOG(16, "mir_wput_other wq 0x%p: got T_ORDREL_REQ "
2862			    " so calling mir_svc_start_close\n", (void *)q);
2863
2864			mir_svc_start_close(q, mir);
2865
2866			/*
2867			 * If we have sent down a T_ORDREL_REQ, don't send
2868			 * any more.
2869			 */
2870			if (mir->mir_ordrel_pending) {
2871				freemsg(mp);
2872				mutex_exit(&mir->mir_mutex);
2873				return;
2874			}
2875
2876			/*
2877			 * If the stream is not idle, then we hold the
2878			 * orderly release until it becomes idle. This
2879			 * ensures that KRPC will be able to reply to
2880			 * all requests that we have passed to it.
2881			 *
2882			 * We also queue the request if there is data already
2883			 * queued, because we cannot allow the T_ORDREL_REQ
2884			 * to go before data. When we had a separate reply
2885			 * count, this was not a problem, because the
2886			 * reply count was reconciled when mir_wsrv()
2887			 * completed.
2888			 */
2889			if (!MIR_SVC_QUIESCED(mir) ||
2890			    mir->mir_inwservice == 1) {
2891				mir->mir_inwservice = 1;
2892				(void) putq(q, mp);
2893
2894				RPCLOG(16, "mir_wput_other: queuing "
2895				    "T_ORDREL_REQ on 0x%p\n", (void *)q);
2896
2897				mutex_exit(&mir->mir_mutex);
2898				return;
2899			}
2900
2901			/*
2902			 * Mark the structure so that we know we sent
2903			 * an orderly release request, and reset the idle timer.
2904			 */
2905			mir->mir_ordrel_pending = 1;
2906
2907			RPCLOG(16, "mir_wput_other: calling mir_svc_idle_start"
2908			    " on 0x%p because we got T_ORDREL_REQ\n",
2909			    (void *)q);
2910
2911			mir_svc_idle_start(q, mir);
2912			mutex_exit(&mir->mir_mutex);
2913
2914			/*
2915			 * When we break, we will putnext the T_ORDREL_REQ.
2916			 */
2917			break;
2918
2919		case T_CONN_REQ:
2920			mutex_enter(&mir->mir_mutex);
2921			if (mir->mir_head_mp != NULL) {
2922				freemsg(mir->mir_head_mp);
2923				mir->mir_head_mp = NULL;
2924				mir->mir_tail_mp = NULL;
2925			}
2926			mir->mir_frag_len = -(int32_t)sizeof (uint32_t);
2927			/*
2928			 * Restart the timer in case mir_clnt_idle_do_stop()
2929			 * was called.
2930			 */
2931			mir->mir_idle_timeout = clnt_idle_timeout;
2932			mir_clnt_idle_stop(q, mir);
2933			mir_clnt_idle_start(q, mir);
2934			mutex_exit(&mir->mir_mutex);
2935			break;
2936
2937		default:
2938			/*
2939			 * T_DISCON_REQ is one of the interesting default
2940			 * cases here. Ideally, an M_FLUSH is done before
2941			 * T_DISCON_REQ is done. However, that is somewhat
2942			 * cumbersome for clnt_cots.c to do. So we queue
2943			 * T_DISCON_REQ, and let the service procedure
2944			 * flush all M_DATA.
2945			 */
2946			break;
2947		}
2948		/* fallthru */;
2949	default:
2950		if (mp->b_datap->db_type >= QPCTL) {
2951			if (mp->b_datap->db_type == M_FLUSH) {
2952				if (mir->mir_type == RPC_CLIENT &&
2953				    *mp->b_rptr & FLUSHW) {
2954					RPCLOG(32, "mir_wput_other: flushing "
2955					    "wq 0x%p\n", (void *)q);
2956					if (*mp->b_rptr & FLUSHBAND) {
2957						flushband(q, *(mp->b_rptr + 1),
2958						    FLUSHDATA);
2959					} else {
2960						flushq(q, FLUSHDATA);
2961					}
2962				} else {
2963					RPCLOG(32, "mir_wput_other: ignoring "
2964					    "M_FLUSH on wq 0x%p\n", (void *)q);
2965				}
2966			}
2967			break;
2968		}
2969
2970		mutex_enter(&mir->mir_mutex);
2971		if (mir->mir_inwservice == 0 && MIR_WCANPUTNEXT(mir, q)) {
2972			mutex_exit(&mir->mir_mutex);
2973			break;
2974		}
2975		mir->mir_inwservice = 1;
2976		mir->mir_inwflushdata = flush_in_svc;
2977		(void) putq(q, mp);
2978		mutex_exit(&mir->mir_mutex);
2979		qenable(q);
2980
2981		return;
2982	}
2983	putnext(q, mp);
2984 }
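
/*
 * Editorial sketch, not in the original source: the M_IOCTL cases above
 * are how the owner of the stream selects client or server behavior
 * after pushing the module. From user level that would be a transparent
 * I_STR ioctl carrying RPC_CLIENT or RPC_SERVER as the command (the
 * descriptor name here is hypothetical; kRPC itself issues the ioctl
 * from the kernel):
 *
 *	struct strioctl iocb;
 *
 *	iocb.ic_cmd = RPC_SERVER;	... or RPC_CLIENT ...
 *	iocb.ic_timout = 0;
 *	iocb.ic_len = 0;
 *	iocb.ic_dp = NULL;
 *	(void) ioctl(fd, I_STR, &iocb);
 *
 * The ioctl arrives here as an M_IOCTL message and is acknowledged with
 * M_IOCACK; issuing it again with a different command fails with EPERM,
 * as the ioc_eperm path above shows.
 */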
2985
2986 static void
2987 mir_wsrv(queue_t *q)
2988 {
2989	mblk_t	*mp;
2990	mir_t	*mir;
2991	bool_t	flushdata;
2992
2993	mir = (mir_t *)q->q_ptr;
2994	mutex_enter(&mir->mir_mutex);
2995
2996	flushdata = mir->mir_inwflushdata;
2997	mir->mir_inwflushdata = 0;
2998
2999	while (mp = getq(q)) {
3000		if (mp->b_datap->db_type == M_DATA) {
3001			/*
3002			 * Do not send any more data if we have sent
3003			 * a T_ORDREL_REQ.
3004			 */
3005			if (flushdata || mir->mir_ordrel_pending == 1) {
3006				freemsg(mp);
3007				continue;
3008			}
3009
3010			/*
3011			 * Make sure that the stream can really handle more
3012			 * data.
3013			 */
3014			if (!MIR_WCANPUTNEXT(mir, q)) {
3015				(void) putbq(q, mp);
3016				mutex_exit(&mir->mir_mutex);
3017				return;
3018			}
3019
3020			/*
3021			 * Now we pass the RPC message downstream.
3022			 */
3023			mutex_exit(&mir->mir_mutex);
3024			putnext(q, mp);
3025			mutex_enter(&mir->mir_mutex);
3026			continue;
3027		}
3028
3029		/*
3030		 * This is not an RPC message, pass it downstream
3031		 * (ignoring flow control) if the server side is not sending a
3032		 * T_ORDREL_REQ downstream.
3033		 */
3034		if (mir->mir_type != RPC_SERVER ||
3035		    ((union T_primitives *)mp->b_rptr)->type !=
3036		    T_ORDREL_REQ) {
3037			mutex_exit(&mir->mir_mutex);
3038			putnext(q, mp);
3039			mutex_enter(&mir->mir_mutex);
3040			continue;
3041		}
3042
3043		if (mir->mir_ordrel_pending == 1) {
3044			/*
3045			 * Don't send two T_ORDRELs.
3046			 */
3047			freemsg(mp);
3048			continue;
3049		}
3050
3051		/*
3052		 * Mark the structure so that we know we sent an orderly
3053		 * release request. We will check to see if the slot is idle
3054		 * at the end of this routine, and if so, reset the idle
3055		 * timer to handle orderly release timeouts.
3056		 */
3057		mir->mir_ordrel_pending = 1;
3058		RPCLOG(16, "mir_wsrv: sending ordrel req on q 0x%p\n",
3059		    (void *)q);
3060		/*
3061		 * Send the orderly release downstream. If there are other
3062		 * pending replies, we won't be able to send them. However,
3063		 * the only reason we should send the orderly release is if
3064		 * we were idle, or if an unusual event occurred.
3065		 */
3066		mutex_exit(&mir->mir_mutex);
3067		putnext(q, mp);
3068		mutex_enter(&mir->mir_mutex);
3069	}
3070
3071	if (q->q_first == NULL)
3072		/*
3073		 * If we call mir_svc_idle_start() below, then
3074		 * clearing mir_inwservice here will also result in
3075		 * any thread waiting in mir_close() being signaled.
3076		 */
3077		mir->mir_inwservice = 0;
3078
3079	if (mir->mir_type != RPC_SERVER) {
3080		mutex_exit(&mir->mir_mutex);
3081		return;
3082	}
3083
3084	/*
3085	 * If idle, we call mir_svc_idle_start to start the timer (or wake
3086	 * up a close). Also make sure not to start the idle timer on the
3087	 * listener stream; doing so can cause nfsd to send an orderly
3088	 * release command on the listener stream.
3089	 */
3090	if (MIR_SVC_QUIESCED(mir) && !(mir->mir_listen_stream)) {
3091		RPCLOG(16, "mir_wsrv: calling mir_svc_idle_start on 0x%p "
3092		    "because mir slot is idle\n", (void *)q);
3093		mir_svc_idle_start(q, mir);
3094	}
3095
3096	/*
3097	 * If outbound flow control has been relieved, then allow new
3098	 * inbound requests to be processed.
3099	 */
3100	if (mir->mir_hold_inbound) {
3101		mir->mir_hold_inbound = 0;
3102		qenable(RD(q));
3103	}
3104	mutex_exit(&mir->mir_mutex);
3105 }
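
/*
 * Editorial summary, not in the original source: the server-side
 * flow-control handshake implemented by mir_wput, mir_wsrv and
 * mir_rsrv --
 *
 *	mir_wput:  downstream is congested, so the reply is queued
 *	           and mir_hold_inbound is set;
 *	mir_wsrv:  runs when congestion is relieved, drains the
 *	           write queue, clears mir_hold_inbound and
 *	           qenables the read side;
 *	mir_rsrv:  resumes handing queued inbound requests to kRPC
 *	           via svc_queuereq().
 */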
3106
3107 static void
3108 mir_disconnect(queue_t *q, mir_t *mir)
3109 {
3110	ASSERT(MUTEX_HELD(&mir->mir_mutex));
3111
3112	switch (mir->mir_type) {
3113	case RPC_CLIENT:
3114		/*
3115		 * We are disconnecting, but not necessarily
3116		 * closing. By not closing, we will fail to
3117		 * pick up a possibly changed global timeout value,
3118		 * unless we store it now.
3119		 */
3120		mir->mir_idle_timeout = clnt_idle_timeout;
3121		mir_clnt_idle_start(WR(q), mir);
3122		mutex_exit(&mir->mir_mutex);
3123
3124		/*
3125		 * T_DISCON_REQ is passed to KRPC as an integer value
3126		 * (this is not a TPI message). It is used as a
3127		 * convenient value to indicate a sanity check
3128		 * failure -- the same KRPC routine is also called
3129		 * for T_DISCON_INDs and T_ORDREL_INDs.
3130		 */
3131		clnt_dispatch_notifyall(WR(q), T_DISCON_REQ, 0);
3132		break;
3133
3134	case RPC_SERVER:
3135		mir->mir_svc_no_more_msgs = 1;
3136		mir_svc_idle_stop(WR(q), mir);
3137		mutex_exit(&mir->mir_mutex);
3138		RPCLOG(16, "mir_disconnect: telling "
3139		    "stream head listener to disconnect stream "
3140		    "(0x%p)\n", (void *) q);
3141		(void) mir_svc_policy_notify(q, 2);
3142		break;
3143
3144	default:
3145		mutex_exit(&mir->mir_mutex);
3146		break;
3147	}
3148 }
3149
3150 /*
3151  * Do a sanity check on the length of the fragment.
3152  * Returns 1 if bad, else 0.
3153  */
3154 static int
3155 mir_check_len(queue_t *q, int32_t frag_len,
3156     mblk_t *head_mp)
3157 {
3158	mir_t *mir;
3159
3160	mir = (mir_t *)q->q_ptr;
3161
3162	/*
3163	 * Do a sanity check on the message length. If this message is
3164	 * getting excessively large, shut down the connection.
3165	 */
3166
3167	if ((frag_len <= 0) || (mir->mir_max_msg_sizep == NULL) ||
3168	    (frag_len <= *mir->mir_max_msg_sizep)) {
3169		return (0);
3170	}
3171
3172	freemsg(head_mp);
3173	mir->mir_head_mp = (mblk_t *)0;
3174	mir->mir_frag_len = -(int)sizeof (uint32_t);
3175	if (mir->mir_type != RPC_SERVER || mir->mir_setup_complete) {
3176		cmn_err(CE_NOTE,
3177		    "KRPC: record fragment from %s of size(%d) exceeds "
3178		    "maximum (%u). Disconnecting",
3179		    (mir->mir_type == RPC_CLIENT) ? "server" :
3180		    (mir->mir_type == RPC_SERVER) ? "client" :
3181		    "test tool",
3182		    frag_len, *mir->mir_max_msg_sizep);
3183	}
3184
3185	mir_disconnect(q, mir);
3186	return (1);
3187 }
3188
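/*
 * Editorial illustration, not in the original source: mir_check_len()
 * is what protects the module from a garbled or hostile record mark.
 * Suppose a peer's record mark decodes to a fragment length of
 * 0x40000000 (1 GB) while *mir_max_msg_sizep holds a sane limit such
 * as the default clnt_max_msg_size or svc_max_msg_size. The check
 * above fails, the partially assembled message is freed, and the
 * connection is torn down via mir_disconnect() rather than letting
 * rpcmod try to buffer a gigabyte of data. The 1 GB figure and the
 * "sane" defaults are examples only; the real limits live in the
 * globals referenced through mir_max_msg_sizep.
 */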