1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 /* 31 * Kernel RPC filtering module 32 */ 33 34 #include <sys/param.h> 35 #include <sys/types.h> 36 #include <sys/stream.h> 37 #include <sys/stropts.h> 38 #include <sys/strsubr.h> 39 #include <sys/tihdr.h> 40 #include <sys/timod.h> 41 #include <sys/tiuser.h> 42 #include <sys/debug.h> 43 #include <sys/signal.h> 44 #include <sys/pcb.h> 45 #include <sys/user.h> 46 #include <sys/errno.h> 47 #include <sys/cred.h> 48 #include <sys/policy.h> 49 #include <sys/inline.h> 50 #include <sys/cmn_err.h> 51 #include <sys/kmem.h> 52 #include <sys/file.h> 53 #include <sys/sysmacros.h> 54 #include <sys/systm.h> 55 #include <sys/t_lock.h> 56 #include <sys/ddi.h> 57 #include <sys/vtrace.h> 58 #include <sys/callb.h> 59 #include <sys/strsun.h> 60 61 #include <sys/strlog.h> 62 #include <rpc/rpc_com.h> 63 #include <inet/common.h> 64 #include <rpc/types.h> 65 #include <sys/time.h> 66 #include <rpc/xdr.h> 67 #include <rpc/auth.h> 68 #include <rpc/clnt.h> 69 #include <rpc/rpc_msg.h> 70 #include <rpc/clnt.h> 71 #include <rpc/svc.h> 72 #include <rpc/rpcsys.h> 73 #include <rpc/rpc_rdma.h> 74 75 /* 76 * This is the loadable module wrapper. 77 */ 78 #include <sys/conf.h> 79 #include <sys/modctl.h> 80 #include <sys/syscall.h> 81 82 extern struct streamtab rpcinfo; 83 84 static struct fmodsw fsw = { 85 "rpcmod", 86 &rpcinfo, 87 D_NEW|D_MP, 88 }; 89 90 /* 91 * Module linkage information for the kernel. 92 */ 93 94 static struct modlstrmod modlstrmod = { 95 &mod_strmodops, "rpc interface str mod", &fsw 96 }; 97 98 /* 99 * For the RPC system call. 100 */ 101 static struct sysent rpcsysent = { 102 2, 103 SE_32RVAL1 | SE_ARGC | SE_NOUNLOAD, 104 rpcsys 105 }; 106 107 static struct modlsys modlsys = { 108 &mod_syscallops, 109 "RPC syscall", 110 &rpcsysent 111 }; 112 113 #ifdef _SYSCALL32_IMPL 114 static struct modlsys modlsys32 = { 115 &mod_syscallops32, 116 "32-bit RPC syscall", 117 &rpcsysent 118 }; 119 #endif /* _SYSCALL32_IMPL */ 120 121 static struct modlinkage modlinkage = { 122 MODREV_1, 123 { 124 &modlsys, 125 #ifdef _SYSCALL32_IMPL 126 &modlsys32, 127 #endif 128 &modlstrmod, 129 NULL 130 } 131 }; 132 133 int 134 _init(void) 135 { 136 int error = 0; 137 callb_id_t cid; 138 int status; 139 140 svc_init(); 141 clnt_init(); 142 cid = callb_add(connmgr_cpr_reset, 0, CB_CL_CPR_RPC, "rpc"); 143 144 if (error = mod_install(&modlinkage)) { 145 /* 146 * Could not install module, cleanup previous 147 * initialization work. 148 */ 149 clnt_fini(); 150 if (cid != NULL) 151 (void) callb_delete(cid); 152 153 return (error); 154 } 155 156 /* 157 * Load up the RDMA plugins and initialize the stats. Even if the 158 * plugins loadup fails, but rpcmod was successfully installed the 159 * counters still get initialized. 160 */ 161 rw_init(&rdma_lock, NULL, RW_DEFAULT, NULL); 162 mutex_init(&rdma_modload_lock, NULL, MUTEX_DEFAULT, NULL); 163 164 cv_init(&rdma_wait.svc_cv, NULL, CV_DEFAULT, NULL); 165 mutex_init(&rdma_wait.svc_lock, NULL, MUTEX_DEFAULT, NULL); 166 167 mt_kstat_init(); 168 169 /* 170 * Get our identification into ldi. This is used for loading 171 * other modules, e.g. rpcib. 172 */ 173 status = ldi_ident_from_mod(&modlinkage, &rpcmod_li); 174 if (status != 0) { 175 cmn_err(CE_WARN, "ldi_ident_from_mod fails with %d", status); 176 rpcmod_li = NULL; 177 } 178 179 return (error); 180 } 181 182 /* 183 * The unload entry point fails, because we advertise entry points into 184 * rpcmod from the rest of kRPC: rpcmod_release(). 185 */ 186 int 187 _fini(void) 188 { 189 return (EBUSY); 190 } 191 192 int 193 _info(struct modinfo *modinfop) 194 { 195 return (mod_info(&modlinkage, modinfop)); 196 } 197 198 extern int nulldev(); 199 200 #define RPCMOD_ID 2049 201 202 int rmm_open(), rmm_close(); 203 204 /* 205 * To save instructions, since STREAMS ignores the return value 206 * from these functions, they are defined as void here. Kind of icky, but... 207 */ 208 void rmm_rput(queue_t *, mblk_t *); 209 void rmm_wput(queue_t *, mblk_t *); 210 void rmm_rsrv(queue_t *); 211 void rmm_wsrv(queue_t *); 212 213 int rpcmodopen(), rpcmodclose(); 214 void rpcmodrput(), rpcmodwput(); 215 void rpcmodrsrv(), rpcmodwsrv(); 216 217 static void rpcmodwput_other(queue_t *, mblk_t *); 218 static int mir_close(queue_t *q); 219 static int mir_open(queue_t *q, dev_t *devp, int flag, int sflag, 220 cred_t *credp); 221 static void mir_rput(queue_t *q, mblk_t *mp); 222 static void mir_rsrv(queue_t *q); 223 static void mir_wput(queue_t *q, mblk_t *mp); 224 static void mir_wsrv(queue_t *q); 225 226 static struct module_info rpcmod_info = 227 {RPCMOD_ID, "rpcmod", 0, INFPSZ, 256*1024, 1024}; 228 229 /* 230 * Read side has no service procedure. 231 */ 232 static struct qinit rpcmodrinit = { 233 (int (*)())rmm_rput, 234 (int (*)())rmm_rsrv, 235 rmm_open, 236 rmm_close, 237 nulldev, 238 &rpcmod_info, 239 NULL 240 }; 241 242 /* 243 * The write put procedure is simply putnext to conserve stack space. 244 * The write service procedure is not used to queue data, but instead to 245 * synchronize with flow control. 246 */ 247 static struct qinit rpcmodwinit = { 248 (int (*)())rmm_wput, 249 (int (*)())rmm_wsrv, 250 rmm_open, 251 rmm_close, 252 nulldev, 253 &rpcmod_info, 254 NULL 255 }; 256 struct streamtab rpcinfo = { &rpcmodrinit, &rpcmodwinit, NULL, NULL }; 257 258 struct xprt_style_ops { 259 int (*xo_open)(); 260 int (*xo_close)(); 261 void (*xo_wput)(); 262 void (*xo_wsrv)(); 263 void (*xo_rput)(); 264 void (*xo_rsrv)(); 265 }; 266 267 static struct xprt_style_ops xprt_clts_ops = { 268 rpcmodopen, 269 rpcmodclose, 270 rpcmodwput, 271 rpcmodwsrv, 272 rpcmodrput, 273 NULL 274 }; 275 276 static struct xprt_style_ops xprt_cots_ops = { 277 mir_open, 278 mir_close, 279 mir_wput, 280 mir_wsrv, 281 mir_rput, 282 mir_rsrv 283 }; 284 285 /* 286 * Per rpcmod "slot" data structure. q->q_ptr points to one of these. 287 */ 288 struct rpcm { 289 void *rm_krpc_cell; /* Reserved for use by KRPC */ 290 struct xprt_style_ops *rm_ops; 291 int rm_type; /* Client or server side stream */ 292 #define RM_CLOSING 0x1 /* somebody is trying to close slot */ 293 uint_t rm_state; /* state of the slot. see above */ 294 uint_t rm_ref; /* cnt of external references to slot */ 295 kmutex_t rm_lock; /* mutex protecting above fields */ 296 kcondvar_t rm_cwait; /* condition for closing */ 297 zoneid_t rm_zoneid; /* zone which pushed rpcmod */ 298 }; 299 300 struct temp_slot { 301 void *cell; 302 struct xprt_style_ops *ops; 303 int type; 304 mblk_t *info_ack; 305 kmutex_t lock; 306 kcondvar_t wait; 307 }; 308 309 typedef struct mir_s { 310 void *mir_krpc_cell; /* Reserved for KRPC use. This field */ 311 /* must be first in the structure. */ 312 struct xprt_style_ops *rm_ops; 313 int mir_type; /* Client or server side stream */ 314 315 mblk_t *mir_head_mp; /* RPC msg in progress */ 316 /* 317 * mir_head_mp points the first mblk being collected in 318 * the current RPC message. Record headers are removed 319 * before data is linked into mir_head_mp. 320 */ 321 mblk_t *mir_tail_mp; /* Last mblk in mir_head_mp */ 322 /* 323 * mir_tail_mp points to the last mblk in the message 324 * chain starting at mir_head_mp. It is only valid 325 * if mir_head_mp is non-NULL and is used to add new 326 * data blocks to the end of chain quickly. 327 */ 328 329 int32_t mir_frag_len; /* Bytes seen in the current frag */ 330 /* 331 * mir_frag_len starts at -4 for beginning of each fragment. 332 * When this length is negative, it indicates the number of 333 * bytes that rpcmod needs to complete the record marker 334 * header. When it is positive or zero, it holds the number 335 * of bytes that have arrived for the current fragment and 336 * are held in mir_header_mp. 337 */ 338 339 int32_t mir_frag_header; 340 /* 341 * Fragment header as collected for the current fragment. 342 * It holds the last-fragment indicator and the number 343 * of bytes in the fragment. 344 */ 345 346 unsigned int 347 mir_ordrel_pending : 1, /* Sent T_ORDREL_REQ */ 348 mir_hold_inbound : 1, /* Hold inbound messages on server */ 349 /* side until outbound flow control */ 350 /* is relieved. */ 351 mir_closing : 1, /* The stream is being closed */ 352 mir_inrservice : 1, /* data queued or rd srv proc running */ 353 mir_inwservice : 1, /* data queued or wr srv proc running */ 354 mir_inwflushdata : 1, /* flush M_DATAs when srv runs */ 355 /* 356 * On client streams, mir_clntreq is 0 or 1; it is set 357 * to 1 whenever a new request is sent out (mir_wput) 358 * and cleared when the timer fires (mir_timer). If 359 * the timer fires with this value equal to 0, then the 360 * stream is considered idle and KRPC is notified. 361 */ 362 mir_clntreq : 1, 363 /* 364 * On server streams, stop accepting messages 365 */ 366 mir_svc_no_more_msgs : 1, 367 mir_listen_stream : 1, /* listen end point */ 368 mir_unused : 1, /* no longer used */ 369 mir_timer_call : 1, 370 mir_junk_fill_thru_bit_31 : 21; 371 372 int mir_setup_complete; /* server has initialized everything */ 373 timeout_id_t mir_timer_id; /* Timer for idle checks */ 374 clock_t mir_idle_timeout; /* Allowed idle time before shutdown */ 375 /* 376 * This value is copied from clnt_idle_timeout or 377 * svc_idle_timeout during the appropriate ioctl. 378 * Kept in milliseconds 379 */ 380 clock_t mir_use_timestamp; /* updated on client with each use */ 381 /* 382 * This value is set to lbolt 383 * every time a client stream sends or receives data. 384 * Even if the timer message arrives, we don't shutdown 385 * client unless: 386 * lbolt >= MSEC_TO_TICK(mir_idle_timeout)+mir_use_timestamp. 387 * This value is kept in HZ. 388 */ 389 390 uint_t *mir_max_msg_sizep; /* Reference to sanity check size */ 391 /* 392 * This pointer is set to &clnt_max_msg_size or 393 * &svc_max_msg_size during the appropriate ioctl. 394 */ 395 zoneid_t mir_zoneid; /* zone which pushed rpcmod */ 396 /* Server-side fields. */ 397 int mir_ref_cnt; /* Reference count: server side only */ 398 /* counts the number of references */ 399 /* that a kernel RPC server thread */ 400 /* (see svc_run()) has on this rpcmod */ 401 /* slot. Effectively, it is the */ 402 /* number * of unprocessed messages */ 403 /* that have been passed up to the */ 404 /* KRPC layer */ 405 406 mblk_t *mir_svc_pend_mp; /* Pending T_ORDREL_IND or */ 407 /* T_DISCON_IND */ 408 409 /* 410 * these fields are for both client and server, but for debugging, 411 * it is easier to have these last in the structure. 412 */ 413 kmutex_t mir_mutex; /* Mutex and condvar for close */ 414 kcondvar_t mir_condvar; /* synchronization. */ 415 kcondvar_t mir_timer_cv; /* Timer routine sync. */ 416 } mir_t; 417 418 void tmp_rput(queue_t *q, mblk_t *mp); 419 420 struct xprt_style_ops tmpops = { 421 NULL, 422 NULL, 423 putnext, 424 NULL, 425 tmp_rput, 426 NULL 427 }; 428 429 void 430 tmp_rput(queue_t *q, mblk_t *mp) 431 { 432 struct temp_slot *t = (struct temp_slot *)(q->q_ptr); 433 struct T_info_ack *pptr; 434 435 switch (mp->b_datap->db_type) { 436 case M_PCPROTO: 437 pptr = (struct T_info_ack *)mp->b_rptr; 438 switch (pptr->PRIM_type) { 439 case T_INFO_ACK: 440 mutex_enter(&t->lock); 441 t->info_ack = mp; 442 cv_signal(&t->wait); 443 mutex_exit(&t->lock); 444 return; 445 default: 446 break; 447 } 448 default: 449 break; 450 } 451 452 /* 453 * Not an info-ack, so free it. This is ok because we should 454 * not be receiving data until the open finishes: rpcmod 455 * is pushed well before the end-point is bound to an address. 456 */ 457 freemsg(mp); 458 } 459 460 int 461 rmm_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *crp) 462 { 463 mblk_t *bp; 464 struct temp_slot ts, *t; 465 struct T_info_ack *pptr; 466 int error = 0; 467 468 ASSERT(q != NULL); 469 /* 470 * Check for re-opens. 471 */ 472 if (q->q_ptr) { 473 TRACE_1(TR_FAC_KRPC, TR_RPCMODOPEN_END, 474 "rpcmodopen_end:(%s)", "q->qptr"); 475 return (0); 476 } 477 478 t = &ts; 479 bzero(t, sizeof (*t)); 480 q->q_ptr = (void *)t; 481 WR(q)->q_ptr = (void *)t; 482 483 /* 484 * Allocate the required messages upfront. 485 */ 486 if ((bp = allocb_cred(sizeof (struct T_info_req) + 487 sizeof (struct T_info_ack), crp, curproc->p_pid)) == NULL) { 488 return (ENOBUFS); 489 } 490 491 mutex_init(&t->lock, NULL, MUTEX_DEFAULT, NULL); 492 cv_init(&t->wait, NULL, CV_DEFAULT, NULL); 493 494 t->ops = &tmpops; 495 496 qprocson(q); 497 bp->b_datap->db_type = M_PCPROTO; 498 *(int32_t *)bp->b_wptr = (int32_t)T_INFO_REQ; 499 bp->b_wptr += sizeof (struct T_info_req); 500 putnext(WR(q), bp); 501 502 mutex_enter(&t->lock); 503 while (t->info_ack == NULL) { 504 if (cv_wait_sig(&t->wait, &t->lock) == 0) { 505 error = EINTR; 506 break; 507 } 508 } 509 mutex_exit(&t->lock); 510 511 if (error) 512 goto out; 513 514 pptr = (struct T_info_ack *)t->info_ack->b_rptr; 515 516 if (pptr->SERV_type == T_CLTS) { 517 if ((error = rpcmodopen(q, devp, flag, sflag, crp)) == 0) 518 ((struct rpcm *)q->q_ptr)->rm_ops = &xprt_clts_ops; 519 } else { 520 if ((error = mir_open(q, devp, flag, sflag, crp)) == 0) 521 ((mir_t *)q->q_ptr)->rm_ops = &xprt_cots_ops; 522 } 523 524 out: 525 if (error) 526 qprocsoff(q); 527 528 freemsg(t->info_ack); 529 mutex_destroy(&t->lock); 530 cv_destroy(&t->wait); 531 532 return (error); 533 } 534 535 void 536 rmm_rput(queue_t *q, mblk_t *mp) 537 { 538 (*((struct temp_slot *)q->q_ptr)->ops->xo_rput)(q, mp); 539 } 540 541 void 542 rmm_rsrv(queue_t *q) 543 { 544 (*((struct temp_slot *)q->q_ptr)->ops->xo_rsrv)(q); 545 } 546 547 void 548 rmm_wput(queue_t *q, mblk_t *mp) 549 { 550 (*((struct temp_slot *)q->q_ptr)->ops->xo_wput)(q, mp); 551 } 552 553 void 554 rmm_wsrv(queue_t *q) 555 { 556 (*((struct temp_slot *)q->q_ptr)->ops->xo_wsrv)(q); 557 } 558 559 int 560 rmm_close(queue_t *q, int flag, cred_t *crp) 561 { 562 return ((*((struct temp_slot *)q->q_ptr)->ops->xo_close)(q, flag, crp)); 563 } 564 565 static void rpcmod_release(queue_t *, mblk_t *); 566 /* 567 * rpcmodopen - open routine gets called when the module gets pushed 568 * onto the stream. 569 */ 570 /*ARGSUSED*/ 571 int 572 rpcmodopen(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *crp) 573 { 574 struct rpcm *rmp; 575 576 extern void (*rpc_rele)(queue_t *, mblk_t *); 577 578 TRACE_0(TR_FAC_KRPC, TR_RPCMODOPEN_START, "rpcmodopen_start:"); 579 580 /* 581 * Initialize entry points to release a rpcmod slot (and an input 582 * message if supplied) and to send an output message to the module 583 * below rpcmod. 584 */ 585 if (rpc_rele == NULL) 586 rpc_rele = rpcmod_release; 587 588 /* 589 * Only sufficiently privileged users can use this module, and it 590 * is assumed that they will use this module properly, and NOT send 591 * bulk data from downstream. 592 */ 593 if (secpolicy_rpcmod_open(crp) != 0) 594 return (EPERM); 595 596 /* 597 * Allocate slot data structure. 598 */ 599 rmp = kmem_zalloc(sizeof (*rmp), KM_SLEEP); 600 601 mutex_init(&rmp->rm_lock, NULL, MUTEX_DEFAULT, NULL); 602 cv_init(&rmp->rm_cwait, NULL, CV_DEFAULT, NULL); 603 rmp->rm_zoneid = rpc_zoneid(); 604 /* 605 * slot type will be set by kRPC client and server ioctl's 606 */ 607 rmp->rm_type = 0; 608 609 q->q_ptr = (void *)rmp; 610 WR(q)->q_ptr = (void *)rmp; 611 612 TRACE_1(TR_FAC_KRPC, TR_RPCMODOPEN_END, "rpcmodopen_end:(%s)", "end"); 613 return (0); 614 } 615 616 /* 617 * rpcmodclose - This routine gets called when the module gets popped 618 * off of the stream. 619 */ 620 /*ARGSUSED*/ 621 int 622 rpcmodclose(queue_t *q, int flag, cred_t *crp) 623 { 624 struct rpcm *rmp; 625 626 ASSERT(q != NULL); 627 rmp = (struct rpcm *)q->q_ptr; 628 629 /* 630 * Mark our state as closing. 631 */ 632 mutex_enter(&rmp->rm_lock); 633 rmp->rm_state |= RM_CLOSING; 634 635 /* 636 * Check and see if there are any messages on the queue. If so, send 637 * the messages, regardless whether the downstream module is ready to 638 * accept data. 639 */ 640 if (rmp->rm_type == RPC_SERVER) { 641 flushq(q, FLUSHDATA); 642 643 qenable(WR(q)); 644 645 if (rmp->rm_ref) { 646 mutex_exit(&rmp->rm_lock); 647 /* 648 * call into SVC to clean the queue 649 */ 650 svc_queueclean(q); 651 mutex_enter(&rmp->rm_lock); 652 653 /* 654 * Block while there are kRPC threads with a reference 655 * to this message. 656 */ 657 while (rmp->rm_ref) 658 cv_wait(&rmp->rm_cwait, &rmp->rm_lock); 659 } 660 661 mutex_exit(&rmp->rm_lock); 662 663 /* 664 * It is now safe to remove this queue from the stream. No kRPC 665 * threads have a reference to the stream, and none ever will, 666 * because RM_CLOSING is set. 667 */ 668 qprocsoff(q); 669 670 /* Notify kRPC that this stream is going away. */ 671 svc_queueclose(q); 672 } else { 673 mutex_exit(&rmp->rm_lock); 674 qprocsoff(q); 675 } 676 677 q->q_ptr = NULL; 678 WR(q)->q_ptr = NULL; 679 mutex_destroy(&rmp->rm_lock); 680 cv_destroy(&rmp->rm_cwait); 681 kmem_free(rmp, sizeof (*rmp)); 682 return (0); 683 } 684 685 #ifdef DEBUG 686 int rpcmod_send_msg_up = 0; 687 int rpcmod_send_uderr = 0; 688 int rpcmod_send_dup = 0; 689 int rpcmod_send_dup_cnt = 0; 690 #endif 691 692 /* 693 * rpcmodrput - Module read put procedure. This is called from 694 * the module, driver, or stream head downstream. 695 */ 696 void 697 rpcmodrput(queue_t *q, mblk_t *mp) 698 { 699 struct rpcm *rmp; 700 union T_primitives *pptr; 701 int hdrsz; 702 703 TRACE_0(TR_FAC_KRPC, TR_RPCMODRPUT_START, "rpcmodrput_start:"); 704 705 ASSERT(q != NULL); 706 rmp = (struct rpcm *)q->q_ptr; 707 708 if (rmp->rm_type == 0) { 709 freemsg(mp); 710 return; 711 } 712 713 #ifdef DEBUG 714 if (rpcmod_send_msg_up > 0) { 715 mblk_t *nmp = copymsg(mp); 716 if (nmp) { 717 putnext(q, nmp); 718 rpcmod_send_msg_up--; 719 } 720 } 721 if ((rpcmod_send_uderr > 0) && mp->b_datap->db_type == M_PROTO) { 722 mblk_t *nmp; 723 struct T_unitdata_ind *data; 724 struct T_uderror_ind *ud; 725 int d; 726 data = (struct T_unitdata_ind *)mp->b_rptr; 727 if (data->PRIM_type == T_UNITDATA_IND) { 728 d = sizeof (*ud) - sizeof (*data); 729 nmp = allocb(mp->b_wptr - mp->b_rptr + d, BPRI_HI); 730 if (nmp) { 731 ud = (struct T_uderror_ind *)nmp->b_rptr; 732 ud->PRIM_type = T_UDERROR_IND; 733 ud->DEST_length = data->SRC_length; 734 ud->DEST_offset = data->SRC_offset + d; 735 ud->OPT_length = data->OPT_length; 736 ud->OPT_offset = data->OPT_offset + d; 737 ud->ERROR_type = ENETDOWN; 738 if (data->SRC_length) { 739 bcopy(mp->b_rptr + 740 data->SRC_offset, 741 nmp->b_rptr + 742 ud->DEST_offset, 743 data->SRC_length); 744 } 745 if (data->OPT_length) { 746 bcopy(mp->b_rptr + 747 data->OPT_offset, 748 nmp->b_rptr + 749 ud->OPT_offset, 750 data->OPT_length); 751 } 752 nmp->b_wptr += d; 753 nmp->b_wptr += (mp->b_wptr - mp->b_rptr); 754 nmp->b_datap->db_type = M_PROTO; 755 putnext(q, nmp); 756 rpcmod_send_uderr--; 757 } 758 } 759 } 760 #endif 761 switch (mp->b_datap->db_type) { 762 default: 763 putnext(q, mp); 764 break; 765 766 case M_PROTO: 767 case M_PCPROTO: 768 ASSERT((mp->b_wptr - mp->b_rptr) >= sizeof (int32_t)); 769 pptr = (union T_primitives *)mp->b_rptr; 770 771 /* 772 * Forward this message to krpc if it is data. 773 */ 774 if (pptr->type == T_UNITDATA_IND) { 775 mblk_t *nmp; 776 777 /* 778 * Check if the module is being popped. 779 */ 780 mutex_enter(&rmp->rm_lock); 781 if (rmp->rm_state & RM_CLOSING) { 782 mutex_exit(&rmp->rm_lock); 783 putnext(q, mp); 784 break; 785 } 786 787 switch (rmp->rm_type) { 788 case RPC_CLIENT: 789 mutex_exit(&rmp->rm_lock); 790 hdrsz = mp->b_wptr - mp->b_rptr; 791 792 /* 793 * Make sure the header is sane. 794 */ 795 if (hdrsz < TUNITDATAINDSZ || 796 hdrsz < (pptr->unitdata_ind.OPT_length + 797 pptr->unitdata_ind.OPT_offset) || 798 hdrsz < (pptr->unitdata_ind.SRC_length + 799 pptr->unitdata_ind.SRC_offset)) { 800 freemsg(mp); 801 return; 802 } 803 804 /* 805 * Call clnt_clts_dispatch_notify, so that it 806 * can pass the message to the proper caller. 807 * Don't discard the header just yet since the 808 * client may need the sender's address. 809 */ 810 clnt_clts_dispatch_notify(mp, hdrsz, 811 rmp->rm_zoneid); 812 return; 813 case RPC_SERVER: 814 /* 815 * rm_krpc_cell is exclusively used by the kRPC 816 * CLTS server 817 */ 818 if (rmp->rm_krpc_cell) { 819 #ifdef DEBUG 820 /* 821 * Test duplicate request cache and 822 * rm_ref count handling by sending a 823 * duplicate every so often, if 824 * desired. 825 */ 826 if (rpcmod_send_dup && 827 rpcmod_send_dup_cnt++ % 828 rpcmod_send_dup) 829 nmp = copymsg(mp); 830 else 831 nmp = NULL; 832 #endif 833 /* 834 * Raise the reference count on this 835 * module to prevent it from being 836 * popped before krpc generates the 837 * reply. 838 */ 839 rmp->rm_ref++; 840 mutex_exit(&rmp->rm_lock); 841 842 /* 843 * Submit the message to krpc. 844 */ 845 svc_queuereq(q, mp); 846 #ifdef DEBUG 847 /* 848 * Send duplicate if we created one. 849 */ 850 if (nmp) { 851 mutex_enter(&rmp->rm_lock); 852 rmp->rm_ref++; 853 mutex_exit(&rmp->rm_lock); 854 svc_queuereq(q, nmp); 855 } 856 #endif 857 } else { 858 mutex_exit(&rmp->rm_lock); 859 freemsg(mp); 860 } 861 return; 862 default: 863 mutex_exit(&rmp->rm_lock); 864 freemsg(mp); 865 return; 866 } /* end switch(rmp->rm_type) */ 867 } else if (pptr->type == T_UDERROR_IND) { 868 mutex_enter(&rmp->rm_lock); 869 hdrsz = mp->b_wptr - mp->b_rptr; 870 871 /* 872 * Make sure the header is sane 873 */ 874 if (hdrsz < TUDERRORINDSZ || 875 hdrsz < (pptr->uderror_ind.OPT_length + 876 pptr->uderror_ind.OPT_offset) || 877 hdrsz < (pptr->uderror_ind.DEST_length + 878 pptr->uderror_ind.DEST_offset)) { 879 mutex_exit(&rmp->rm_lock); 880 freemsg(mp); 881 return; 882 } 883 884 /* 885 * In the case where a unit data error has been 886 * received, all we need to do is clear the message from 887 * the queue. 888 */ 889 mutex_exit(&rmp->rm_lock); 890 freemsg(mp); 891 RPCLOG(32, "rpcmodrput: unitdata error received at " 892 "%ld\n", gethrestime_sec()); 893 return; 894 } /* end else if (pptr->type == T_UDERROR_IND) */ 895 896 putnext(q, mp); 897 break; 898 } /* end switch (mp->b_datap->db_type) */ 899 900 TRACE_0(TR_FAC_KRPC, TR_RPCMODRPUT_END, 901 "rpcmodrput_end:"); 902 /* 903 * Return codes are not looked at by the STREAMS framework. 904 */ 905 } 906 907 /* 908 * write put procedure 909 */ 910 void 911 rpcmodwput(queue_t *q, mblk_t *mp) 912 { 913 struct rpcm *rmp; 914 915 ASSERT(q != NULL); 916 917 switch (mp->b_datap->db_type) { 918 case M_PROTO: 919 case M_PCPROTO: 920 break; 921 default: 922 rpcmodwput_other(q, mp); 923 return; 924 } 925 926 /* 927 * Check to see if we can send the message downstream. 928 */ 929 if (canputnext(q)) { 930 putnext(q, mp); 931 return; 932 } 933 934 rmp = (struct rpcm *)q->q_ptr; 935 ASSERT(rmp != NULL); 936 937 /* 938 * The first canputnext failed. Try again except this time with the 939 * lock held, so that we can check the state of the stream to see if 940 * it is closing. If either of these conditions evaluate to true 941 * then send the meesage. 942 */ 943 mutex_enter(&rmp->rm_lock); 944 if (canputnext(q) || (rmp->rm_state & RM_CLOSING)) { 945 mutex_exit(&rmp->rm_lock); 946 putnext(q, mp); 947 } else { 948 /* 949 * canputnext failed again and the stream is not closing. 950 * Place the message on the queue and let the service 951 * procedure handle the message. 952 */ 953 mutex_exit(&rmp->rm_lock); 954 (void) putq(q, mp); 955 } 956 } 957 958 static void 959 rpcmodwput_other(queue_t *q, mblk_t *mp) 960 { 961 struct rpcm *rmp; 962 struct iocblk *iocp; 963 964 rmp = (struct rpcm *)q->q_ptr; 965 ASSERT(rmp != NULL); 966 967 switch (mp->b_datap->db_type) { 968 case M_IOCTL: 969 iocp = (struct iocblk *)mp->b_rptr; 970 ASSERT(iocp != NULL); 971 switch (iocp->ioc_cmd) { 972 case RPC_CLIENT: 973 case RPC_SERVER: 974 mutex_enter(&rmp->rm_lock); 975 rmp->rm_type = iocp->ioc_cmd; 976 mutex_exit(&rmp->rm_lock); 977 mp->b_datap->db_type = M_IOCACK; 978 qreply(q, mp); 979 return; 980 default: 981 /* 982 * pass the ioctl downstream and hope someone 983 * down there knows how to handle it. 984 */ 985 putnext(q, mp); 986 return; 987 } 988 default: 989 break; 990 } 991 /* 992 * This is something we definitely do not know how to handle, just 993 * pass the message downstream 994 */ 995 putnext(q, mp); 996 } 997 998 /* 999 * Module write service procedure. This is called by downstream modules 1000 * for back enabling during flow control. 1001 */ 1002 void 1003 rpcmodwsrv(queue_t *q) 1004 { 1005 struct rpcm *rmp; 1006 mblk_t *mp = NULL; 1007 1008 rmp = (struct rpcm *)q->q_ptr; 1009 ASSERT(rmp != NULL); 1010 1011 /* 1012 * Get messages that may be queued and send them down stream 1013 */ 1014 while ((mp = getq(q)) != NULL) { 1015 /* 1016 * Optimize the service procedure for the server-side, by 1017 * avoiding a call to canputnext(). 1018 */ 1019 if (rmp->rm_type == RPC_SERVER || canputnext(q)) { 1020 putnext(q, mp); 1021 continue; 1022 } 1023 (void) putbq(q, mp); 1024 return; 1025 } 1026 } 1027 1028 static void 1029 rpcmod_release(queue_t *q, mblk_t *bp) 1030 { 1031 struct rpcm *rmp; 1032 1033 /* 1034 * For now, just free the message. 1035 */ 1036 if (bp) 1037 freemsg(bp); 1038 rmp = (struct rpcm *)q->q_ptr; 1039 1040 mutex_enter(&rmp->rm_lock); 1041 rmp->rm_ref--; 1042 1043 if (rmp->rm_ref == 0 && (rmp->rm_state & RM_CLOSING)) { 1044 cv_broadcast(&rmp->rm_cwait); 1045 } 1046 1047 mutex_exit(&rmp->rm_lock); 1048 } 1049 1050 /* 1051 * This part of rpcmod is pushed on a connection-oriented transport for use 1052 * by RPC. It serves to bypass the Stream head, implements 1053 * the record marking protocol, and dispatches incoming RPC messages. 1054 */ 1055 1056 /* Default idle timer values */ 1057 #define MIR_CLNT_IDLE_TIMEOUT (5 * (60 * 1000L)) /* 5 minutes */ 1058 #define MIR_SVC_IDLE_TIMEOUT (6 * (60 * 1000L)) /* 6 minutes */ 1059 #define MIR_SVC_ORDREL_TIMEOUT (10 * (60 * 1000L)) /* 10 minutes */ 1060 #define MIR_LASTFRAG 0x80000000 /* Record marker */ 1061 1062 #define DLEN(mp) (mp->b_cont ? msgdsize(mp) : (mp->b_wptr - mp->b_rptr)) 1063 1064 #define MIR_SVC_QUIESCED(mir) \ 1065 (mir->mir_ref_cnt == 0 && mir->mir_inrservice == 0) 1066 1067 #define MIR_CLEAR_INRSRV(mir_ptr) { \ 1068 (mir_ptr)->mir_inrservice = 0; \ 1069 if ((mir_ptr)->mir_type == RPC_SERVER && \ 1070 (mir_ptr)->mir_closing) \ 1071 cv_signal(&(mir_ptr)->mir_condvar); \ 1072 } 1073 1074 /* 1075 * Don't block service procedure (and mir_close) if 1076 * we are in the process of closing. 1077 */ 1078 #define MIR_WCANPUTNEXT(mir_ptr, write_q) \ 1079 (canputnext(write_q) || ((mir_ptr)->mir_svc_no_more_msgs == 1)) 1080 1081 static int mir_clnt_dup_request(queue_t *q, mblk_t *mp); 1082 static void mir_rput_proto(queue_t *q, mblk_t *mp); 1083 static int mir_svc_policy_notify(queue_t *q, int event); 1084 static void mir_svc_release(queue_t *wq, mblk_t *mp); 1085 static void mir_svc_start(queue_t *wq); 1086 static void mir_svc_idle_start(queue_t *, mir_t *); 1087 static void mir_svc_idle_stop(queue_t *, mir_t *); 1088 static void mir_svc_start_close(queue_t *, mir_t *); 1089 static void mir_clnt_idle_do_stop(queue_t *); 1090 static void mir_clnt_idle_stop(queue_t *, mir_t *); 1091 static void mir_clnt_idle_start(queue_t *, mir_t *); 1092 static void mir_wput(queue_t *q, mblk_t *mp); 1093 static void mir_wput_other(queue_t *q, mblk_t *mp); 1094 static void mir_wsrv(queue_t *q); 1095 static void mir_disconnect(queue_t *, mir_t *ir); 1096 static int mir_check_len(queue_t *, int32_t, mblk_t *); 1097 static void mir_timer(void *); 1098 1099 extern void (*mir_rele)(queue_t *, mblk_t *); 1100 extern void (*mir_start)(queue_t *); 1101 extern void (*clnt_stop_idle)(queue_t *); 1102 1103 clock_t clnt_idle_timeout = MIR_CLNT_IDLE_TIMEOUT; 1104 clock_t svc_idle_timeout = MIR_SVC_IDLE_TIMEOUT; 1105 1106 /* 1107 * Timeout for subsequent notifications of idle connection. This is 1108 * typically used to clean up after a wedged orderly release. 1109 */ 1110 clock_t svc_ordrel_timeout = MIR_SVC_ORDREL_TIMEOUT; /* milliseconds */ 1111 1112 extern uint_t *clnt_max_msg_sizep; 1113 extern uint_t *svc_max_msg_sizep; 1114 uint_t clnt_max_msg_size = RPC_MAXDATASIZE; 1115 uint_t svc_max_msg_size = RPC_MAXDATASIZE; 1116 uint_t mir_krpc_cell_null; 1117 1118 static void 1119 mir_timer_stop(mir_t *mir) 1120 { 1121 timeout_id_t tid; 1122 1123 ASSERT(MUTEX_HELD(&mir->mir_mutex)); 1124 1125 /* 1126 * Since the mir_mutex lock needs to be released to call 1127 * untimeout(), we need to make sure that no other thread 1128 * can start/stop the timer (changing mir_timer_id) during 1129 * that time. The mir_timer_call bit and the mir_timer_cv 1130 * condition variable are used to synchronize this. Setting 1131 * mir_timer_call also tells mir_timer() (refer to the comments 1132 * in mir_timer()) that it does not need to do anything. 1133 */ 1134 while (mir->mir_timer_call) 1135 cv_wait(&mir->mir_timer_cv, &mir->mir_mutex); 1136 mir->mir_timer_call = B_TRUE; 1137 1138 if ((tid = mir->mir_timer_id) != 0) { 1139 mir->mir_timer_id = 0; 1140 mutex_exit(&mir->mir_mutex); 1141 (void) untimeout(tid); 1142 mutex_enter(&mir->mir_mutex); 1143 } 1144 mir->mir_timer_call = B_FALSE; 1145 cv_broadcast(&mir->mir_timer_cv); 1146 } 1147 1148 static void 1149 mir_timer_start(queue_t *q, mir_t *mir, clock_t intrvl) 1150 { 1151 timeout_id_t tid; 1152 1153 ASSERT(MUTEX_HELD(&mir->mir_mutex)); 1154 1155 while (mir->mir_timer_call) 1156 cv_wait(&mir->mir_timer_cv, &mir->mir_mutex); 1157 mir->mir_timer_call = B_TRUE; 1158 1159 if ((tid = mir->mir_timer_id) != 0) { 1160 mutex_exit(&mir->mir_mutex); 1161 (void) untimeout(tid); 1162 mutex_enter(&mir->mir_mutex); 1163 } 1164 /* Only start the timer when it is not closing. */ 1165 if (!mir->mir_closing) { 1166 mir->mir_timer_id = timeout(mir_timer, q, 1167 MSEC_TO_TICK(intrvl)); 1168 } 1169 mir->mir_timer_call = B_FALSE; 1170 cv_broadcast(&mir->mir_timer_cv); 1171 } 1172 1173 static int 1174 mir_clnt_dup_request(queue_t *q, mblk_t *mp) 1175 { 1176 mblk_t *mp1; 1177 uint32_t new_xid; 1178 uint32_t old_xid; 1179 1180 ASSERT(MUTEX_HELD(&((mir_t *)q->q_ptr)->mir_mutex)); 1181 new_xid = BE32_TO_U32(&mp->b_rptr[4]); 1182 /* 1183 * This loop is a bit tacky -- it walks the STREAMS list of 1184 * flow-controlled messages. 1185 */ 1186 if ((mp1 = q->q_first) != NULL) { 1187 do { 1188 old_xid = BE32_TO_U32(&mp1->b_rptr[4]); 1189 if (new_xid == old_xid) 1190 return (1); 1191 } while ((mp1 = mp1->b_next) != NULL); 1192 } 1193 return (0); 1194 } 1195 1196 static int 1197 mir_close(queue_t *q) 1198 { 1199 mir_t *mir = q->q_ptr; 1200 mblk_t *mp; 1201 bool_t queue_cleaned = FALSE; 1202 1203 RPCLOG(32, "rpcmod: mir_close of q 0x%p\n", (void *)q); 1204 ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex)); 1205 mutex_enter(&mir->mir_mutex); 1206 if ((mp = mir->mir_head_mp) != NULL) { 1207 mir->mir_head_mp = NULL; 1208 mir->mir_tail_mp = NULL; 1209 freemsg(mp); 1210 } 1211 /* 1212 * Set mir_closing so we get notified when MIR_SVC_QUIESCED() 1213 * is TRUE. And mir_timer_start() won't start the timer again. 1214 */ 1215 mir->mir_closing = B_TRUE; 1216 mir_timer_stop(mir); 1217 1218 if (mir->mir_type == RPC_SERVER) { 1219 flushq(q, FLUSHDATA); /* Ditch anything waiting on read q */ 1220 1221 /* 1222 * This will prevent more requests from arriving and 1223 * will force rpcmod to ignore flow control. 1224 */ 1225 mir_svc_start_close(WR(q), mir); 1226 1227 while ((!MIR_SVC_QUIESCED(mir)) || mir->mir_inwservice == 1) { 1228 1229 if (mir->mir_ref_cnt && !mir->mir_inrservice && 1230 (queue_cleaned == FALSE)) { 1231 /* 1232 * call into SVC to clean the queue 1233 */ 1234 mutex_exit(&mir->mir_mutex); 1235 svc_queueclean(q); 1236 queue_cleaned = TRUE; 1237 mutex_enter(&mir->mir_mutex); 1238 continue; 1239 } 1240 1241 /* 1242 * Bugid 1253810 - Force the write service 1243 * procedure to send its messages, regardless 1244 * whether the downstream module is ready 1245 * to accept data. 1246 */ 1247 if (mir->mir_inwservice == 1) 1248 qenable(WR(q)); 1249 1250 cv_wait(&mir->mir_condvar, &mir->mir_mutex); 1251 } 1252 1253 mutex_exit(&mir->mir_mutex); 1254 qprocsoff(q); 1255 1256 /* Notify KRPC that this stream is going away. */ 1257 svc_queueclose(q); 1258 } else { 1259 mutex_exit(&mir->mir_mutex); 1260 qprocsoff(q); 1261 } 1262 1263 mutex_destroy(&mir->mir_mutex); 1264 cv_destroy(&mir->mir_condvar); 1265 cv_destroy(&mir->mir_timer_cv); 1266 kmem_free(mir, sizeof (mir_t)); 1267 return (0); 1268 } 1269 1270 /* 1271 * This is server side only (RPC_SERVER). 1272 * 1273 * Exit idle mode. 1274 */ 1275 static void 1276 mir_svc_idle_stop(queue_t *q, mir_t *mir) 1277 { 1278 ASSERT(MUTEX_HELD(&mir->mir_mutex)); 1279 ASSERT((q->q_flag & QREADR) == 0); 1280 ASSERT(mir->mir_type == RPC_SERVER); 1281 RPCLOG(16, "rpcmod: mir_svc_idle_stop of q 0x%p\n", (void *)q); 1282 1283 mir_timer_stop(mir); 1284 } 1285 1286 /* 1287 * This is server side only (RPC_SERVER). 1288 * 1289 * Start idle processing, which will include setting idle timer if the 1290 * stream is not being closed. 1291 */ 1292 static void 1293 mir_svc_idle_start(queue_t *q, mir_t *mir) 1294 { 1295 ASSERT(MUTEX_HELD(&mir->mir_mutex)); 1296 ASSERT((q->q_flag & QREADR) == 0); 1297 ASSERT(mir->mir_type == RPC_SERVER); 1298 RPCLOG(16, "rpcmod: mir_svc_idle_start q 0x%p\n", (void *)q); 1299 1300 /* 1301 * Don't re-start idle timer if we are closing queues. 1302 */ 1303 if (mir->mir_closing) { 1304 RPCLOG(16, "mir_svc_idle_start - closing: 0x%p\n", 1305 (void *)q); 1306 1307 /* 1308 * We will call mir_svc_idle_start() whenever MIR_SVC_QUIESCED() 1309 * is true. When it is true, and we are in the process of 1310 * closing the stream, signal any thread waiting in 1311 * mir_close(). 1312 */ 1313 if (mir->mir_inwservice == 0) 1314 cv_signal(&mir->mir_condvar); 1315 1316 } else { 1317 RPCLOG(16, "mir_svc_idle_start - reset %s timer\n", 1318 mir->mir_ordrel_pending ? "ordrel" : "normal"); 1319 /* 1320 * Normal condition, start the idle timer. If an orderly 1321 * release has been sent, set the timeout to wait for the 1322 * client to close its side of the connection. Otherwise, 1323 * use the normal idle timeout. 1324 */ 1325 mir_timer_start(q, mir, mir->mir_ordrel_pending ? 1326 svc_ordrel_timeout : mir->mir_idle_timeout); 1327 } 1328 } 1329 1330 /* ARGSUSED */ 1331 static int 1332 mir_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1333 { 1334 mir_t *mir; 1335 1336 RPCLOG(32, "rpcmod: mir_open of q 0x%p\n", (void *)q); 1337 /* Set variables used directly by KRPC. */ 1338 if (!mir_rele) 1339 mir_rele = mir_svc_release; 1340 if (!mir_start) 1341 mir_start = mir_svc_start; 1342 if (!clnt_stop_idle) 1343 clnt_stop_idle = mir_clnt_idle_do_stop; 1344 if (!clnt_max_msg_sizep) 1345 clnt_max_msg_sizep = &clnt_max_msg_size; 1346 if (!svc_max_msg_sizep) 1347 svc_max_msg_sizep = &svc_max_msg_size; 1348 1349 /* Allocate a zero'ed out mir structure for this stream. */ 1350 mir = kmem_zalloc(sizeof (mir_t), KM_SLEEP); 1351 1352 /* 1353 * We set hold inbound here so that incoming messages will 1354 * be held on the read-side queue until the stream is completely 1355 * initialized with a RPC_CLIENT or RPC_SERVER ioctl. During 1356 * the ioctl processing, the flag is cleared and any messages that 1357 * arrived between the open and the ioctl are delivered to KRPC. 1358 * 1359 * Early data should never arrive on a client stream since 1360 * servers only respond to our requests and we do not send any. 1361 * until after the stream is initialized. Early data is 1362 * very common on a server stream where the client will start 1363 * sending data as soon as the connection is made (and this 1364 * is especially true with TCP where the protocol accepts the 1365 * connection before nfsd or KRPC is notified about it). 1366 */ 1367 1368 mir->mir_hold_inbound = 1; 1369 1370 /* 1371 * Start the record marker looking for a 4-byte header. When 1372 * this length is negative, it indicates that rpcmod is looking 1373 * for bytes to consume for the record marker header. When it 1374 * is positive, it holds the number of bytes that have arrived 1375 * for the current fragment and are being held in mir_header_mp. 1376 */ 1377 1378 mir->mir_frag_len = -(int32_t)sizeof (uint32_t); 1379 1380 mir->mir_zoneid = rpc_zoneid(); 1381 mutex_init(&mir->mir_mutex, NULL, MUTEX_DEFAULT, NULL); 1382 cv_init(&mir->mir_condvar, NULL, CV_DRIVER, NULL); 1383 cv_init(&mir->mir_timer_cv, NULL, CV_DRIVER, NULL); 1384 1385 q->q_ptr = (char *)mir; 1386 WR(q)->q_ptr = (char *)mir; 1387 1388 /* 1389 * We noenable the read-side queue because we don't want it 1390 * automatically enabled by putq. We enable it explicitly 1391 * in mir_wsrv when appropriate. (See additional comments on 1392 * flow control at the beginning of mir_rsrv.) 1393 */ 1394 noenable(q); 1395 1396 qprocson(q); 1397 return (0); 1398 } 1399 1400 /* 1401 * Read-side put routine for both the client and server side. Does the 1402 * record marking for incoming RPC messages, and when complete, dispatches 1403 * the message to either the client or server. 1404 */ 1405 static void 1406 mir_rput(queue_t *q, mblk_t *mp) 1407 { 1408 int excess; 1409 int32_t frag_len, frag_header; 1410 mblk_t *cont_mp, *head_mp, *tail_mp, *mp1; 1411 mir_t *mir = q->q_ptr; 1412 boolean_t stop_timer = B_FALSE; 1413 1414 ASSERT(mir != NULL); 1415 1416 /* 1417 * If the stream has not been set up as a RPC_CLIENT or RPC_SERVER 1418 * with the corresponding ioctl, then don't accept 1419 * any inbound data. This should never happen for streams 1420 * created by nfsd or client-side KRPC because they are careful 1421 * to set the mode of the stream before doing anything else. 1422 */ 1423 if (mir->mir_type == 0) { 1424 freemsg(mp); 1425 return; 1426 } 1427 1428 ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex)); 1429 1430 switch (mp->b_datap->db_type) { 1431 case M_DATA: 1432 break; 1433 case M_PROTO: 1434 case M_PCPROTO: 1435 if (MBLKL(mp) < sizeof (t_scalar_t)) { 1436 RPCLOG(1, "mir_rput: runt TPI message (%d bytes)\n", 1437 (int)MBLKL(mp)); 1438 freemsg(mp); 1439 return; 1440 } 1441 if (((union T_primitives *)mp->b_rptr)->type != T_DATA_IND) { 1442 mir_rput_proto(q, mp); 1443 return; 1444 } 1445 1446 /* Throw away the T_DATA_IND block and continue with data. */ 1447 mp1 = mp; 1448 mp = mp->b_cont; 1449 freeb(mp1); 1450 break; 1451 case M_SETOPTS: 1452 /* 1453 * If a module on the stream is trying set the Stream head's 1454 * high water mark, then set our hiwater to the requested 1455 * value. We are the "stream head" for all inbound 1456 * data messages since messages are passed directly to KRPC. 1457 */ 1458 if (MBLKL(mp) >= sizeof (struct stroptions)) { 1459 struct stroptions *stropts; 1460 1461 stropts = (struct stroptions *)mp->b_rptr; 1462 if ((stropts->so_flags & SO_HIWAT) && 1463 !(stropts->so_flags & SO_BAND)) { 1464 (void) strqset(q, QHIWAT, 0, stropts->so_hiwat); 1465 } 1466 } 1467 putnext(q, mp); 1468 return; 1469 case M_FLUSH: 1470 RPCLOG(32, "mir_rput: ignoring M_FLUSH %x ", *mp->b_rptr); 1471 RPCLOG(32, "on q 0x%p\n", (void *)q); 1472 putnext(q, mp); 1473 return; 1474 default: 1475 putnext(q, mp); 1476 return; 1477 } 1478 1479 mutex_enter(&mir->mir_mutex); 1480 1481 /* 1482 * If this connection is closing, don't accept any new messages. 1483 */ 1484 if (mir->mir_svc_no_more_msgs) { 1485 ASSERT(mir->mir_type == RPC_SERVER); 1486 mutex_exit(&mir->mir_mutex); 1487 freemsg(mp); 1488 return; 1489 } 1490 1491 /* Get local copies for quicker access. */ 1492 frag_len = mir->mir_frag_len; 1493 frag_header = mir->mir_frag_header; 1494 head_mp = mir->mir_head_mp; 1495 tail_mp = mir->mir_tail_mp; 1496 1497 /* Loop, processing each message block in the mp chain separately. */ 1498 do { 1499 cont_mp = mp->b_cont; 1500 mp->b_cont = NULL; 1501 1502 /* 1503 * Drop zero-length mblks to prevent unbounded kernel memory 1504 * consumption. 1505 */ 1506 if (MBLKL(mp) == 0) { 1507 freeb(mp); 1508 continue; 1509 } 1510 1511 /* 1512 * If frag_len is negative, we're still in the process of 1513 * building frag_header -- try to complete it with this mblk. 1514 */ 1515 while (frag_len < 0 && mp->b_rptr < mp->b_wptr) { 1516 frag_len++; 1517 frag_header <<= 8; 1518 frag_header += *mp->b_rptr++; 1519 } 1520 1521 if (MBLKL(mp) == 0 && frag_len < 0) { 1522 /* 1523 * We consumed this mblk while trying to complete the 1524 * fragment header. Free it and move on. 1525 */ 1526 freeb(mp); 1527 continue; 1528 } 1529 1530 ASSERT(frag_len >= 0); 1531 1532 /* 1533 * Now frag_header has the number of bytes in this fragment 1534 * and we're just waiting to collect them all. Chain our 1535 * latest mblk onto the list and see if we now have enough 1536 * bytes to complete the fragment. 1537 */ 1538 if (head_mp == NULL) { 1539 ASSERT(tail_mp == NULL); 1540 head_mp = tail_mp = mp; 1541 } else { 1542 tail_mp->b_cont = mp; 1543 tail_mp = mp; 1544 } 1545 1546 frag_len += MBLKL(mp); 1547 excess = frag_len - (frag_header & ~MIR_LASTFRAG); 1548 if (excess < 0) { 1549 /* 1550 * We still haven't received enough data to complete 1551 * the fragment, so continue on to the next mblk. 1552 */ 1553 continue; 1554 } 1555 1556 /* 1557 * We've got a complete fragment. If there are excess bytes, 1558 * then they're part of the next fragment's header (of either 1559 * this RPC message or the next RPC message). Split that part 1560 * into its own mblk so that we can safely freeb() it when 1561 * building frag_header above. 1562 */ 1563 if (excess > 0) { 1564 if ((mp1 = dupb(mp)) == NULL && 1565 (mp1 = copyb(mp)) == NULL) { 1566 freemsg(head_mp); 1567 freemsg(cont_mp); 1568 RPCLOG0(1, "mir_rput: dupb/copyb failed\n"); 1569 mir->mir_frag_header = 0; 1570 mir->mir_frag_len = -(int32_t)sizeof (uint32_t); 1571 mir->mir_head_mp = NULL; 1572 mir->mir_tail_mp = NULL; 1573 mir_disconnect(q, mir); /* drops mir_mutex */ 1574 return; 1575 } 1576 1577 /* 1578 * Relink the message chain so that the next mblk is 1579 * the next fragment header, followed by the rest of 1580 * the message chain. 1581 */ 1582 mp1->b_cont = cont_mp; 1583 cont_mp = mp1; 1584 1585 /* 1586 * Data in the new mblk begins at the next fragment, 1587 * and data in the old mblk ends at the next fragment. 1588 */ 1589 mp1->b_rptr = mp1->b_wptr - excess; 1590 mp->b_wptr -= excess; 1591 } 1592 1593 /* 1594 * Reset frag_len and frag_header for the next fragment. 1595 */ 1596 frag_len = -(int32_t)sizeof (uint32_t); 1597 if (!(frag_header & MIR_LASTFRAG)) { 1598 /* 1599 * The current fragment is complete, but more 1600 * fragments need to be processed before we can 1601 * pass along the RPC message headed at head_mp. 1602 */ 1603 frag_header = 0; 1604 continue; 1605 } 1606 frag_header = 0; 1607 1608 /* 1609 * We've got a complete RPC message; pass it to the 1610 * appropriate consumer. 1611 */ 1612 switch (mir->mir_type) { 1613 case RPC_CLIENT: 1614 if (clnt_dispatch_notify(head_mp, mir->mir_zoneid)) { 1615 /* 1616 * Mark this stream as active. This marker 1617 * is used in mir_timer(). 1618 */ 1619 mir->mir_clntreq = 1; 1620 mir->mir_use_timestamp = ddi_get_lbolt(); 1621 } else { 1622 freemsg(head_mp); 1623 } 1624 break; 1625 1626 case RPC_SERVER: 1627 /* 1628 * Check for flow control before passing the 1629 * message to KRPC. 1630 */ 1631 if (!mir->mir_hold_inbound) { 1632 if (mir->mir_krpc_cell) { 1633 /* 1634 * If the reference count is 0 1635 * (not including this request), 1636 * then the stream is transitioning 1637 * from idle to non-idle. In this case, 1638 * we cancel the idle timer. 1639 */ 1640 if (mir->mir_ref_cnt++ == 0) 1641 stop_timer = B_TRUE; 1642 if (mir_check_len(q, 1643 (int32_t)msgdsize(mp), mp)) 1644 return; 1645 svc_queuereq(q, head_mp); /* to KRPC */ 1646 } else { 1647 /* 1648 * Count # of times this happens. Should 1649 * be never, but experience shows 1650 * otherwise. 1651 */ 1652 mir_krpc_cell_null++; 1653 freemsg(head_mp); 1654 } 1655 } else { 1656 /* 1657 * If the outbound side of the stream is 1658 * flow controlled, then hold this message 1659 * until client catches up. mir_hold_inbound 1660 * is set in mir_wput and cleared in mir_wsrv. 1661 */ 1662 (void) putq(q, head_mp); 1663 mir->mir_inrservice = B_TRUE; 1664 } 1665 break; 1666 default: 1667 RPCLOG(1, "mir_rput: unknown mir_type %d\n", 1668 mir->mir_type); 1669 freemsg(head_mp); 1670 break; 1671 } 1672 1673 /* 1674 * Reset the chain since we're starting on a new RPC message. 1675 */ 1676 head_mp = tail_mp = NULL; 1677 } while ((mp = cont_mp) != NULL); 1678 1679 /* 1680 * Sanity check the message length; if it's too large mir_check_len() 1681 * will shutdown the connection, drop mir_mutex, and return non-zero. 1682 */ 1683 if (head_mp != NULL && mir->mir_setup_complete && 1684 mir_check_len(q, frag_len, head_mp)) 1685 return; 1686 1687 /* Save our local copies back in the mir structure. */ 1688 mir->mir_frag_header = frag_header; 1689 mir->mir_frag_len = frag_len; 1690 mir->mir_head_mp = head_mp; 1691 mir->mir_tail_mp = tail_mp; 1692 1693 /* 1694 * The timer is stopped after the whole message chain is processed. 1695 * The reason is that stopping the timer releases the mir_mutex 1696 * lock temporarily. This means that the request can be serviced 1697 * while we are still processing the message chain. This is not 1698 * good. So we stop the timer here instead. 1699 * 1700 * Note that if the timer fires before we stop it, it will not 1701 * do any harm as MIR_SVC_QUIESCED() is false and mir_timer() 1702 * will just return. 1703 */ 1704 if (stop_timer) { 1705 RPCLOG(16, "mir_rput: stopping idle timer on 0x%p because " 1706 "ref cnt going to non zero\n", (void *)WR(q)); 1707 mir_svc_idle_stop(WR(q), mir); 1708 } 1709 mutex_exit(&mir->mir_mutex); 1710 } 1711 1712 static void 1713 mir_rput_proto(queue_t *q, mblk_t *mp) 1714 { 1715 mir_t *mir = (mir_t *)q->q_ptr; 1716 uint32_t type; 1717 uint32_t reason = 0; 1718 1719 ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex)); 1720 1721 type = ((union T_primitives *)mp->b_rptr)->type; 1722 switch (mir->mir_type) { 1723 case RPC_CLIENT: 1724 switch (type) { 1725 case T_DISCON_IND: 1726 reason = ((struct T_discon_ind *) 1727 (mp->b_rptr))->DISCON_reason; 1728 /*FALLTHROUGH*/ 1729 case T_ORDREL_IND: 1730 mutex_enter(&mir->mir_mutex); 1731 if (mir->mir_head_mp) { 1732 freemsg(mir->mir_head_mp); 1733 mir->mir_head_mp = (mblk_t *)0; 1734 mir->mir_tail_mp = (mblk_t *)0; 1735 } 1736 /* 1737 * We are disconnecting, but not necessarily 1738 * closing. By not closing, we will fail to 1739 * pick up a possibly changed global timeout value, 1740 * unless we store it now. 1741 */ 1742 mir->mir_idle_timeout = clnt_idle_timeout; 1743 mir_clnt_idle_stop(WR(q), mir); 1744 1745 /* 1746 * Even though we are unconnected, we still 1747 * leave the idle timer going on the client. The 1748 * reason for is that if we've disconnected due 1749 * to a server-side disconnect, reset, or connection 1750 * timeout, there is a possibility the client may 1751 * retry the RPC request. This retry needs to done on 1752 * the same bound address for the server to interpret 1753 * it as such. However, we don't want 1754 * to wait forever for that possibility. If the 1755 * end-point stays unconnected for mir_idle_timeout 1756 * units of time, then that is a signal to the 1757 * connection manager to give up waiting for the 1758 * application (eg. NFS) to send a retry. 1759 */ 1760 mir_clnt_idle_start(WR(q), mir); 1761 mutex_exit(&mir->mir_mutex); 1762 clnt_dispatch_notifyall(WR(q), type, reason); 1763 freemsg(mp); 1764 return; 1765 case T_ERROR_ACK: 1766 { 1767 struct T_error_ack *terror; 1768 1769 terror = (struct T_error_ack *)mp->b_rptr; 1770 RPCLOG(1, "mir_rput_proto T_ERROR_ACK for queue 0x%p", 1771 (void *)q); 1772 RPCLOG(1, " ERROR_prim: %s,", 1773 rpc_tpiprim2name(terror->ERROR_prim)); 1774 RPCLOG(1, " TLI_error: %s,", 1775 rpc_tpierr2name(terror->TLI_error)); 1776 RPCLOG(1, " UNIX_error: %d\n", terror->UNIX_error); 1777 if (terror->ERROR_prim == T_DISCON_REQ) { 1778 clnt_dispatch_notifyall(WR(q), type, reason); 1779 freemsg(mp); 1780 return; 1781 } else { 1782 if (clnt_dispatch_notifyconn(WR(q), mp)) 1783 return; 1784 } 1785 break; 1786 } 1787 case T_OK_ACK: 1788 { 1789 struct T_ok_ack *tok = (struct T_ok_ack *)mp->b_rptr; 1790 1791 if (tok->CORRECT_prim == T_DISCON_REQ) { 1792 clnt_dispatch_notifyall(WR(q), type, reason); 1793 freemsg(mp); 1794 return; 1795 } else { 1796 if (clnt_dispatch_notifyconn(WR(q), mp)) 1797 return; 1798 } 1799 break; 1800 } 1801 case T_CONN_CON: 1802 case T_INFO_ACK: 1803 case T_OPTMGMT_ACK: 1804 if (clnt_dispatch_notifyconn(WR(q), mp)) 1805 return; 1806 break; 1807 case T_BIND_ACK: 1808 break; 1809 default: 1810 RPCLOG(1, "mir_rput: unexpected message %d " 1811 "for KRPC client\n", 1812 ((union T_primitives *)mp->b_rptr)->type); 1813 break; 1814 } 1815 break; 1816 1817 case RPC_SERVER: 1818 switch (type) { 1819 case T_BIND_ACK: 1820 { 1821 struct T_bind_ack *tbind; 1822 1823 /* 1824 * If this is a listening stream, then shut 1825 * off the idle timer. 1826 */ 1827 tbind = (struct T_bind_ack *)mp->b_rptr; 1828 if (tbind->CONIND_number > 0) { 1829 mutex_enter(&mir->mir_mutex); 1830 mir_svc_idle_stop(WR(q), mir); 1831 1832 /* 1833 * mark this as a listen endpoint 1834 * for special handling. 1835 */ 1836 1837 mir->mir_listen_stream = 1; 1838 mutex_exit(&mir->mir_mutex); 1839 } 1840 break; 1841 } 1842 case T_DISCON_IND: 1843 case T_ORDREL_IND: 1844 RPCLOG(16, "mir_rput_proto: got %s indication\n", 1845 type == T_DISCON_IND ? "disconnect" 1846 : "orderly release"); 1847 1848 /* 1849 * For listen endpoint just pass 1850 * on the message. 1851 */ 1852 1853 if (mir->mir_listen_stream) 1854 break; 1855 1856 mutex_enter(&mir->mir_mutex); 1857 1858 /* 1859 * If client wants to break off connection, record 1860 * that fact. 1861 */ 1862 mir_svc_start_close(WR(q), mir); 1863 1864 /* 1865 * If we are idle, then send the orderly release 1866 * or disconnect indication to nfsd. 1867 */ 1868 if (MIR_SVC_QUIESCED(mir)) { 1869 mutex_exit(&mir->mir_mutex); 1870 break; 1871 } 1872 1873 RPCLOG(16, "mir_rput_proto: not idle, so " 1874 "disconnect/ord rel indication not passed " 1875 "upstream on 0x%p\n", (void *)q); 1876 1877 /* 1878 * Hold the indication until we get idle 1879 * If there already is an indication stored, 1880 * replace it if the new one is a disconnect. The 1881 * reasoning is that disconnection takes less time 1882 * to process, and once a client decides to 1883 * disconnect, we should do that. 1884 */ 1885 if (mir->mir_svc_pend_mp) { 1886 if (type == T_DISCON_IND) { 1887 RPCLOG(16, "mir_rput_proto: replacing" 1888 " held disconnect/ord rel" 1889 " indication with disconnect on" 1890 " 0x%p\n", (void *)q); 1891 1892 freemsg(mir->mir_svc_pend_mp); 1893 mir->mir_svc_pend_mp = mp; 1894 } else { 1895 RPCLOG(16, "mir_rput_proto: already " 1896 "held a disconnect/ord rel " 1897 "indication. freeing ord rel " 1898 "ind on 0x%p\n", (void *)q); 1899 freemsg(mp); 1900 } 1901 } else 1902 mir->mir_svc_pend_mp = mp; 1903 1904 mutex_exit(&mir->mir_mutex); 1905 return; 1906 1907 default: 1908 /* nfsd handles server-side non-data messages. */ 1909 break; 1910 } 1911 break; 1912 1913 default: 1914 break; 1915 } 1916 1917 putnext(q, mp); 1918 } 1919 1920 /* 1921 * The server-side read queues are used to hold inbound messages while 1922 * outbound flow control is exerted. When outbound flow control is 1923 * relieved, mir_wsrv qenables the read-side queue. Read-side queues 1924 * are not enabled by STREAMS and are explicitly noenable'ed in mir_open. 1925 * 1926 * For the server side, we have two types of messages queued. The first type 1927 * are messages that are ready to be XDR decoded and and then sent to the 1928 * RPC program's dispatch routine. The second type are "raw" messages that 1929 * haven't been processed, i.e. assembled from rpc record fragements into 1930 * full requests. The only time we will see the second type of message 1931 * queued is if we have a memory allocation failure while processing a 1932 * a raw message. The field mir_first_non_processed_mblk will mark the 1933 * first such raw message. So the flow for server side is: 1934 * 1935 * - send processed queued messages to kRPC until we run out or find 1936 * one that needs additional processing because we were short on memory 1937 * earlier 1938 * - process a message that was deferred because of lack of 1939 * memory 1940 * - continue processing messages until the queue empties or we 1941 * have to stop because of lack of memory 1942 * - during each of the above phase, if the queue is empty and 1943 * there are no pending messages that were passed to the RPC 1944 * layer, send upstream the pending disconnect/ordrel indication if 1945 * there is one 1946 * 1947 * The read-side queue is also enabled by a bufcall callback if dupmsg 1948 * fails in mir_rput. 1949 */ 1950 static void 1951 mir_rsrv(queue_t *q) 1952 { 1953 mir_t *mir; 1954 mblk_t *mp; 1955 mblk_t *cmp = NULL; 1956 boolean_t stop_timer = B_FALSE; 1957 1958 mir = (mir_t *)q->q_ptr; 1959 mutex_enter(&mir->mir_mutex); 1960 1961 mp = NULL; 1962 switch (mir->mir_type) { 1963 case RPC_SERVER: 1964 if (mir->mir_ref_cnt == 0) 1965 mir->mir_hold_inbound = 0; 1966 if (mir->mir_hold_inbound) { 1967 1968 ASSERT(cmp == NULL); 1969 if (q->q_first == NULL) { 1970 1971 MIR_CLEAR_INRSRV(mir); 1972 1973 if (MIR_SVC_QUIESCED(mir)) { 1974 cmp = mir->mir_svc_pend_mp; 1975 mir->mir_svc_pend_mp = NULL; 1976 } 1977 } 1978 1979 mutex_exit(&mir->mir_mutex); 1980 1981 if (cmp != NULL) { 1982 RPCLOG(16, "mir_rsrv: line %d: sending a held " 1983 "disconnect/ord rel indication upstream\n", 1984 __LINE__); 1985 putnext(q, cmp); 1986 } 1987 1988 return; 1989 } 1990 while (mp = getq(q)) { 1991 if (mir->mir_krpc_cell && 1992 (mir->mir_svc_no_more_msgs == 0)) { 1993 /* 1994 * If we were idle, turn off idle timer since 1995 * we aren't idle any more. 1996 */ 1997 if (mir->mir_ref_cnt++ == 0) 1998 stop_timer = B_TRUE; 1999 if (mir_check_len(q, 2000 (int32_t)msgdsize(mp), mp)) 2001 return; 2002 svc_queuereq(q, mp); 2003 } else { 2004 /* 2005 * Count # of times this happens. Should be 2006 * never, but experience shows otherwise. 2007 */ 2008 if (mir->mir_krpc_cell == NULL) 2009 mir_krpc_cell_null++; 2010 freemsg(mp); 2011 } 2012 } 2013 break; 2014 case RPC_CLIENT: 2015 break; 2016 default: 2017 RPCLOG(1, "mir_rsrv: unexpected mir_type %d\n", mir->mir_type); 2018 2019 if (q->q_first == NULL) 2020 MIR_CLEAR_INRSRV(mir); 2021 2022 mutex_exit(&mir->mir_mutex); 2023 2024 return; 2025 } 2026 2027 /* 2028 * The timer is stopped after all the messages are processed. 2029 * The reason is that stopping the timer releases the mir_mutex 2030 * lock temporarily. This means that the request can be serviced 2031 * while we are still processing the message queue. This is not 2032 * good. So we stop the timer here instead. 2033 */ 2034 if (stop_timer) { 2035 RPCLOG(16, "mir_rsrv stopping idle timer on 0x%p because ref " 2036 "cnt going to non zero\n", (void *)WR(q)); 2037 mir_svc_idle_stop(WR(q), mir); 2038 } 2039 2040 if (q->q_first == NULL) { 2041 2042 MIR_CLEAR_INRSRV(mir); 2043 2044 ASSERT(cmp == NULL); 2045 if (mir->mir_type == RPC_SERVER && MIR_SVC_QUIESCED(mir)) { 2046 cmp = mir->mir_svc_pend_mp; 2047 mir->mir_svc_pend_mp = NULL; 2048 } 2049 2050 mutex_exit(&mir->mir_mutex); 2051 2052 if (cmp != NULL) { 2053 RPCLOG(16, "mir_rsrv: line %d: sending a held " 2054 "disconnect/ord rel indication upstream\n", 2055 __LINE__); 2056 putnext(q, cmp); 2057 } 2058 2059 return; 2060 } 2061 mutex_exit(&mir->mir_mutex); 2062 } 2063 2064 static int mir_svc_policy_fails; 2065 2066 /* 2067 * Called to send an event code to nfsd/lockd so that it initiates 2068 * connection close. 2069 */ 2070 static int 2071 mir_svc_policy_notify(queue_t *q, int event) 2072 { 2073 mblk_t *mp; 2074 #ifdef DEBUG 2075 mir_t *mir = (mir_t *)q->q_ptr; 2076 ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex)); 2077 #endif 2078 ASSERT(q->q_flag & QREADR); 2079 2080 /* 2081 * Create an M_DATA message with the event code and pass it to the 2082 * Stream head (nfsd or whoever created the stream will consume it). 2083 */ 2084 mp = allocb(sizeof (int), BPRI_HI); 2085 2086 if (!mp) { 2087 2088 mir_svc_policy_fails++; 2089 RPCLOG(16, "mir_svc_policy_notify: could not allocate event " 2090 "%d\n", event); 2091 return (ENOMEM); 2092 } 2093 2094 U32_TO_BE32(event, mp->b_rptr); 2095 mp->b_wptr = mp->b_rptr + sizeof (int); 2096 putnext(q, mp); 2097 return (0); 2098 } 2099 2100 /* 2101 * Server side: start the close phase. We want to get this rpcmod slot in an 2102 * idle state before mir_close() is called. 2103 */ 2104 static void 2105 mir_svc_start_close(queue_t *wq, mir_t *mir) 2106 { 2107 ASSERT(MUTEX_HELD(&mir->mir_mutex)); 2108 ASSERT((wq->q_flag & QREADR) == 0); 2109 ASSERT(mir->mir_type == RPC_SERVER); 2110 2111 2112 /* 2113 * Do not accept any more messages. 2114 */ 2115 mir->mir_svc_no_more_msgs = 1; 2116 2117 /* 2118 * Next two statements will make the read service procedure invoke 2119 * svc_queuereq() on everything stuck in the streams read queue. 2120 * It's not necessary because enabling the write queue will 2121 * have the same effect, but why not speed the process along? 2122 */ 2123 mir->mir_hold_inbound = 0; 2124 qenable(RD(wq)); 2125 2126 /* 2127 * Meanwhile force the write service procedure to send the 2128 * responses downstream, regardless of flow control. 2129 */ 2130 qenable(wq); 2131 } 2132 2133 /* 2134 * This routine is called directly by KRPC after a request is completed, 2135 * whether a reply was sent or the request was dropped. 2136 */ 2137 static void 2138 mir_svc_release(queue_t *wq, mblk_t *mp) 2139 { 2140 mir_t *mir = (mir_t *)wq->q_ptr; 2141 mblk_t *cmp = NULL; 2142 2143 ASSERT((wq->q_flag & QREADR) == 0); 2144 if (mp) 2145 freemsg(mp); 2146 2147 mutex_enter(&mir->mir_mutex); 2148 2149 /* 2150 * Start idle processing if this is the last reference. 2151 */ 2152 if ((mir->mir_ref_cnt == 1) && (mir->mir_inrservice == 0)) { 2153 cmp = mir->mir_svc_pend_mp; 2154 mir->mir_svc_pend_mp = NULL; 2155 } 2156 2157 if (cmp) { 2158 RPCLOG(16, "mir_svc_release: sending a held " 2159 "disconnect/ord rel indication upstream on queue 0x%p\n", 2160 (void *)RD(wq)); 2161 2162 mutex_exit(&mir->mir_mutex); 2163 2164 putnext(RD(wq), cmp); 2165 2166 mutex_enter(&mir->mir_mutex); 2167 } 2168 2169 /* 2170 * Start idle processing if this is the last reference. 2171 */ 2172 if (mir->mir_ref_cnt == 1 && mir->mir_inrservice == 0) { 2173 2174 RPCLOG(16, "mir_svc_release starting idle timer on 0x%p " 2175 "because ref cnt is zero\n", (void *) wq); 2176 2177 mir_svc_idle_start(wq, mir); 2178 } 2179 2180 mir->mir_ref_cnt--; 2181 ASSERT(mir->mir_ref_cnt >= 0); 2182 2183 /* 2184 * Wake up the thread waiting to close. 2185 */ 2186 2187 if ((mir->mir_ref_cnt == 0) && mir->mir_closing) 2188 cv_signal(&mir->mir_condvar); 2189 2190 mutex_exit(&mir->mir_mutex); 2191 } 2192 2193 /* 2194 * This routine is called by server-side KRPC when it is ready to 2195 * handle inbound messages on the stream. 2196 */ 2197 static void 2198 mir_svc_start(queue_t *wq) 2199 { 2200 mir_t *mir = (mir_t *)wq->q_ptr; 2201 2202 /* 2203 * no longer need to take the mir_mutex because the 2204 * mir_setup_complete field has been moved out of 2205 * the binary field protected by the mir_mutex. 2206 */ 2207 2208 mir->mir_setup_complete = 1; 2209 qenable(RD(wq)); 2210 } 2211 2212 /* 2213 * client side wrapper for stopping timer with normal idle timeout. 2214 */ 2215 static void 2216 mir_clnt_idle_stop(queue_t *wq, mir_t *mir) 2217 { 2218 ASSERT(MUTEX_HELD(&mir->mir_mutex)); 2219 ASSERT((wq->q_flag & QREADR) == 0); 2220 ASSERT(mir->mir_type == RPC_CLIENT); 2221 2222 mir_timer_stop(mir); 2223 } 2224 2225 /* 2226 * client side wrapper for stopping timer with normal idle timeout. 2227 */ 2228 static void 2229 mir_clnt_idle_start(queue_t *wq, mir_t *mir) 2230 { 2231 ASSERT(MUTEX_HELD(&mir->mir_mutex)); 2232 ASSERT((wq->q_flag & QREADR) == 0); 2233 ASSERT(mir->mir_type == RPC_CLIENT); 2234 2235 mir_timer_start(wq, mir, mir->mir_idle_timeout); 2236 } 2237 2238 /* 2239 * client side only. Forces rpcmod to stop sending T_ORDREL_REQs on 2240 * end-points that aren't connected. 2241 */ 2242 static void 2243 mir_clnt_idle_do_stop(queue_t *wq) 2244 { 2245 mir_t *mir = (mir_t *)wq->q_ptr; 2246 2247 RPCLOG(1, "mir_clnt_idle_do_stop: wq 0x%p\n", (void *)wq); 2248 ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex)); 2249 mutex_enter(&mir->mir_mutex); 2250 mir_clnt_idle_stop(wq, mir); 2251 mutex_exit(&mir->mir_mutex); 2252 } 2253 2254 /* 2255 * Timer handler. It handles idle timeout and memory shortage problem. 2256 */ 2257 static void 2258 mir_timer(void *arg) 2259 { 2260 queue_t *wq = (queue_t *)arg; 2261 mir_t *mir = (mir_t *)wq->q_ptr; 2262 boolean_t notify; 2263 clock_t now; 2264 2265 mutex_enter(&mir->mir_mutex); 2266 2267 /* 2268 * mir_timer_call is set only when either mir_timer_[start|stop] 2269 * is progressing. And mir_timer() can only be run while they 2270 * are progressing if the timer is being stopped. So just 2271 * return. 2272 */ 2273 if (mir->mir_timer_call) { 2274 mutex_exit(&mir->mir_mutex); 2275 return; 2276 } 2277 mir->mir_timer_id = 0; 2278 2279 switch (mir->mir_type) { 2280 case RPC_CLIENT: 2281 2282 /* 2283 * For clients, the timer fires at clnt_idle_timeout 2284 * intervals. If the activity marker (mir_clntreq) is 2285 * zero, then the stream has been idle since the last 2286 * timer event and we notify KRPC. If mir_clntreq is 2287 * non-zero, then the stream is active and we just 2288 * restart the timer for another interval. mir_clntreq 2289 * is set to 1 in mir_wput for every request passed 2290 * downstream. 2291 * 2292 * If this was a memory shortage timer reset the idle 2293 * timeout regardless; the mir_clntreq will not be a 2294 * valid indicator. 2295 * 2296 * The timer is initially started in mir_wput during 2297 * RPC_CLIENT ioctl processing. 2298 * 2299 * The timer interval can be changed for individual 2300 * streams with the ND variable "mir_idle_timeout". 2301 */ 2302 now = ddi_get_lbolt(); 2303 if (mir->mir_clntreq > 0 && mir->mir_use_timestamp + 2304 MSEC_TO_TICK(mir->mir_idle_timeout) - now >= 0) { 2305 clock_t tout; 2306 2307 tout = mir->mir_idle_timeout - 2308 TICK_TO_MSEC(now - mir->mir_use_timestamp); 2309 if (tout < 0) 2310 tout = 1000; 2311 #if 0 2312 printf("mir_timer[%d < %d + %d]: reset client timer " 2313 "to %d (ms)\n", TICK_TO_MSEC(now), 2314 TICK_TO_MSEC(mir->mir_use_timestamp), 2315 mir->mir_idle_timeout, tout); 2316 #endif 2317 mir->mir_clntreq = 0; 2318 mir_timer_start(wq, mir, tout); 2319 mutex_exit(&mir->mir_mutex); 2320 return; 2321 } 2322 #if 0 2323 printf("mir_timer[%d]: doing client timeout\n", now / hz); 2324 #endif 2325 /* 2326 * We are disconnecting, but not necessarily 2327 * closing. By not closing, we will fail to 2328 * pick up a possibly changed global timeout value, 2329 * unless we store it now. 2330 */ 2331 mir->mir_idle_timeout = clnt_idle_timeout; 2332 mir_clnt_idle_start(wq, mir); 2333 2334 mutex_exit(&mir->mir_mutex); 2335 /* 2336 * We pass T_ORDREL_REQ as an integer value 2337 * to KRPC as the indication that the stream 2338 * is idle. This is not a T_ORDREL_REQ message, 2339 * it is just a convenient value since we call 2340 * the same KRPC routine for T_ORDREL_INDs and 2341 * T_DISCON_INDs. 2342 */ 2343 clnt_dispatch_notifyall(wq, T_ORDREL_REQ, 0); 2344 return; 2345 2346 case RPC_SERVER: 2347 2348 /* 2349 * For servers, the timer is only running when the stream 2350 * is really idle or memory is short. The timer is started 2351 * by mir_wput when mir_type is set to RPC_SERVER and 2352 * by mir_svc_idle_start whenever the stream goes idle 2353 * (mir_ref_cnt == 0). The timer is cancelled in 2354 * mir_rput whenever a new inbound request is passed to KRPC 2355 * and the stream was previously idle. 2356 * 2357 * The timer interval can be changed for individual 2358 * streams with the ND variable "mir_idle_timeout". 2359 * 2360 * If the stream is not idle do nothing. 2361 */ 2362 if (!MIR_SVC_QUIESCED(mir)) { 2363 mutex_exit(&mir->mir_mutex); 2364 return; 2365 } 2366 2367 notify = !mir->mir_inrservice; 2368 mutex_exit(&mir->mir_mutex); 2369 2370 /* 2371 * If there is no packet queued up in read queue, the stream 2372 * is really idle so notify nfsd to close it. 2373 */ 2374 if (notify) { 2375 RPCLOG(16, "mir_timer: telling stream head listener " 2376 "to close stream (0x%p)\n", (void *) RD(wq)); 2377 (void) mir_svc_policy_notify(RD(wq), 1); 2378 } 2379 return; 2380 default: 2381 RPCLOG(1, "mir_timer: unexpected mir_type %d\n", 2382 mir->mir_type); 2383 mutex_exit(&mir->mir_mutex); 2384 return; 2385 } 2386 } 2387 2388 /* 2389 * Called by the RPC package to send either a call or a return, or a 2390 * transport connection request. Adds the record marking header. 2391 */ 2392 static void 2393 mir_wput(queue_t *q, mblk_t *mp) 2394 { 2395 uint_t frag_header; 2396 mir_t *mir = (mir_t *)q->q_ptr; 2397 uchar_t *rptr = mp->b_rptr; 2398 2399 if (!mir) { 2400 freemsg(mp); 2401 return; 2402 } 2403 2404 if (mp->b_datap->db_type != M_DATA) { 2405 mir_wput_other(q, mp); 2406 return; 2407 } 2408 2409 if (mir->mir_ordrel_pending == 1) { 2410 freemsg(mp); 2411 RPCLOG(16, "mir_wput wq 0x%p: got data after T_ORDREL_REQ\n", 2412 (void *)q); 2413 return; 2414 } 2415 2416 frag_header = (uint_t)DLEN(mp); 2417 frag_header |= MIR_LASTFRAG; 2418 2419 /* Stick in the 4 byte record marking header. */ 2420 if ((rptr - mp->b_datap->db_base) < sizeof (uint32_t) || 2421 !IS_P2ALIGNED(mp->b_rptr, sizeof (uint32_t))) { 2422 /* 2423 * Since we know that M_DATA messages are created exclusively 2424 * by KRPC, we expect that KRPC will leave room for our header 2425 * and 4 byte align which is normal for XDR. 2426 * If KRPC (or someone else) does not cooperate, then we 2427 * just throw away the message. 2428 */ 2429 RPCLOG(1, "mir_wput: KRPC did not leave space for record " 2430 "fragment header (%d bytes left)\n", 2431 (int)(rptr - mp->b_datap->db_base)); 2432 freemsg(mp); 2433 return; 2434 } 2435 rptr -= sizeof (uint32_t); 2436 *(uint32_t *)rptr = htonl(frag_header); 2437 mp->b_rptr = rptr; 2438 2439 mutex_enter(&mir->mir_mutex); 2440 if (mir->mir_type == RPC_CLIENT) { 2441 /* 2442 * For the client, set mir_clntreq to indicate that the 2443 * connection is active. 2444 */ 2445 mir->mir_clntreq = 1; 2446 mir->mir_use_timestamp = ddi_get_lbolt(); 2447 } 2448 2449 /* 2450 * If we haven't already queued some data and the downstream module 2451 * can accept more data, send it on, otherwise we queue the message 2452 * and take other actions depending on mir_type. 2453 */ 2454 if (!mir->mir_inwservice && MIR_WCANPUTNEXT(mir, q)) { 2455 mutex_exit(&mir->mir_mutex); 2456 2457 /* 2458 * Now we pass the RPC message downstream. 2459 */ 2460 putnext(q, mp); 2461 return; 2462 } 2463 2464 switch (mir->mir_type) { 2465 case RPC_CLIENT: 2466 /* 2467 * Check for a previous duplicate request on the 2468 * queue. If there is one, then we throw away 2469 * the current message and let the previous one 2470 * go through. If we can't find a duplicate, then 2471 * send this one. This tap dance is an effort 2472 * to reduce traffic and processing requirements 2473 * under load conditions. 2474 */ 2475 if (mir_clnt_dup_request(q, mp)) { 2476 mutex_exit(&mir->mir_mutex); 2477 freemsg(mp); 2478 return; 2479 } 2480 break; 2481 case RPC_SERVER: 2482 /* 2483 * Set mir_hold_inbound so that new inbound RPC 2484 * messages will be held until the client catches 2485 * up on the earlier replies. This flag is cleared 2486 * in mir_wsrv after flow control is relieved; 2487 * the read-side queue is also enabled at that time. 2488 */ 2489 mir->mir_hold_inbound = 1; 2490 break; 2491 default: 2492 RPCLOG(1, "mir_wput: unexpected mir_type %d\n", mir->mir_type); 2493 break; 2494 } 2495 mir->mir_inwservice = 1; 2496 (void) putq(q, mp); 2497 mutex_exit(&mir->mir_mutex); 2498 } 2499 2500 static void 2501 mir_wput_other(queue_t *q, mblk_t *mp) 2502 { 2503 mir_t *mir = (mir_t *)q->q_ptr; 2504 struct iocblk *iocp; 2505 uchar_t *rptr = mp->b_rptr; 2506 bool_t flush_in_svc = FALSE; 2507 2508 ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex)); 2509 switch (mp->b_datap->db_type) { 2510 case M_IOCTL: 2511 iocp = (struct iocblk *)rptr; 2512 switch (iocp->ioc_cmd) { 2513 case RPC_CLIENT: 2514 mutex_enter(&mir->mir_mutex); 2515 if (mir->mir_type != 0 && 2516 mir->mir_type != iocp->ioc_cmd) { 2517 ioc_eperm: 2518 mutex_exit(&mir->mir_mutex); 2519 iocp->ioc_error = EPERM; 2520 iocp->ioc_count = 0; 2521 mp->b_datap->db_type = M_IOCACK; 2522 qreply(q, mp); 2523 return; 2524 } 2525 2526 mir->mir_type = iocp->ioc_cmd; 2527 2528 /* 2529 * Clear mir_hold_inbound which was set to 1 by 2530 * mir_open. This flag is not used on client 2531 * streams. 2532 */ 2533 mir->mir_hold_inbound = 0; 2534 mir->mir_max_msg_sizep = &clnt_max_msg_size; 2535 2536 /* 2537 * Start the idle timer. See mir_timer() for more 2538 * information on how client timers work. 2539 */ 2540 mir->mir_idle_timeout = clnt_idle_timeout; 2541 mir_clnt_idle_start(q, mir); 2542 mutex_exit(&mir->mir_mutex); 2543 2544 mp->b_datap->db_type = M_IOCACK; 2545 qreply(q, mp); 2546 return; 2547 case RPC_SERVER: 2548 mutex_enter(&mir->mir_mutex); 2549 if (mir->mir_type != 0 && 2550 mir->mir_type != iocp->ioc_cmd) 2551 goto ioc_eperm; 2552 2553 /* 2554 * We don't clear mir_hold_inbound here because 2555 * mir_hold_inbound is used in the flow control 2556 * model. If we cleared it here, then we'd commit 2557 * a small violation to the model where the transport 2558 * might immediately block downstream flow. 2559 */ 2560 2561 mir->mir_type = iocp->ioc_cmd; 2562 mir->mir_max_msg_sizep = &svc_max_msg_size; 2563 2564 /* 2565 * Start the idle timer. See mir_timer() for more 2566 * information on how server timers work. 2567 * 2568 * Note that it is important to start the idle timer 2569 * here so that connections time out even if we 2570 * never receive any data on them. 2571 */ 2572 mir->mir_idle_timeout = svc_idle_timeout; 2573 RPCLOG(16, "mir_wput_other starting idle timer on 0x%p " 2574 "because we got RPC_SERVER ioctl\n", (void *)q); 2575 mir_svc_idle_start(q, mir); 2576 mutex_exit(&mir->mir_mutex); 2577 2578 mp->b_datap->db_type = M_IOCACK; 2579 qreply(q, mp); 2580 return; 2581 default: 2582 break; 2583 } 2584 break; 2585 2586 case M_PROTO: 2587 if (mir->mir_type == RPC_CLIENT) { 2588 /* 2589 * We are likely being called from the context of a 2590 * service procedure. So we need to enqueue. However 2591 * enqueing may put our message behind data messages. 2592 * So flush the data first. 2593 */ 2594 flush_in_svc = TRUE; 2595 } 2596 if ((mp->b_wptr - rptr) < sizeof (uint32_t) || 2597 !IS_P2ALIGNED(rptr, sizeof (uint32_t))) 2598 break; 2599 2600 switch (((union T_primitives *)rptr)->type) { 2601 case T_DATA_REQ: 2602 /* Don't pass T_DATA_REQ messages downstream. */ 2603 freemsg(mp); 2604 return; 2605 case T_ORDREL_REQ: 2606 RPCLOG(8, "mir_wput_other wq 0x%p: got T_ORDREL_REQ\n", 2607 (void *)q); 2608 mutex_enter(&mir->mir_mutex); 2609 if (mir->mir_type != RPC_SERVER) { 2610 /* 2611 * We are likely being called from 2612 * clnt_dispatch_notifyall(). Sending 2613 * a T_ORDREL_REQ will result in 2614 * a some kind of _IND message being sent, 2615 * will be another call to 2616 * clnt_dispatch_notifyall(). To keep the stack 2617 * lean, queue this message. 2618 */ 2619 mir->mir_inwservice = 1; 2620 (void) putq(q, mp); 2621 mutex_exit(&mir->mir_mutex); 2622 return; 2623 } 2624 2625 /* 2626 * Mark the structure such that we don't accept any 2627 * more requests from client. We could defer this 2628 * until we actually send the orderly release 2629 * request downstream, but all that does is delay 2630 * the closing of this stream. 2631 */ 2632 RPCLOG(16, "mir_wput_other wq 0x%p: got T_ORDREL_REQ " 2633 " so calling mir_svc_start_close\n", (void *)q); 2634 2635 mir_svc_start_close(q, mir); 2636 2637 /* 2638 * If we have sent down a T_ORDREL_REQ, don't send 2639 * any more. 2640 */ 2641 if (mir->mir_ordrel_pending) { 2642 freemsg(mp); 2643 mutex_exit(&mir->mir_mutex); 2644 return; 2645 } 2646 2647 /* 2648 * If the stream is not idle, then we hold the 2649 * orderly release until it becomes idle. This 2650 * ensures that KRPC will be able to reply to 2651 * all requests that we have passed to it. 2652 * 2653 * We also queue the request if there is data already 2654 * queued, because we cannot allow the T_ORDREL_REQ 2655 * to go before data. When we had a separate reply 2656 * count, this was not a problem, because the 2657 * reply count was reconciled when mir_wsrv() 2658 * completed. 2659 */ 2660 if (!MIR_SVC_QUIESCED(mir) || 2661 mir->mir_inwservice == 1) { 2662 mir->mir_inwservice = 1; 2663 (void) putq(q, mp); 2664 2665 RPCLOG(16, "mir_wput_other: queuing " 2666 "T_ORDREL_REQ on 0x%p\n", (void *)q); 2667 2668 mutex_exit(&mir->mir_mutex); 2669 return; 2670 } 2671 2672 /* 2673 * Mark the structure so that we know we sent 2674 * an orderly release request, and reset the idle timer. 2675 */ 2676 mir->mir_ordrel_pending = 1; 2677 2678 RPCLOG(16, "mir_wput_other: calling mir_svc_idle_start" 2679 " on 0x%p because we got T_ORDREL_REQ\n", 2680 (void *)q); 2681 2682 mir_svc_idle_start(q, mir); 2683 mutex_exit(&mir->mir_mutex); 2684 2685 /* 2686 * When we break, we will putnext the T_ORDREL_REQ. 2687 */ 2688 break; 2689 2690 case T_CONN_REQ: 2691 mutex_enter(&mir->mir_mutex); 2692 if (mir->mir_head_mp != NULL) { 2693 freemsg(mir->mir_head_mp); 2694 mir->mir_head_mp = NULL; 2695 mir->mir_tail_mp = NULL; 2696 } 2697 mir->mir_frag_len = -(int32_t)sizeof (uint32_t); 2698 /* 2699 * Restart timer in case mir_clnt_idle_do_stop() was 2700 * called. 2701 */ 2702 mir->mir_idle_timeout = clnt_idle_timeout; 2703 mir_clnt_idle_stop(q, mir); 2704 mir_clnt_idle_start(q, mir); 2705 mutex_exit(&mir->mir_mutex); 2706 break; 2707 2708 default: 2709 /* 2710 * T_DISCON_REQ is one of the interesting default 2711 * cases here. Ideally, an M_FLUSH is done before 2712 * T_DISCON_REQ is done. However, that is somewhat 2713 * cumbersome for clnt_cots.c to do. So we queue 2714 * T_DISCON_REQ, and let the service procedure 2715 * flush all M_DATA. 2716 */ 2717 break; 2718 } 2719 /* fallthru */; 2720 default: 2721 if (mp->b_datap->db_type >= QPCTL) { 2722 if (mp->b_datap->db_type == M_FLUSH) { 2723 if (mir->mir_type == RPC_CLIENT && 2724 *mp->b_rptr & FLUSHW) { 2725 RPCLOG(32, "mir_wput_other: flushing " 2726 "wq 0x%p\n", (void *)q); 2727 if (*mp->b_rptr & FLUSHBAND) { 2728 flushband(q, *(mp->b_rptr + 1), 2729 FLUSHDATA); 2730 } else { 2731 flushq(q, FLUSHDATA); 2732 } 2733 } else { 2734 RPCLOG(32, "mir_wput_other: ignoring " 2735 "M_FLUSH on wq 0x%p\n", (void *)q); 2736 } 2737 } 2738 break; 2739 } 2740 2741 mutex_enter(&mir->mir_mutex); 2742 if (mir->mir_inwservice == 0 && MIR_WCANPUTNEXT(mir, q)) { 2743 mutex_exit(&mir->mir_mutex); 2744 break; 2745 } 2746 mir->mir_inwservice = 1; 2747 mir->mir_inwflushdata = flush_in_svc; 2748 (void) putq(q, mp); 2749 mutex_exit(&mir->mir_mutex); 2750 qenable(q); 2751 2752 return; 2753 } 2754 putnext(q, mp); 2755 } 2756 2757 static void 2758 mir_wsrv(queue_t *q) 2759 { 2760 mblk_t *mp; 2761 mir_t *mir; 2762 bool_t flushdata; 2763 2764 mir = (mir_t *)q->q_ptr; 2765 mutex_enter(&mir->mir_mutex); 2766 2767 flushdata = mir->mir_inwflushdata; 2768 mir->mir_inwflushdata = 0; 2769 2770 while (mp = getq(q)) { 2771 if (mp->b_datap->db_type == M_DATA) { 2772 /* 2773 * Do not send any more data if we have sent 2774 * a T_ORDREL_REQ. 2775 */ 2776 if (flushdata || mir->mir_ordrel_pending == 1) { 2777 freemsg(mp); 2778 continue; 2779 } 2780 2781 /* 2782 * Make sure that the stream can really handle more 2783 * data. 2784 */ 2785 if (!MIR_WCANPUTNEXT(mir, q)) { 2786 (void) putbq(q, mp); 2787 mutex_exit(&mir->mir_mutex); 2788 return; 2789 } 2790 2791 /* 2792 * Now we pass the RPC message downstream. 2793 */ 2794 mutex_exit(&mir->mir_mutex); 2795 putnext(q, mp); 2796 mutex_enter(&mir->mir_mutex); 2797 continue; 2798 } 2799 2800 /* 2801 * This is not an RPC message, pass it downstream 2802 * (ignoring flow control) if the server side is not sending a 2803 * T_ORDREL_REQ downstream. 2804 */ 2805 if (mir->mir_type != RPC_SERVER || 2806 ((union T_primitives *)mp->b_rptr)->type != 2807 T_ORDREL_REQ) { 2808 mutex_exit(&mir->mir_mutex); 2809 putnext(q, mp); 2810 mutex_enter(&mir->mir_mutex); 2811 continue; 2812 } 2813 2814 if (mir->mir_ordrel_pending == 1) { 2815 /* 2816 * Don't send two T_ORDRELs 2817 */ 2818 freemsg(mp); 2819 continue; 2820 } 2821 2822 /* 2823 * Mark the structure so that we know we sent an orderly 2824 * release request. We will check to see slot is idle at the 2825 * end of this routine, and if so, reset the idle timer to 2826 * handle orderly release timeouts. 2827 */ 2828 mir->mir_ordrel_pending = 1; 2829 RPCLOG(16, "mir_wsrv: sending ordrel req on q 0x%p\n", 2830 (void *)q); 2831 /* 2832 * Send the orderly release downstream. If there are other 2833 * pending replies we won't be able to send them. However, 2834 * the only reason we should send the orderly release is if 2835 * we were idle, or if an unusual event occurred. 2836 */ 2837 mutex_exit(&mir->mir_mutex); 2838 putnext(q, mp); 2839 mutex_enter(&mir->mir_mutex); 2840 } 2841 2842 if (q->q_first == NULL) 2843 /* 2844 * If we call mir_svc_idle_start() below, then 2845 * clearing mir_inwservice here will also result in 2846 * any thread waiting in mir_close() to be signaled. 2847 */ 2848 mir->mir_inwservice = 0; 2849 2850 if (mir->mir_type != RPC_SERVER) { 2851 mutex_exit(&mir->mir_mutex); 2852 return; 2853 } 2854 2855 /* 2856 * If idle we call mir_svc_idle_start to start the timer (or wakeup 2857 * a close). Also make sure not to start the idle timer on the 2858 * listener stream. This can cause nfsd to send an orderly release 2859 * command on the listener stream. 2860 */ 2861 if (MIR_SVC_QUIESCED(mir) && !(mir->mir_listen_stream)) { 2862 RPCLOG(16, "mir_wsrv: calling mir_svc_idle_start on 0x%p " 2863 "because mir slot is idle\n", (void *)q); 2864 mir_svc_idle_start(q, mir); 2865 } 2866 2867 /* 2868 * If outbound flow control has been relieved, then allow new 2869 * inbound requests to be processed. 2870 */ 2871 if (mir->mir_hold_inbound) { 2872 mir->mir_hold_inbound = 0; 2873 qenable(RD(q)); 2874 } 2875 mutex_exit(&mir->mir_mutex); 2876 } 2877 2878 static void 2879 mir_disconnect(queue_t *q, mir_t *mir) 2880 { 2881 ASSERT(MUTEX_HELD(&mir->mir_mutex)); 2882 2883 switch (mir->mir_type) { 2884 case RPC_CLIENT: 2885 /* 2886 * We are disconnecting, but not necessarily 2887 * closing. By not closing, we will fail to 2888 * pick up a possibly changed global timeout value, 2889 * unless we store it now. 2890 */ 2891 mir->mir_idle_timeout = clnt_idle_timeout; 2892 mir_clnt_idle_start(WR(q), mir); 2893 mutex_exit(&mir->mir_mutex); 2894 2895 /* 2896 * T_DISCON_REQ is passed to KRPC as an integer value 2897 * (this is not a TPI message). It is used as a 2898 * convenient value to indicate a sanity check 2899 * failure -- the same KRPC routine is also called 2900 * for T_DISCON_INDs and T_ORDREL_INDs. 2901 */ 2902 clnt_dispatch_notifyall(WR(q), T_DISCON_REQ, 0); 2903 break; 2904 2905 case RPC_SERVER: 2906 mir->mir_svc_no_more_msgs = 1; 2907 mir_svc_idle_stop(WR(q), mir); 2908 mutex_exit(&mir->mir_mutex); 2909 RPCLOG(16, "mir_disconnect: telling " 2910 "stream head listener to disconnect stream " 2911 "(0x%p)\n", (void *) q); 2912 (void) mir_svc_policy_notify(q, 2); 2913 break; 2914 2915 default: 2916 mutex_exit(&mir->mir_mutex); 2917 break; 2918 } 2919 } 2920 2921 /* 2922 * Sanity check the message length, and if it's too large, shutdown the 2923 * connection. Returns 1 if the connection is shutdown; 0 otherwise. 2924 */ 2925 static int 2926 mir_check_len(queue_t *q, int32_t frag_len, mblk_t *head_mp) 2927 { 2928 mir_t *mir = q->q_ptr; 2929 uint_t maxsize = 0; 2930 2931 if (mir->mir_max_msg_sizep != NULL) 2932 maxsize = *mir->mir_max_msg_sizep; 2933 2934 if (maxsize == 0 || frag_len <= (int)maxsize) 2935 return (0); 2936 2937 freemsg(head_mp); 2938 mir->mir_head_mp = NULL; 2939 mir->mir_tail_mp = NULL; 2940 mir->mir_frag_header = 0; 2941 mir->mir_frag_len = -(int32_t)sizeof (uint32_t); 2942 if (mir->mir_type != RPC_SERVER || mir->mir_setup_complete) { 2943 cmn_err(CE_NOTE, 2944 "KRPC: record fragment from %s of size(%d) exceeds " 2945 "maximum (%u). Disconnecting", 2946 (mir->mir_type == RPC_CLIENT) ? "server" : 2947 (mir->mir_type == RPC_SERVER) ? "client" : 2948 "test tool", frag_len, maxsize); 2949 } 2950 2951 mir_disconnect(q, mir); 2952 return (1); 2953 } 2954