/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright 2012 Milan Jurik. All rights reserved.
 */
/* Copyright (c) 1990 Mentat Inc. */

/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */

/*
 * Kernel RPC filtering module
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/strsubr.h>
#include <sys/tihdr.h>
#include <sys/timod.h>
#include <sys/tiuser.h>
#include <sys/debug.h>
#include <sys/signal.h>
#include <sys/pcb.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/cred.h>
#include <sys/policy.h>
#include <sys/inline.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/file.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/t_lock.h>
#include <sys/ddi.h>
#include <sys/vtrace.h>
#include <sys/callb.h>
#include <sys/strsun.h>

#include <sys/strlog.h>
#include <rpc/rpc_com.h>
#include <inet/common.h>
#include <rpc/types.h>
#include <sys/time.h>
#include <rpc/xdr.h>
#include <rpc/auth.h>
#include <rpc/clnt.h>
#include <rpc/rpc_msg.h>
#include <rpc/svc.h>
#include <rpc/rpcsys.h>
#include <rpc/rpc_rdma.h>

/*
 * This is the loadable module wrapper.
 */
#include <sys/conf.h>
#include <sys/modctl.h>
#include <sys/syscall.h>

extern struct streamtab rpcinfo;

static struct fmodsw fsw = {
	"rpcmod",
	&rpcinfo,
	D_NEW|D_MP,
};

/*
 * Module linkage information for the kernel.
 */

static struct modlstrmod modlstrmod = {
	&mod_strmodops, "rpc interface str mod", &fsw
};

/*
 * For the RPC system call.
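 * The sysent entry below registers rpcsys() as a two-argument system
 * call; SE_NOUNLOAD keeps the syscall entry resident once installed.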
 */
static struct sysent rpcsysent = {
	2,
	SE_32RVAL1 | SE_ARGC | SE_NOUNLOAD,
	rpcsys
};

static struct modlsys modlsys = {
	&mod_syscallops,
	"RPC syscall",
	&rpcsysent
};

#ifdef _SYSCALL32_IMPL
static struct modlsys modlsys32 = {
	&mod_syscallops32,
	"32-bit RPC syscall",
	&rpcsysent
};
#endif /* _SYSCALL32_IMPL */

static struct modlinkage modlinkage = {
	MODREV_1,
	{
		&modlsys,
#ifdef _SYSCALL32_IMPL
		&modlsys32,
#endif
		&modlstrmod,
		NULL
	}
};

int
_init(void)
{
	int error = 0;
	callb_id_t cid;
	int status;

	svc_init();
	clnt_init();
	cid = callb_add(connmgr_cpr_reset, 0, CB_CL_CPR_RPC, "rpc");

	if (error = mod_install(&modlinkage)) {
		/*
		 * Could not install module, cleanup previous
		 * initialization work.
		 */
		clnt_fini();
		if (cid != NULL)
			(void) callb_delete(cid);

		return (error);
	}

	/*
	 * Load up the RDMA plugins and initialize the stats. Even if the
	 * plugin load fails, the counters still get initialized as long
	 * as rpcmod itself was successfully installed.
	 */
	rw_init(&rdma_lock, NULL, RW_DEFAULT, NULL);
	mutex_init(&rdma_modload_lock, NULL, MUTEX_DEFAULT, NULL);

	cv_init(&rdma_wait.svc_cv, NULL, CV_DEFAULT, NULL);
	mutex_init(&rdma_wait.svc_lock, NULL, MUTEX_DEFAULT, NULL);

	mt_kstat_init();

	/*
	 * Get our identification into ldi.  This is used for loading
	 * other modules, e.g. rpcib.
	 */
	status = ldi_ident_from_mod(&modlinkage, &rpcmod_li);
	if (status != 0) {
		cmn_err(CE_WARN, "ldi_ident_from_mod fails with %d", status);
		rpcmod_li = NULL;
	}

	return (error);
}

/*
 * The unload entry point fails, because we advertise entry points into
 * rpcmod from the rest of kRPC: rpcmod_release().
 */
int
_fini(void)
{
	return (EBUSY);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

extern int nulldev();

#define	RPCMOD_ID	2049

int rmm_open(queue_t *, dev_t *, int, int, cred_t *);
int rmm_close(queue_t *, int, cred_t *);

/*
 * To save instructions, since STREAMS ignores the return value
 * from these functions, they are defined as void here. Kind of icky, but...
 */
void rmm_rput(queue_t *, mblk_t *);
void rmm_wput(queue_t *, mblk_t *);
void rmm_rsrv(queue_t *);
void rmm_wsrv(queue_t *);

int rpcmodopen(queue_t *, dev_t *, int, int, cred_t *);
int rpcmodclose(queue_t *, int, cred_t *);
void rpcmodrput(queue_t *, mblk_t *);
void rpcmodwput(queue_t *, mblk_t *);
void rpcmodrsrv();
void rpcmodwsrv(queue_t *);

static void rpcmodwput_other(queue_t *, mblk_t *);
static int mir_close(queue_t *q);
static int mir_open(queue_t *q, dev_t *devp, int flag, int sflag,
    cred_t *credp);
static void mir_rput(queue_t *q, mblk_t *mp);
static void mir_rsrv(queue_t *q);
static void mir_wput(queue_t *q, mblk_t *mp);
static void mir_wsrv(queue_t *q);

static struct module_info rpcmod_info =
	{RPCMOD_ID, "rpcmod", 0, INFPSZ, 256*1024, 1024};
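
/*
 * Both qinit structures below dispatch through the rmm_* wrappers,
 * which select the CLTS (rpcmod*) or COTS (mir_*) entry points in
 * rmm_open() based on the transport's T_INFO_ACK.
 */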

/*
 * Read side has no service procedure in the CLTS case; rmm_rsrv
 * dispatches to mir_rsrv on COTS streams.
 */
static struct qinit rpcmodrinit = {
	(int (*)())rmm_rput,
	(int (*)())rmm_rsrv,
	rmm_open,
	rmm_close,
	nulldev,
	&rpcmod_info,
	NULL
};

/*
 * The write put procedure is simply putnext to conserve stack space.
 * The write service procedure is not used to queue data, but instead to
 * synchronize with flow control.
 */
static struct qinit rpcmodwinit = {
	(int (*)())rmm_wput,
	(int (*)())rmm_wsrv,
	rmm_open,
	rmm_close,
	nulldev,
	&rpcmod_info,
	NULL
};
struct streamtab rpcinfo = { &rpcmodrinit, &rpcmodwinit, NULL, NULL };

struct xprt_style_ops {
	int (*xo_open)();
	int (*xo_close)();
	void (*xo_wput)();
	void (*xo_wsrv)();
	void (*xo_rput)();
	void (*xo_rsrv)();
};

static struct xprt_style_ops xprt_clts_ops = {
	rpcmodopen,
	rpcmodclose,
	rpcmodwput,
	rpcmodwsrv,
	rpcmodrput,
	NULL
};

static struct xprt_style_ops xprt_cots_ops = {
	mir_open,
	mir_close,
	mir_wput,
	mir_wsrv,
	mir_rput,
	mir_rsrv
};

/*
 * Per rpcmod "slot" data structure. q->q_ptr points to one of these.
 */
struct rpcm {
	void		*rm_krpc_cell;	/* Reserved for use by KRPC */
	struct		xprt_style_ops	*rm_ops;
	int		rm_type;	/* Client or server side stream */
#define	RM_CLOSING	0x1		/* somebody is trying to close slot */
	uint_t		rm_state;	/* state of the slot. see above */
	uint_t		rm_ref;		/* cnt of external references to slot */
	kmutex_t	rm_lock;	/* mutex protecting above fields */
	kcondvar_t	rm_cwait;	/* condition for closing */
	zoneid_t	rm_zoneid;	/* zone which pushed rpcmod */
};

struct temp_slot {
	void *cell;
	struct xprt_style_ops *ops;
	int type;
	mblk_t *info_ack;
	kmutex_t lock;
	kcondvar_t wait;
};

typedef struct mir_s {
	void	*mir_krpc_cell;		/* Reserved for KRPC use. This field */
					/* must be first in the structure. */
	struct xprt_style_ops	*rm_ops;
	int	mir_type;		/* Client or server side stream */

	mblk_t	*mir_head_mp;		/* RPC msg in progress */
		/*
		 * mir_head_mp points to the first mblk being collected in
		 * the current RPC message.  Record headers are removed
		 * before data is linked into mir_head_mp.
		 */
	mblk_t	*mir_tail_mp;		/* Last mblk in mir_head_mp */
		/*
		 * mir_tail_mp points to the last mblk in the message
		 * chain starting at mir_head_mp.  It is only valid
		 * if mir_head_mp is non-NULL and is used to add new
		 * data blocks to the end of chain quickly.
		 */

	int32_t	mir_frag_len;		/* Bytes seen in the current frag */
		/*
		 * mir_frag_len starts at -4 for beginning of each fragment.
		 * When this length is negative, it indicates the number of
		 * bytes that rpcmod needs to complete the record marker
		 * header.  When it is positive or zero, it holds the number
		 * of bytes that have arrived for the current fragment and
		 * are held in mir_head_mp.
		 */

	int32_t	mir_frag_header;
		/*
		 * Fragment header as collected for the current fragment.
		 * It holds the last-fragment indicator and the number
		 * of bytes in the fragment.
		 */

	unsigned int
		mir_ordrel_pending : 1,	/* Sent T_ORDREL_REQ */
		mir_hold_inbound : 1,	/* Hold inbound messages on server */
					/* side until outbound flow control */
					/* is relieved. */
		mir_closing : 1,	/* The stream is being closed */
		mir_inrservice : 1,	/* data queued or rd srv proc running */
		mir_inwservice : 1,	/* data queued or wr srv proc running */
		mir_inwflushdata : 1,	/* flush M_DATAs when srv runs */
		/*
		 * On client streams, mir_clntreq is 0 or 1; it is set
		 * to 1 whenever a new request is sent out (mir_wput)
		 * and cleared when the timer fires (mir_timer).  If
		 * the timer fires with this value equal to 0, then the
		 * stream is considered idle and KRPC is notified.
		 */
		mir_clntreq : 1,
		/*
		 * On server streams, stop accepting messages
		 */
		mir_svc_no_more_msgs : 1,
		mir_listen_stream : 1,	/* listen end point */
		mir_unused : 1,		/* no longer used */
		mir_timer_call : 1,
		mir_junk_fill_thru_bit_31 : 21;

	int	mir_setup_complete;	/* server has initialized everything */
	timeout_id_t mir_timer_id;	/* Timer for idle checks */
	clock_t	mir_idle_timeout;	/* Allowed idle time before shutdown */
		/*
		 * This value is copied from clnt_idle_timeout or
		 * svc_idle_timeout during the appropriate ioctl.
		 * Kept in milliseconds
		 */
	clock_t	mir_use_timestamp;	/* updated on client with each use */
		/*
		 * This value is set to lbolt
		 * every time a client stream sends or receives data.
		 * Even if the timer message arrives, we don't shutdown
		 * client unless:
		 *    lbolt >= MSEC_TO_TICK(mir_idle_timeout)+mir_use_timestamp.
		 * This value is kept in HZ.
		 */

	uint_t	*mir_max_msg_sizep;	/* Reference to sanity check size */
		/*
		 * This pointer is set to &clnt_max_msg_size or
		 * &svc_max_msg_size during the appropriate ioctl.
		 */
	zoneid_t mir_zoneid;		/* zone which pushed rpcmod */
	/* Server-side fields. */
	int	mir_ref_cnt;		/* Reference count: server side only */
					/* counts the number of references */
					/* that a kernel RPC server thread */
					/* (see svc_run()) has on this rpcmod */
					/* slot. Effectively, it is the */
					/* number of unprocessed messages */
					/* that have been passed up to the */
					/* KRPC layer */

	mblk_t	*mir_svc_pend_mp;	/* Pending T_ORDREL_IND or */
					/* T_DISCON_IND */

	/*
	 * these fields are for both client and server, but for debugging,
	 * it is easier to have these last in the structure.
	 */
	kmutex_t	mir_mutex;	/* Mutex and condvar for close */
	kcondvar_t	mir_condvar;	/* synchronization. */
	kcondvar_t	mir_timer_cv;	/* Timer routine sync. */
} mir_t;

void tmp_rput(queue_t *q, mblk_t *mp);

struct xprt_style_ops tmpops = {
	NULL,
	NULL,
	putnext,
	NULL,
	tmp_rput,
	NULL
};

void
tmp_rput(queue_t *q, mblk_t *mp)
{
	struct temp_slot *t = (struct temp_slot *)(q->q_ptr);
	struct T_info_ack *pptr;

	switch (mp->b_datap->db_type) {
	case M_PCPROTO:
		pptr = (struct T_info_ack *)mp->b_rptr;
		switch (pptr->PRIM_type) {
		case T_INFO_ACK:
			mutex_enter(&t->lock);
			t->info_ack = mp;
			cv_signal(&t->wait);
			mutex_exit(&t->lock);
			return;
		default:
			break;
		}
	default:
		break;
	}

	/*
	 * Not an info-ack, so free it. This is ok because we should
	 * not be receiving data until the open finishes: rpcmod
	 * is pushed well before the end-point is bound to an address.
	 */
	freemsg(mp);
}

int
rmm_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *crp)
{
	mblk_t *bp;
	struct temp_slot ts, *t;
	struct T_info_ack *pptr;
	int error = 0;

	ASSERT(q != NULL);
	/*
	 * Check for re-opens.
	 */
	if (q->q_ptr) {
		TRACE_1(TR_FAC_KRPC, TR_RPCMODOPEN_END,
		    "rpcmodopen_end:(%s)", "q->qptr");
		return (0);
	}

	t = &ts;
	bzero(t, sizeof (*t));
	q->q_ptr = (void *)t;
	WR(q)->q_ptr = (void *)t;

	/*
	 * Allocate the required messages upfront.
	 */
	if ((bp = allocb_cred(sizeof (struct T_info_req) +
	    sizeof (struct T_info_ack), crp, curproc->p_pid)) == NULL) {
		return (ENOBUFS);
	}

	mutex_init(&t->lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&t->wait, NULL, CV_DEFAULT, NULL);

	t->ops = &tmpops;

	qprocson(q);
	bp->b_datap->db_type = M_PCPROTO;
	*(int32_t *)bp->b_wptr = (int32_t)T_INFO_REQ;
	bp->b_wptr += sizeof (struct T_info_req);
	putnext(WR(q), bp);

	mutex_enter(&t->lock);
	while (t->info_ack == NULL) {
		if (cv_wait_sig(&t->wait, &t->lock) == 0) {
			error = EINTR;
			break;
		}
	}
	mutex_exit(&t->lock);

	if (error)
		goto out;

	pptr = (struct T_info_ack *)t->info_ack->b_rptr;

	if (pptr->SERV_type == T_CLTS) {
		if ((error = rpcmodopen(q, devp, flag, sflag, crp)) == 0)
			((struct rpcm *)q->q_ptr)->rm_ops = &xprt_clts_ops;
	} else {
		if ((error = mir_open(q, devp, flag, sflag, crp)) == 0)
			((mir_t *)q->q_ptr)->rm_ops = &xprt_cots_ops;
	}

out:
	if (error)
		qprocsoff(q);

	freemsg(t->info_ack);
	mutex_destroy(&t->lock);
	cv_destroy(&t->wait);

	return (error);
}

void
rmm_rput(queue_t *q, mblk_t *mp)
{
	(*((struct temp_slot *)q->q_ptr)->ops->xo_rput)(q, mp);
}

void
rmm_rsrv(queue_t *q)
{
	(*((struct temp_slot *)q->q_ptr)->ops->xo_rsrv)(q);
}

void
rmm_wput(queue_t *q, mblk_t *mp)
{
	(*((struct temp_slot *)q->q_ptr)->ops->xo_wput)(q, mp);
}

void
rmm_wsrv(queue_t *q)
{
	(*((struct temp_slot *)q->q_ptr)->ops->xo_wsrv)(q);
}

int
rmm_close(queue_t *q, int flag, cred_t *crp)
{
	return ((*((struct temp_slot *)q->q_ptr)->ops->xo_close)(q, flag, crp));
}

static void rpcmod_release(queue_t *, mblk_t *);
/*
 * rpcmodopen - open routine gets called when the module gets pushed
 * onto the stream.
 */
/*ARGSUSED*/
int
rpcmodopen(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *crp)
{
	struct rpcm *rmp;

	extern void (*rpc_rele)(queue_t *, mblk_t *);

	TRACE_0(TR_FAC_KRPC, TR_RPCMODOPEN_START, "rpcmodopen_start:");

	/*
	 * Initialize entry points to release a rpcmod slot (and an input
	 * message if supplied) and to send an output message to the module
	 * below rpcmod.
	 */
	if (rpc_rele == NULL)
		rpc_rele = rpcmod_release;

	/*
	 * Only sufficiently privileged users can use this module, and it
	 * is assumed that they will use this module properly, and NOT send
	 * bulk data from downstream.
	 */
	if (secpolicy_rpcmod_open(crp) != 0)
		return (EPERM);

	/*
	 * Allocate slot data structure.
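	 * KM_SLEEP means the allocation cannot fail, though it may block.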
	 */
	rmp = kmem_zalloc(sizeof (*rmp), KM_SLEEP);

	mutex_init(&rmp->rm_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&rmp->rm_cwait, NULL, CV_DEFAULT, NULL);
	rmp->rm_zoneid = rpc_zoneid();
	/*
	 * slot type will be set by kRPC client and server ioctl's
	 */
	rmp->rm_type = 0;

	q->q_ptr = (void *)rmp;
	WR(q)->q_ptr = (void *)rmp;

	TRACE_1(TR_FAC_KRPC, TR_RPCMODOPEN_END, "rpcmodopen_end:(%s)", "end");
	return (0);
}

/*
 * rpcmodclose - This routine gets called when the module gets popped
 * off of the stream.
 */
/*ARGSUSED*/
int
rpcmodclose(queue_t *q, int flag, cred_t *crp)
{
	struct rpcm *rmp;

	ASSERT(q != NULL);
	rmp = (struct rpcm *)q->q_ptr;

	/*
	 * Mark our state as closing.
	 */
	mutex_enter(&rmp->rm_lock);
	rmp->rm_state |= RM_CLOSING;

	/*
	 * Check and see if there are any messages on the queue.  If so, send
	 * the messages, regardless of whether the downstream module is ready
	 * to accept data.
	 */
	if (rmp->rm_type == RPC_SERVER) {
		flushq(q, FLUSHDATA);

		qenable(WR(q));

		if (rmp->rm_ref) {
			mutex_exit(&rmp->rm_lock);
			/*
			 * call into SVC to clean the queue
			 */
			svc_queueclean(q);
			mutex_enter(&rmp->rm_lock);

			/*
			 * Block while there are kRPC threads with a reference
			 * to this message.
			 */
			while (rmp->rm_ref)
				cv_wait(&rmp->rm_cwait, &rmp->rm_lock);
		}

		mutex_exit(&rmp->rm_lock);

		/*
		 * It is now safe to remove this queue from the stream. No kRPC
		 * threads have a reference to the stream, and none ever will,
		 * because RM_CLOSING is set.
		 */
		qprocsoff(q);

		/* Notify kRPC that this stream is going away. */
		svc_queueclose(q);
	} else {
		mutex_exit(&rmp->rm_lock);
		qprocsoff(q);
	}

	q->q_ptr = NULL;
	WR(q)->q_ptr = NULL;
	mutex_destroy(&rmp->rm_lock);
	cv_destroy(&rmp->rm_cwait);
	kmem_free(rmp, sizeof (*rmp));
	return (0);
}

#ifdef DEBUG
int	rpcmod_send_msg_up = 0;
int	rpcmod_send_uderr = 0;
int	rpcmod_send_dup = 0;
int	rpcmod_send_dup_cnt = 0;
#endif

/*
 * rpcmodrput - Module read put procedure.  This is called from
 * the module, driver, or stream head downstream.
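 * Only M_PROTO/M_PCPROTO TPI messages are examined here; all other
 * message types are passed upstream unchanged.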
 */
void
rpcmodrput(queue_t *q, mblk_t *mp)
{
	struct rpcm *rmp;
	union T_primitives *pptr;
	int hdrsz;

	TRACE_0(TR_FAC_KRPC, TR_RPCMODRPUT_START, "rpcmodrput_start:");

	ASSERT(q != NULL);
	rmp = (struct rpcm *)q->q_ptr;

	if (rmp->rm_type == 0) {
		freemsg(mp);
		return;
	}

#ifdef DEBUG
	if (rpcmod_send_msg_up > 0) {
		mblk_t *nmp = copymsg(mp);
		if (nmp) {
			putnext(q, nmp);
			rpcmod_send_msg_up--;
		}
	}
	if ((rpcmod_send_uderr > 0) && mp->b_datap->db_type == M_PROTO) {
		mblk_t *nmp;
		struct T_unitdata_ind *data;
		struct T_uderror_ind *ud;
		int d;
		data = (struct T_unitdata_ind *)mp->b_rptr;
		if (data->PRIM_type == T_UNITDATA_IND) {
			d = sizeof (*ud) - sizeof (*data);
			nmp = allocb(mp->b_wptr - mp->b_rptr + d, BPRI_HI);
			if (nmp) {
				ud = (struct T_uderror_ind *)nmp->b_rptr;
				ud->PRIM_type = T_UDERROR_IND;
				ud->DEST_length = data->SRC_length;
				ud->DEST_offset = data->SRC_offset + d;
				ud->OPT_length = data->OPT_length;
				ud->OPT_offset = data->OPT_offset + d;
				ud->ERROR_type = ENETDOWN;
				if (data->SRC_length) {
					bcopy(mp->b_rptr +
					    data->SRC_offset,
					    nmp->b_rptr +
					    ud->DEST_offset,
					    data->SRC_length);
				}
				if (data->OPT_length) {
					bcopy(mp->b_rptr +
					    data->OPT_offset,
					    nmp->b_rptr +
					    ud->OPT_offset,
					    data->OPT_length);
				}
				nmp->b_wptr += d;
				nmp->b_wptr += (mp->b_wptr - mp->b_rptr);
				nmp->b_datap->db_type = M_PROTO;
				putnext(q, nmp);
				rpcmod_send_uderr--;
			}
		}
	}
#endif
	switch (mp->b_datap->db_type) {
	default:
		putnext(q, mp);
		break;

	case M_PROTO:
	case M_PCPROTO:
		ASSERT((mp->b_wptr - mp->b_rptr) >= sizeof (int32_t));
		pptr = (union T_primitives *)mp->b_rptr;

		/*
		 * Forward this message to krpc if it is data.
		 */
		if (pptr->type == T_UNITDATA_IND) {
			mblk_t *nmp;

			/*
			 * Check if the module is being popped.
			 */
			mutex_enter(&rmp->rm_lock);
			if (rmp->rm_state & RM_CLOSING) {
				mutex_exit(&rmp->rm_lock);
				putnext(q, mp);
				break;
			}

			switch (rmp->rm_type) {
			case RPC_CLIENT:
				mutex_exit(&rmp->rm_lock);
				hdrsz = mp->b_wptr - mp->b_rptr;

				/*
				 * Make sure the header is sane.
				 */
				if (hdrsz < TUNITDATAINDSZ ||
				    hdrsz < (pptr->unitdata_ind.OPT_length +
				    pptr->unitdata_ind.OPT_offset) ||
				    hdrsz < (pptr->unitdata_ind.SRC_length +
				    pptr->unitdata_ind.SRC_offset)) {
					freemsg(mp);
					return;
				}

				/*
				 * Call clnt_clts_dispatch_notify, so that it
				 * can pass the message to the proper caller.
				 * Don't discard the header just yet since the
				 * client may need the sender's address.
				 */
				clnt_clts_dispatch_notify(mp, hdrsz,
				    rmp->rm_zoneid);
				return;
			case RPC_SERVER:
				/*
				 * rm_krpc_cell is exclusively used by the kRPC
				 * CLTS server
				 */
				if (rmp->rm_krpc_cell) {
#ifdef DEBUG
					/*
					 * Test duplicate request cache and
					 * rm_ref count handling by sending a
					 * duplicate every so often, if
					 * desired.
					 */
					if (rpcmod_send_dup &&
					    rpcmod_send_dup_cnt++ %
					    rpcmod_send_dup)
						nmp = copymsg(mp);
					else
						nmp = NULL;
#endif
					/*
					 * Raise the reference count on this
					 * module to prevent it from being
					 * popped before krpc generates the
					 * reply.
					 */
					rmp->rm_ref++;
					mutex_exit(&rmp->rm_lock);

					/*
					 * Submit the message to krpc.
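					 * The rm_ref taken above is dropped
					 * in rpcmod_release() once krpc has
					 * finished with the request.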
					 */
					svc_queuereq(q, mp);
#ifdef DEBUG
					/*
					 * Send duplicate if we created one.
					 */
					if (nmp) {
						mutex_enter(&rmp->rm_lock);
						rmp->rm_ref++;
						mutex_exit(&rmp->rm_lock);
						svc_queuereq(q, nmp);
					}
#endif
				} else {
					mutex_exit(&rmp->rm_lock);
					freemsg(mp);
				}
				return;
			default:
				mutex_exit(&rmp->rm_lock);
				freemsg(mp);
				return;
			} /* end switch(rmp->rm_type) */
		} else if (pptr->type == T_UDERROR_IND) {
			mutex_enter(&rmp->rm_lock);
			hdrsz = mp->b_wptr - mp->b_rptr;

			/*
			 * Make sure the header is sane
			 */
			if (hdrsz < TUDERRORINDSZ ||
			    hdrsz < (pptr->uderror_ind.OPT_length +
			    pptr->uderror_ind.OPT_offset) ||
			    hdrsz < (pptr->uderror_ind.DEST_length +
			    pptr->uderror_ind.DEST_offset)) {
				mutex_exit(&rmp->rm_lock);
				freemsg(mp);
				return;
			}

			/*
			 * In the case where a unit data error has been
			 * received, all we need to do is clear the message from
			 * the queue.
			 */
			mutex_exit(&rmp->rm_lock);
			freemsg(mp);
			RPCLOG(32, "rpcmodrput: unitdata error received at "
			    "%ld\n", gethrestime_sec());
			return;
		} /* end else if (pptr->type == T_UDERROR_IND) */

		putnext(q, mp);
		break;
	} /* end switch (mp->b_datap->db_type) */

	TRACE_0(TR_FAC_KRPC, TR_RPCMODRPUT_END,
	    "rpcmodrput_end:");
	/*
	 * Return codes are not looked at by the STREAMS framework.
	 */
}

/*
 * write put procedure
 */
void
rpcmodwput(queue_t *q, mblk_t *mp)
{
	struct rpcm	*rmp;

	ASSERT(q != NULL);

	switch (mp->b_datap->db_type) {
	case M_PROTO:
	case M_PCPROTO:
		break;
	default:
		rpcmodwput_other(q, mp);
		return;
	}

	/*
	 * Check to see if we can send the message downstream.
	 */
	if (canputnext(q)) {
		putnext(q, mp);
		return;
	}

	rmp = (struct rpcm *)q->q_ptr;
	ASSERT(rmp != NULL);

	/*
	 * The first canputnext failed.  Try again except this time with the
	 * lock held, so that we can check the state of the stream to see if
	 * it is closing.  If either of these conditions evaluates to true
	 * then send the message.
	 */
	mutex_enter(&rmp->rm_lock);
	if (canputnext(q) || (rmp->rm_state & RM_CLOSING)) {
		mutex_exit(&rmp->rm_lock);
		putnext(q, mp);
	} else {
		/*
		 * canputnext failed again and the stream is not closing.
		 * Place the message on the queue and let the service
		 * procedure handle the message.
		 */
		mutex_exit(&rmp->rm_lock);
		(void) putq(q, mp);
	}
}

static void
rpcmodwput_other(queue_t *q, mblk_t *mp)
{
	struct rpcm	*rmp;
	struct iocblk	*iocp;

	rmp = (struct rpcm *)q->q_ptr;
	ASSERT(rmp != NULL);

	switch (mp->b_datap->db_type) {
	case M_IOCTL:
		iocp = (struct iocblk *)mp->b_rptr;
		ASSERT(iocp != NULL);
		switch (iocp->ioc_cmd) {
		case RPC_CLIENT:
		case RPC_SERVER:
			mutex_enter(&rmp->rm_lock);
			rmp->rm_type = iocp->ioc_cmd;
			mutex_exit(&rmp->rm_lock);
			mp->b_datap->db_type = M_IOCACK;
			qreply(q, mp);
			return;
		default:
			/*
			 * pass the ioctl downstream and hope someone
			 * down there knows how to handle it.
			 */
			putnext(q, mp);
			return;
		}
	default:
		break;
	}
	/*
	 * This is something we definitely do not know how to handle, just
	 * pass the message downstream
	 */
	putnext(q, mp);
}
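
/*
 * Illustrative sketch (not part of rpcmod itself): the slot type is set
 * by sending an RPC_CLIENT or RPC_SERVER M_IOCTL down the stream once
 * rpcmod has been pushed; rpcmodwput_other() above acknowledges it with
 * an M_IOCACK.  Conceptually:
 *
 *	strioc.ic_cmd = RPC_CLIENT;	(or RPC_SERVER)
 *	strioc.ic_timout = -1;
 *	strioc.ic_len = 0;
 *	strioc.ic_dp = NULL;
 *	error = strioctl(vp, I_STR, (intptr_t)&strioc, 0, K_TO_K, cr, &rval);
 *
 * The exact caller-side mechanics vary; see the kRPC client and server
 * setup code.
 */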

/*
 * Module write service procedure.  This is called by downstream modules
 * for back enabling during flow control.
 */
void
rpcmodwsrv(queue_t *q)
{
	struct rpcm	*rmp;
	mblk_t		*mp = NULL;

	rmp = (struct rpcm *)q->q_ptr;
	ASSERT(rmp != NULL);

	/*
	 * Get messages that may be queued and send them down stream
	 */
	while ((mp = getq(q)) != NULL) {
		/*
		 * Optimize the service procedure for the server-side, by
		 * avoiding a call to canputnext().
		 */
		if (rmp->rm_type == RPC_SERVER || canputnext(q)) {
			putnext(q, mp);
			continue;
		}
		(void) putbq(q, mp);
		return;
	}
}

static void
rpcmod_release(queue_t *q, mblk_t *bp)
{
	struct rpcm *rmp;

	/*
	 * For now, just free the message.
	 */
	if (bp)
		freemsg(bp);
	rmp = (struct rpcm *)q->q_ptr;

	mutex_enter(&rmp->rm_lock);
	rmp->rm_ref--;

	if (rmp->rm_ref == 0 && (rmp->rm_state & RM_CLOSING)) {
		cv_broadcast(&rmp->rm_cwait);
	}

	mutex_exit(&rmp->rm_lock);
}

/*
 * This part of rpcmod is pushed on a connection-oriented transport for use
 * by RPC.  It serves to bypass the Stream head, implements
 * the record marking protocol, and dispatches incoming RPC messages.
 */

/* Default idle timer values */
#define	MIR_CLNT_IDLE_TIMEOUT	(5 * (60 * 1000L))	/* 5 minutes */
#define	MIR_SVC_IDLE_TIMEOUT	(6 * (60 * 1000L))	/* 6 minutes */
#define	MIR_SVC_ORDREL_TIMEOUT	(10 * (60 * 1000L))	/* 10 minutes */
#define	MIR_LASTFRAG	0x80000000	/* Record marker */

#define	MIR_SVC_QUIESCED(mir)	\
	(mir->mir_ref_cnt == 0 && mir->mir_inrservice == 0)

#define	MIR_CLEAR_INRSRV(mir_ptr)	{	\
	(mir_ptr)->mir_inrservice = 0;	\
	if ((mir_ptr)->mir_type == RPC_SERVER &&	\
	    (mir_ptr)->mir_closing)	\
		cv_signal(&(mir_ptr)->mir_condvar);	\
}

/*
 * Don't block service procedure (and mir_close) if
 * we are in the process of closing.
 */
#define	MIR_WCANPUTNEXT(mir_ptr, write_q)	\
	(canputnext(write_q) || ((mir_ptr)->mir_svc_no_more_msgs == 1))

static int	mir_clnt_dup_request(queue_t *q, mblk_t *mp);
static void	mir_rput_proto(queue_t *q, mblk_t *mp);
static int	mir_svc_policy_notify(queue_t *q, int event);
static void	mir_svc_release(queue_t *wq, mblk_t *mp);
static void	mir_svc_start(queue_t *wq);
static void	mir_svc_idle_start(queue_t *, mir_t *);
static void	mir_svc_idle_stop(queue_t *, mir_t *);
static void	mir_svc_start_close(queue_t *, mir_t *);
static void	mir_clnt_idle_do_stop(queue_t *);
static void	mir_clnt_idle_stop(queue_t *, mir_t *);
static void	mir_clnt_idle_start(queue_t *, mir_t *);
static void	mir_wput(queue_t *q, mblk_t *mp);
static void	mir_wput_other(queue_t *q, mblk_t *mp);
static void	mir_wsrv(queue_t *q);
static void	mir_disconnect(queue_t *, mir_t *ir);
static int	mir_check_len(queue_t *, int32_t, mblk_t *);
static void	mir_timer(void *);

extern void	(*mir_rele)(queue_t *, mblk_t *);
extern void	(*mir_start)(queue_t *);
extern void	(*clnt_stop_idle)(queue_t *);

clock_t	clnt_idle_timeout = MIR_CLNT_IDLE_TIMEOUT;
clock_t	svc_idle_timeout = MIR_SVC_IDLE_TIMEOUT;

/*
 * Timeout for subsequent notifications of idle connection.  This is
 * typically used to clean up after a wedged orderly release.
 */
clock_t	svc_ordrel_timeout = MIR_SVC_ORDREL_TIMEOUT;	/* milliseconds */

extern	uint_t	*clnt_max_msg_sizep;
extern	uint_t	*svc_max_msg_sizep;
uint_t	clnt_max_msg_size = RPC_MAXDATASIZE;
uint_t	svc_max_msg_size = RPC_MAXDATASIZE;
uint_t	mir_krpc_cell_null;

static void
mir_timer_stop(mir_t *mir)
{
	timeout_id_t	tid;

	ASSERT(MUTEX_HELD(&mir->mir_mutex));

	/*
	 * Since the mir_mutex lock needs to be released to call
	 * untimeout(), we need to make sure that no other thread
	 * can start/stop the timer (changing mir_timer_id) during
	 * that time.  The mir_timer_call bit and the mir_timer_cv
	 * condition variable are used to synchronize this.  Setting
	 * mir_timer_call also tells mir_timer() (refer to the comments
	 * in mir_timer()) that it does not need to do anything.
	 */
	while (mir->mir_timer_call)
		cv_wait(&mir->mir_timer_cv, &mir->mir_mutex);
	mir->mir_timer_call = B_TRUE;

	if ((tid = mir->mir_timer_id) != 0) {
		mir->mir_timer_id = 0;
		mutex_exit(&mir->mir_mutex);
		(void) untimeout(tid);
		mutex_enter(&mir->mir_mutex);
	}
	mir->mir_timer_call = B_FALSE;
	cv_broadcast(&mir->mir_timer_cv);
}

static void
mir_timer_start(queue_t *q, mir_t *mir, clock_t intrvl)
{
	timeout_id_t	tid;

	ASSERT(MUTEX_HELD(&mir->mir_mutex));

	while (mir->mir_timer_call)
		cv_wait(&mir->mir_timer_cv, &mir->mir_mutex);
	mir->mir_timer_call = B_TRUE;

	if ((tid = mir->mir_timer_id) != 0) {
		mutex_exit(&mir->mir_mutex);
		(void) untimeout(tid);
		mutex_enter(&mir->mir_mutex);
	}
	/* Only start the timer when it is not closing. */
	if (!mir->mir_closing) {
		mir->mir_timer_id = timeout(mir_timer, q,
		    MSEC_TO_TICK(intrvl));
	}
	mir->mir_timer_call = B_FALSE;
	cv_broadcast(&mir->mir_timer_cv);
}

static int
mir_clnt_dup_request(queue_t *q, mblk_t *mp)
{
	mblk_t	*mp1;
	uint32_t	new_xid;
	uint32_t	old_xid;

	ASSERT(MUTEX_HELD(&((mir_t *)q->q_ptr)->mir_mutex));
	new_xid = BE32_TO_U32(&mp->b_rptr[4]);
	/*
	 * This loop is a bit tacky -- it walks the STREAMS list of
	 * flow-controlled messages.
	 */
	if ((mp1 = q->q_first) != NULL) {
		do {
			old_xid = BE32_TO_U32(&mp1->b_rptr[4]);
			if (new_xid == old_xid)
				return (1);
		} while ((mp1 = mp1->b_next) != NULL);
	}
	return (0);
}

static int
mir_close(queue_t *q)
{
	mir_t	*mir = q->q_ptr;
	mblk_t	*mp;
	bool_t queue_cleaned = FALSE;

	RPCLOG(32, "rpcmod: mir_close of q 0x%p\n", (void *)q);
	ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));
	mutex_enter(&mir->mir_mutex);
	if ((mp = mir->mir_head_mp) != NULL) {
		mir->mir_head_mp = NULL;
		mir->mir_tail_mp = NULL;
		freemsg(mp);
	}
	/*
	 * Set mir_closing so we get notified when MIR_SVC_QUIESCED()
	 * is TRUE.  And mir_timer_start() won't start the timer again.
	 */
	mir->mir_closing = B_TRUE;
	mir_timer_stop(mir);

	if (mir->mir_type == RPC_SERVER) {
		flushq(q, FLUSHDATA);	/* Ditch anything waiting on read q */

		/*
		 * This will prevent more requests from arriving and
		 * will force rpcmod to ignore flow control.
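		 * (mir_svc_start_close() clears mir_hold_inbound and
		 * enables both queues so queued work drains.)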
		 */
		mir_svc_start_close(WR(q), mir);

		while ((!MIR_SVC_QUIESCED(mir)) || mir->mir_inwservice == 1) {

			if (mir->mir_ref_cnt && !mir->mir_inrservice &&
			    (queue_cleaned == FALSE)) {
				/*
				 * call into SVC to clean the queue
				 */
				mutex_exit(&mir->mir_mutex);
				svc_queueclean(q);
				queue_cleaned = TRUE;
				mutex_enter(&mir->mir_mutex);
				continue;
			}

			/*
			 * Bugid 1253810 - Force the write service
			 * procedure to send its messages, regardless
			 * of whether the downstream module is ready
			 * to accept data.
			 */
			if (mir->mir_inwservice == 1)
				qenable(WR(q));

			cv_wait(&mir->mir_condvar, &mir->mir_mutex);
		}

		mutex_exit(&mir->mir_mutex);
		qprocsoff(q);

		/* Notify KRPC that this stream is going away. */
		svc_queueclose(q);
	} else {
		mutex_exit(&mir->mir_mutex);
		qprocsoff(q);
	}

	mutex_destroy(&mir->mir_mutex);
	cv_destroy(&mir->mir_condvar);
	cv_destroy(&mir->mir_timer_cv);
	kmem_free(mir, sizeof (mir_t));
	return (0);
}

/*
 * This is server side only (RPC_SERVER).
 *
 * Exit idle mode.
 */
static void
mir_svc_idle_stop(queue_t *q, mir_t *mir)
{
	ASSERT(MUTEX_HELD(&mir->mir_mutex));
	ASSERT((q->q_flag & QREADR) == 0);
	ASSERT(mir->mir_type == RPC_SERVER);
	RPCLOG(16, "rpcmod: mir_svc_idle_stop of q 0x%p\n", (void *)q);

	mir_timer_stop(mir);
}

/*
 * This is server side only (RPC_SERVER).
 *
 * Start idle processing, which will include setting idle timer if the
 * stream is not being closed.
 */
static void
mir_svc_idle_start(queue_t *q, mir_t *mir)
{
	ASSERT(MUTEX_HELD(&mir->mir_mutex));
	ASSERT((q->q_flag & QREADR) == 0);
	ASSERT(mir->mir_type == RPC_SERVER);
	RPCLOG(16, "rpcmod: mir_svc_idle_start q 0x%p\n", (void *)q);

	/*
	 * Don't re-start idle timer if we are closing queues.
	 */
	if (mir->mir_closing) {
		RPCLOG(16, "mir_svc_idle_start - closing: 0x%p\n",
		    (void *)q);

		/*
		 * We will call mir_svc_idle_start() whenever MIR_SVC_QUIESCED()
		 * is true.  When it is true, and we are in the process of
		 * closing the stream, signal any thread waiting in
		 * mir_close().
		 */
		if (mir->mir_inwservice == 0)
			cv_signal(&mir->mir_condvar);

	} else {
		RPCLOG(16, "mir_svc_idle_start - reset %s timer\n",
		    mir->mir_ordrel_pending ? "ordrel" : "normal");
		/*
		 * Normal condition, start the idle timer.  If an orderly
		 * release has been sent, set the timeout to wait for the
		 * client to close its side of the connection.  Otherwise,
		 * use the normal idle timeout.
		 */
		mir_timer_start(q, mir, mir->mir_ordrel_pending ?
		    svc_ordrel_timeout : mir->mir_idle_timeout);
	}
}

/* ARGSUSED */
static int
mir_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
{
	mir_t	*mir;

	RPCLOG(32, "rpcmod: mir_open of q 0x%p\n", (void *)q);
	/*
	 * Set variables used directly by KRPC.
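	 * These global hooks let generic kRPC code call back into
	 * rpcmod without creating a static dependency on this module.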
	 */
	if (!mir_rele)
		mir_rele = mir_svc_release;
	if (!mir_start)
		mir_start = mir_svc_start;
	if (!clnt_stop_idle)
		clnt_stop_idle = mir_clnt_idle_do_stop;
	if (!clnt_max_msg_sizep)
		clnt_max_msg_sizep = &clnt_max_msg_size;
	if (!svc_max_msg_sizep)
		svc_max_msg_sizep = &svc_max_msg_size;

	/* Allocate a zero'ed out mir structure for this stream. */
	mir = kmem_zalloc(sizeof (mir_t), KM_SLEEP);

	/*
	 * We set hold inbound here so that incoming messages will
	 * be held on the read-side queue until the stream is completely
	 * initialized with a RPC_CLIENT or RPC_SERVER ioctl.  During
	 * the ioctl processing, the flag is cleared and any messages that
	 * arrived between the open and the ioctl are delivered to KRPC.
	 *
	 * Early data should never arrive on a client stream since
	 * servers only respond to our requests and we do not send any
	 * until after the stream is initialized.  Early data is
	 * very common on a server stream where the client will start
	 * sending data as soon as the connection is made (and this
	 * is especially true with TCP where the protocol accepts the
	 * connection before nfsd or KRPC is notified about it).
	 */

	mir->mir_hold_inbound = 1;

	/*
	 * Start the record marker looking for a 4-byte header.  When
	 * this length is negative, it indicates that rpcmod is looking
	 * for bytes to consume for the record marker header.  When it
	 * is positive, it holds the number of bytes that have arrived
	 * for the current fragment and are being held in mir_head_mp.
	 */

	mir->mir_frag_len = -(int32_t)sizeof (uint32_t);

	mir->mir_zoneid = rpc_zoneid();
	mutex_init(&mir->mir_mutex, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&mir->mir_condvar, NULL, CV_DRIVER, NULL);
	cv_init(&mir->mir_timer_cv, NULL, CV_DRIVER, NULL);

	q->q_ptr = (char *)mir;
	WR(q)->q_ptr = (char *)mir;

	/*
	 * We noenable the read-side queue because we don't want it
	 * automatically enabled by putq.  We enable it explicitly
	 * in mir_wsrv when appropriate. (See additional comments on
	 * flow control at the beginning of mir_rsrv.)
	 */
	noenable(q);

	qprocson(q);
	return (0);
}

/*
 * Read-side put routine for both the client and server side.  Does the
 * record marking for incoming RPC messages, and when complete, dispatches
 * the message to either the client or server.
 */
static void
mir_rput(queue_t *q, mblk_t *mp)
{
	int	excess;
	int32_t	frag_len, frag_header;
	mblk_t	*cont_mp, *head_mp, *tail_mp, *mp1;
	mir_t	*mir = q->q_ptr;
	boolean_t stop_timer = B_FALSE;

	ASSERT(mir != NULL);

	/*
	 * If the stream has not been set up as a RPC_CLIENT or RPC_SERVER
	 * with the corresponding ioctl, then don't accept
	 * any inbound data.  This should never happen for streams
	 * created by nfsd or client-side KRPC because they are careful
	 * to set the mode of the stream before doing anything else.
	 */
	if (mir->mir_type == 0) {
		freemsg(mp);
		return;
	}

	ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));

	switch (mp->b_datap->db_type) {
	case M_DATA:
		break;
	case M_PROTO:
	case M_PCPROTO:
		if (MBLKL(mp) < sizeof (t_scalar_t)) {
			RPCLOG(1, "mir_rput: runt TPI message (%d bytes)\n",
			    (int)MBLKL(mp));
			freemsg(mp);
			return;
		}
		if (((union T_primitives *)mp->b_rptr)->type != T_DATA_IND) {
			mir_rput_proto(q, mp);
			return;
		}

		/* Throw away the T_DATA_IND block and continue with data. */
		mp1 = mp;
		mp = mp->b_cont;
		freeb(mp1);
		break;
	case M_SETOPTS:
		/*
		 * If a module on the stream is trying to set the Stream head's
		 * high water mark, then set our hiwater to the requested
		 * value.  We are the "stream head" for all inbound
		 * data messages since messages are passed directly to KRPC.
		 */
		if (MBLKL(mp) >= sizeof (struct stroptions)) {
			struct stroptions	*stropts;

			stropts = (struct stroptions *)mp->b_rptr;
			if ((stropts->so_flags & SO_HIWAT) &&
			    !(stropts->so_flags & SO_BAND)) {
				(void) strqset(q, QHIWAT, 0, stropts->so_hiwat);
			}
		}
		putnext(q, mp);
		return;
	case M_FLUSH:
		RPCLOG(32, "mir_rput: ignoring M_FLUSH %x ", *mp->b_rptr);
		RPCLOG(32, "on q 0x%p\n", (void *)q);
		putnext(q, mp);
		return;
	default:
		putnext(q, mp);
		return;
	}

	mutex_enter(&mir->mir_mutex);

	/*
	 * If this connection is closing, don't accept any new messages.
	 */
	if (mir->mir_svc_no_more_msgs) {
		ASSERT(mir->mir_type == RPC_SERVER);
		mutex_exit(&mir->mir_mutex);
		freemsg(mp);
		return;
	}

	/* Get local copies for quicker access. */
	frag_len = mir->mir_frag_len;
	frag_header = mir->mir_frag_header;
	head_mp = mir->mir_head_mp;
	tail_mp = mir->mir_tail_mp;

	/* Loop, processing each message block in the mp chain separately. */
	do {
		cont_mp = mp->b_cont;
		mp->b_cont = NULL;

		/*
		 * Drop zero-length mblks to prevent unbounded kernel memory
		 * consumption.
		 */
		if (MBLKL(mp) == 0) {
			freeb(mp);
			continue;
		}

		/*
		 * If frag_len is negative, we're still in the process of
		 * building frag_header -- try to complete it with this mblk.
		 */
		while (frag_len < 0 && mp->b_rptr < mp->b_wptr) {
			frag_len++;
			frag_header <<= 8;
			frag_header += *mp->b_rptr++;
		}

		if (MBLKL(mp) == 0 && frag_len < 0) {
			/*
			 * We consumed this mblk while trying to complete the
			 * fragment header.  Free it and move on.
			 */
			freeb(mp);
			continue;
		}

		ASSERT(frag_len >= 0);

		/*
		 * Now frag_header has the number of bytes in this fragment
		 * and we're just waiting to collect them all.  Chain our
		 * latest mblk onto the list and see if we now have enough
		 * bytes to complete the fragment.
		 */
		if (head_mp == NULL) {
			ASSERT(tail_mp == NULL);
			head_mp = tail_mp = mp;
		} else {
			tail_mp->b_cont = mp;
			tail_mp = mp;
		}

		frag_len += MBLKL(mp);
		excess = frag_len - (frag_header & ~MIR_LASTFRAG);
		if (excess < 0) {
			/*
			 * We still haven't received enough data to complete
			 * the fragment, so continue on to the next mblk.
			 */
			continue;
		}
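
		/*
		 * Worked example: a 100-byte last fragment arrives as the
		 * 4-byte marker 0x80000064 (MIR_LASTFRAG | 100) followed
		 * by 100 data bytes; frag_header collects 0x80000064 and
		 * excess is however many bytes this mblk holds past those
		 * 100.
		 */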

		/*
		 * We've got a complete fragment.  If there are excess bytes,
		 * then they're part of the next fragment's header (of either
		 * this RPC message or the next RPC message).  Split that part
		 * into its own mblk so that we can safely freeb() it when
		 * building frag_header above.
		 */
		if (excess > 0) {
			if ((mp1 = dupb(mp)) == NULL &&
			    (mp1 = copyb(mp)) == NULL) {
				freemsg(head_mp);
				freemsg(cont_mp);
				RPCLOG0(1, "mir_rput: dupb/copyb failed\n");
				mir->mir_frag_header = 0;
				mir->mir_frag_len = -(int32_t)sizeof (uint32_t);
				mir->mir_head_mp = NULL;
				mir->mir_tail_mp = NULL;
				mir_disconnect(q, mir);	/* drops mir_mutex */
				return;
			}

			/*
			 * Relink the message chain so that the next mblk is
			 * the next fragment header, followed by the rest of
			 * the message chain.
			 */
			mp1->b_cont = cont_mp;
			cont_mp = mp1;

			/*
			 * Data in the new mblk begins at the next fragment,
			 * and data in the old mblk ends at the next fragment.
			 */
			mp1->b_rptr = mp1->b_wptr - excess;
			mp->b_wptr -= excess;
		}

		/*
		 * Reset frag_len and frag_header for the next fragment.
		 */
		frag_len = -(int32_t)sizeof (uint32_t);
		if (!(frag_header & MIR_LASTFRAG)) {
			/*
			 * The current fragment is complete, but more
			 * fragments need to be processed before we can
			 * pass along the RPC message headed at head_mp.
			 */
			frag_header = 0;
			continue;
		}
		frag_header = 0;

		/*
		 * We've got a complete RPC message; pass it to the
		 * appropriate consumer.
		 */
		switch (mir->mir_type) {
		case RPC_CLIENT:
			if (clnt_dispatch_notify(head_mp, mir->mir_zoneid)) {
				/*
				 * Mark this stream as active.  This marker
				 * is used in mir_timer().
				 */
				mir->mir_clntreq = 1;
				mir->mir_use_timestamp = ddi_get_lbolt();
			} else {
				freemsg(head_mp);
			}
			break;

		case RPC_SERVER:
			/*
			 * Check for flow control before passing the
			 * message to KRPC.
			 */
			if (!mir->mir_hold_inbound) {
				if (mir->mir_krpc_cell) {
					/*
					 * If the reference count is 0
					 * (not including this request),
					 * then the stream is transitioning
					 * from idle to non-idle.  In this case,
					 * we cancel the idle timer.
					 */
					if (mir->mir_ref_cnt++ == 0)
						stop_timer = B_TRUE;
					if (mir_check_len(q,
					    (int32_t)msgdsize(mp), mp))
						return;
					svc_queuereq(q, head_mp); /* to KRPC */
				} else {
					/*
					 * Count # of times this happens. Should
					 * be never, but experience shows
					 * otherwise.
					 */
					mir_krpc_cell_null++;
					freemsg(head_mp);
				}
			} else {
				/*
				 * If the outbound side of the stream is
				 * flow controlled, then hold this message
				 * until client catches up. mir_hold_inbound
				 * is set in mir_wput and cleared in mir_wsrv.
				 */
				(void) putq(q, head_mp);
				mir->mir_inrservice = B_TRUE;
			}
			break;
		default:
			RPCLOG(1, "mir_rput: unknown mir_type %d\n",
			    mir->mir_type);
			freemsg(head_mp);
			break;
		}

		/*
		 * Reset the chain since we're starting on a new RPC message.
		 */
		head_mp = tail_mp = NULL;
	} while ((mp = cont_mp) != NULL);

	/*
	 * Sanity check the message length; if it's too large mir_check_len()
	 * will shutdown the connection, drop mir_mutex, and return non-zero.
	 */
	if (head_mp != NULL && mir->mir_setup_complete &&
	    mir_check_len(q, frag_len, head_mp))
		return;

	/* Save our local copies back in the mir structure. */
	mir->mir_frag_header = frag_header;
	mir->mir_frag_len = frag_len;
	mir->mir_head_mp = head_mp;
	mir->mir_tail_mp = tail_mp;

	/*
	 * The timer is stopped after the whole message chain is processed.
	 * The reason is that stopping the timer releases the mir_mutex
	 * lock temporarily.  This means that the request can be serviced
	 * while we are still processing the message chain.  This is not
	 * good.  So we stop the timer here instead.
	 *
	 * Note that if the timer fires before we stop it, it will not
	 * do any harm as MIR_SVC_QUIESCED() is false and mir_timer()
	 * will just return.
	 */
	if (stop_timer) {
		RPCLOG(16, "mir_rput: stopping idle timer on 0x%p because "
		    "ref cnt going to non zero\n", (void *)WR(q));
		mir_svc_idle_stop(WR(q), mir);
	}
	mutex_exit(&mir->mir_mutex);
}

static void
mir_rput_proto(queue_t *q, mblk_t *mp)
{
	mir_t	*mir = (mir_t *)q->q_ptr;
	uint32_t	type;
	uint32_t reason = 0;

	ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));

	type = ((union T_primitives *)mp->b_rptr)->type;
	switch (mir->mir_type) {
	case RPC_CLIENT:
		switch (type) {
		case T_DISCON_IND:
			reason = ((struct T_discon_ind *)
			    (mp->b_rptr))->DISCON_reason;
			/*FALLTHROUGH*/
		case T_ORDREL_IND:
			mutex_enter(&mir->mir_mutex);
			if (mir->mir_head_mp) {
				freemsg(mir->mir_head_mp);
				mir->mir_head_mp = (mblk_t *)0;
				mir->mir_tail_mp = (mblk_t *)0;
			}
			/*
			 * We are disconnecting, but not necessarily
			 * closing. By not closing, we will fail to
			 * pick up a possibly changed global timeout value,
			 * unless we store it now.
			 */
			mir->mir_idle_timeout = clnt_idle_timeout;
			mir_clnt_idle_stop(WR(q), mir);

			/*
			 * Even though we are unconnected, we still
			 * leave the idle timer going on the client.  The
			 * reason for this is that if we've disconnected due
			 * to a server-side disconnect, reset, or connection
			 * timeout, there is a possibility the client may
			 * retry the RPC request.  This retry needs to be done
			 * on the same bound address for the server to
			 * interpret it as such.  However, we don't want
			 * to wait forever for that possibility.  If the
			 * end-point stays unconnected for mir_idle_timeout
			 * units of time, then that is a signal to the
			 * connection manager to give up waiting for the
			 * application (e.g. NFS) to send a retry.
			 */
			mir_clnt_idle_start(WR(q), mir);
			mutex_exit(&mir->mir_mutex);
			clnt_dispatch_notifyall(WR(q), type, reason);
			freemsg(mp);
			return;
		case T_ERROR_ACK:
		{
			struct T_error_ack	*terror;

			terror = (struct T_error_ack *)mp->b_rptr;
			RPCLOG(1, "mir_rput_proto T_ERROR_ACK for queue 0x%p",
			    (void *)q);
			RPCLOG(1, " ERROR_prim: %s,",
			    rpc_tpiprim2name(terror->ERROR_prim));
			RPCLOG(1, " TLI_error: %s,",
			    rpc_tpierr2name(terror->TLI_error));
			RPCLOG(1, " UNIX_error: %d\n", terror->UNIX_error);
			if (terror->ERROR_prim == T_DISCON_REQ) {
				clnt_dispatch_notifyall(WR(q), type, reason);
				freemsg(mp);
				return;
			} else {
				if (clnt_dispatch_notifyconn(WR(q), mp))
					return;
			}
			break;
		}
		case T_OK_ACK:
		{
			struct T_ok_ack	*tok = (struct T_ok_ack *)mp->b_rptr;

			if (tok->CORRECT_prim == T_DISCON_REQ) {
				clnt_dispatch_notifyall(WR(q), type, reason);
				freemsg(mp);
				return;
			} else {
				if (clnt_dispatch_notifyconn(WR(q), mp))
					return;
			}
			break;
		}
		case T_CONN_CON:
		case T_INFO_ACK:
		case T_OPTMGMT_ACK:
			if (clnt_dispatch_notifyconn(WR(q), mp))
				return;
			break;
		case T_BIND_ACK:
			break;
		default:
			RPCLOG(1, "mir_rput: unexpected message %d "
			    "for KRPC client\n",
			    ((union T_primitives *)mp->b_rptr)->type);
			break;
		}
		break;

	case RPC_SERVER:
		switch (type) {
		case T_BIND_ACK:
		{
			struct T_bind_ack	*tbind;

			/*
			 * If this is a listening stream, then shut
			 * off the idle timer.
			 */
			tbind = (struct T_bind_ack *)mp->b_rptr;
			if (tbind->CONIND_number > 0) {
				mutex_enter(&mir->mir_mutex);
				mir_svc_idle_stop(WR(q), mir);

				/*
				 * mark this as a listen endpoint
				 * for special handling.
				 */

				mir->mir_listen_stream = 1;
				mutex_exit(&mir->mir_mutex);
			}
			break;
		}
		case T_DISCON_IND:
		case T_ORDREL_IND:
			RPCLOG(16, "mir_rput_proto: got %s indication\n",
			    type == T_DISCON_IND ? "disconnect"
			    : "orderly release");

			/*
			 * For listen endpoint just pass
			 * on the message.
			 */

			if (mir->mir_listen_stream)
				break;

			mutex_enter(&mir->mir_mutex);

			/*
			 * If client wants to break off connection, record
			 * that fact.
			 */
			mir_svc_start_close(WR(q), mir);

			/*
			 * If we are idle, then send the orderly release
			 * or disconnect indication to nfsd.
			 */
			if (MIR_SVC_QUIESCED(mir)) {
				mutex_exit(&mir->mir_mutex);
				break;
			}

			RPCLOG(16, "mir_rput_proto: not idle, so "
			    "disconnect/ord rel indication not passed "
			    "upstream on 0x%p\n", (void *)q);

			/*
			 * Hold the indication until we get idle
			 * If there already is an indication stored,
			 * replace it if the new one is a disconnect.  The
			 * reasoning is that disconnection takes less time
			 * to process, and once a client decides to
			 * disconnect, we should do that.
			 */
			if (mir->mir_svc_pend_mp) {
				if (type == T_DISCON_IND) {
					RPCLOG(16, "mir_rput_proto: replacing"
					    " held disconnect/ord rel"
					    " indication with disconnect on"
					    " 0x%p\n", (void *)q);

					freemsg(mir->mir_svc_pend_mp);
					mir->mir_svc_pend_mp = mp;
				} else {
					RPCLOG(16, "mir_rput_proto: already "
					    "held a disconnect/ord rel "
					    "indication. freeing ord rel "
					    "ind on 0x%p\n", (void *)q);
					freemsg(mp);
				}
			} else
				mir->mir_svc_pend_mp = mp;

			mutex_exit(&mir->mir_mutex);
			return;

		default:
			/* nfsd handles server-side non-data messages. */
			break;
		}
		break;

	default:
		break;
	}

	putnext(q, mp);
}

/*
 * The server-side read queues are used to hold inbound messages while
 * outbound flow control is exerted.  When outbound flow control is
 * relieved, mir_wsrv qenables the read-side queue.  Read-side queues
 * are not enabled by STREAMS and are explicitly noenable'ed in mir_open.
 *
 * For the server side, we have two types of messages queued. The first type
 * are messages that are ready to be XDR decoded and then sent to the
 * RPC program's dispatch routine.  The second type are "raw" messages that
 * haven't been processed, i.e. assembled from rpc record fragments into
 * full requests.  The only time we will see the second type of message
 * queued is if we have a memory allocation failure while processing a
 * raw message.  The field mir_first_non_processed_mblk will mark the
 * first such raw message.  So the flow for server side is:
 *
 * - send processed queued messages to kRPC until we run out or find
 *   one that needs additional processing because we were short on memory
 *   earlier
 * - process a message that was deferred because of lack of
 *   memory
 * - continue processing messages until the queue empties or we
 *   have to stop because of lack of memory
 * - during each of the above phases, if the queue is empty and
 *   there are no pending messages that were passed to the RPC
 *   layer, send upstream the pending disconnect/ordrel indication if
 *   there is one
 *
 * The read-side queue is also enabled by a bufcall callback if dupmsg
 * fails in mir_rput.
 */
static void
mir_rsrv(queue_t *q)
{
	mir_t	*mir;
	mblk_t	*mp;
	mblk_t	*cmp = NULL;
	boolean_t stop_timer = B_FALSE;

	mir = (mir_t *)q->q_ptr;
	mutex_enter(&mir->mir_mutex);

	mp = NULL;
	switch (mir->mir_type) {
	case RPC_SERVER:
		if (mir->mir_ref_cnt == 0)
			mir->mir_hold_inbound = 0;
		if (mir->mir_hold_inbound) {

			ASSERT(cmp == NULL);
			if (q->q_first == NULL) {

				MIR_CLEAR_INRSRV(mir);

				if (MIR_SVC_QUIESCED(mir)) {
					cmp = mir->mir_svc_pend_mp;
					mir->mir_svc_pend_mp = NULL;
				}
			}

			mutex_exit(&mir->mir_mutex);

			if (cmp != NULL) {
				RPCLOG(16, "mir_rsrv: line %d: sending a held "
				    "disconnect/ord rel indication upstream\n",
				    __LINE__);
				putnext(q, cmp);
			}

			return;
		}
		while (mp = getq(q)) {
			if (mir->mir_krpc_cell &&
			    (mir->mir_svc_no_more_msgs == 0)) {
				/*
				 * If we were idle, turn off idle timer since
				 * we aren't idle any more.
				 */
				if (mir->mir_ref_cnt++ == 0)
					stop_timer = B_TRUE;
				if (mir_check_len(q,
				    (int32_t)msgdsize(mp), mp))
					return;
				svc_queuereq(q, mp);
			} else {
				/*
				 * Count # of times this happens. Should be
				 * never, but experience shows otherwise.
				 */
				if (mir->mir_krpc_cell == NULL)
					mir_krpc_cell_null++;
				freemsg(mp);
			}
		}
		break;
	case RPC_CLIENT:
		break;
	default:
		RPCLOG(1, "mir_rsrv: unexpected mir_type %d\n", mir->mir_type);

		if (q->q_first == NULL)
			MIR_CLEAR_INRSRV(mir);

		mutex_exit(&mir->mir_mutex);

		return;
	}

	/*
	 * The timer is stopped after all the messages are processed.
	 * The reason is that stopping the timer releases the mir_mutex
	 * lock temporarily.  This means that the request can be serviced
	 * while we are still processing the message queue.  This is not
	 * good.  So we stop the timer here instead.
	 */
	if (stop_timer) {
		RPCLOG(16, "mir_rsrv stopping idle timer on 0x%p because ref "
		    "cnt going to non zero\n", (void *)WR(q));
		mir_svc_idle_stop(WR(q), mir);
	}

	if (q->q_first == NULL) {

		MIR_CLEAR_INRSRV(mir);

		ASSERT(cmp == NULL);
		if (mir->mir_type == RPC_SERVER && MIR_SVC_QUIESCED(mir)) {
			cmp = mir->mir_svc_pend_mp;
			mir->mir_svc_pend_mp = NULL;
		}

		mutex_exit(&mir->mir_mutex);

		if (cmp != NULL) {
			RPCLOG(16, "mir_rsrv: line %d: sending a held "
			    "disconnect/ord rel indication upstream\n",
			    __LINE__);
			putnext(q, cmp);
		}

		return;
	}
	mutex_exit(&mir->mir_mutex);
}

static int mir_svc_policy_fails;

/*
 * Called to send an event code to nfsd/lockd so that it initiates
 * connection close.
 */
static int
mir_svc_policy_notify(queue_t *q, int event)
{
	mblk_t	*mp;
#ifdef DEBUG
	mir_t *mir = (mir_t *)q->q_ptr;
	ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));
#endif
	ASSERT(q->q_flag & QREADR);

	/*
	 * Create an M_DATA message with the event code and pass it to the
	 * Stream head (nfsd or whoever created the stream will consume it).
	 */
	mp = allocb(sizeof (int), BPRI_HI);

	if (!mp) {

		mir_svc_policy_fails++;
		RPCLOG(16, "mir_svc_policy_notify: could not allocate event "
		    "%d\n", event);
		return (ENOMEM);
	}

	U32_TO_BE32(event, mp->b_rptr);
	mp->b_wptr = mp->b_rptr + sizeof (int);
	putnext(q, mp);
	return (0);
}

/*
 * Server side: start the close phase.  We want to get this rpcmod slot in an
 * idle state before mir_close() is called.
 */
static void
mir_svc_start_close(queue_t *wq, mir_t *mir)
{
	ASSERT(MUTEX_HELD(&mir->mir_mutex));
	ASSERT((wq->q_flag & QREADR) == 0);
	ASSERT(mir->mir_type == RPC_SERVER);

	/*
	 * Do not accept any more messages.
	 */
	mir->mir_svc_no_more_msgs = 1;

	/*
	 * Next two statements will make the read service procedure invoke
	 * svc_queuereq() on everything stuck in the streams read queue.
	 * It's not necessary because enabling the write queue will
	 * have the same effect, but why not speed the process along?
	 */
	mir->mir_hold_inbound = 0;
	qenable(RD(wq));

	/*
	 * Meanwhile force the write service procedure to send the
	 * responses downstream, regardless of flow control.
	 */
	qenable(wq);
}

/*
 * This routine is called directly by KRPC after a request is completed,
 * whether a reply was sent or the request was dropped.
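 * It drops the mir_ref_cnt reference taken in mir_rput()/mir_rsrv()
 * when the request was passed up to KRPC.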

/*
 * Server side: start the close phase.  We want to get this rpcmod slot
 * into an idle state before mir_close() is called.
 */
static void
mir_svc_start_close(queue_t *wq, mir_t *mir)
{
	ASSERT(MUTEX_HELD(&mir->mir_mutex));
	ASSERT((wq->q_flag & QREADR) == 0);
	ASSERT(mir->mir_type == RPC_SERVER);

	/*
	 * Do not accept any more messages.
	 */
	mir->mir_svc_no_more_msgs = 1;

	/*
	 * The next two statements will cause the read service procedure
	 * to run, freeing everything stuck in the STREAMS read queue
	 * (mir_svc_no_more_msgs is now set, so nothing new is dispatched).
	 * It's not strictly necessary, because enabling the write queue
	 * will have the same effect, but why not speed the process along?
	 */
	mir->mir_hold_inbound = 0;
	qenable(RD(wq));

	/*
	 * Meanwhile force the write service procedure to send the
	 * responses downstream, regardless of flow control.
	 */
	qenable(wq);
}

/*
 * This routine is called directly by KRPC after a request is completed,
 * whether a reply was sent or the request was dropped.
 */
static void
mir_svc_release(queue_t *wq, mblk_t *mp)
{
	mir_t	*mir = (mir_t *)wq->q_ptr;
	mblk_t	*cmp = NULL;

	ASSERT((wq->q_flag & QREADR) == 0);
	if (mp)
		freemsg(mp);

	mutex_enter(&mir->mir_mutex);

	/*
	 * If this is the last reference and the read side is quiescent,
	 * pick up any held disconnect/ordrel indication so that it can
	 * be sent upstream.
	 */
	if ((mir->mir_ref_cnt == 1) && (mir->mir_inrservice == 0)) {
		cmp = mir->mir_svc_pend_mp;
		mir->mir_svc_pend_mp = NULL;
	}

	if (cmp) {
		RPCLOG(16, "mir_svc_release: sending a held "
		    "disconnect/ord rel indication upstream on queue 0x%p\n",
		    (void *)RD(wq));

		mutex_exit(&mir->mir_mutex);

		putnext(RD(wq), cmp);

		mutex_enter(&mir->mir_mutex);
	}

	/*
	 * Start idle processing if this is the last reference.
	 */
	if (mir->mir_ref_cnt == 1 && mir->mir_inrservice == 0) {

		RPCLOG(16, "mir_svc_release starting idle timer on 0x%p "
		    "because ref cnt is going to zero\n", (void *)wq);

		mir_svc_idle_start(wq, mir);
	}

	mir->mir_ref_cnt--;
	ASSERT(mir->mir_ref_cnt >= 0);

	/*
	 * Wake up the thread waiting to close.
	 */
	if ((mir->mir_ref_cnt == 0) && mir->mir_closing)
		cv_signal(&mir->mir_condvar);

	mutex_exit(&mir->mir_mutex);
}

/*
 * This routine is called by server-side KRPC when it is ready to
 * handle inbound messages on the stream.
 */
static void
mir_svc_start(queue_t *wq)
{
	mir_t	*mir = (mir_t *)wq->q_ptr;

	/*
	 * We no longer need to take the mir_mutex here, because the
	 * mir_setup_complete field has been moved out of the bit field
	 * that is protected by the mir_mutex.
	 */
	mir->mir_setup_complete = 1;
	qenable(RD(wq));
}

/*
 * Client side wrapper for stopping the timer with the normal idle timeout.
 */
static void
mir_clnt_idle_stop(queue_t *wq, mir_t *mir)
{
	ASSERT(MUTEX_HELD(&mir->mir_mutex));
	ASSERT((wq->q_flag & QREADR) == 0);
	ASSERT(mir->mir_type == RPC_CLIENT);

	mir_timer_stop(mir);
}

/*
 * Client side wrapper for starting the timer with the normal idle timeout.
 */
static void
mir_clnt_idle_start(queue_t *wq, mir_t *mir)
{
	ASSERT(MUTEX_HELD(&mir->mir_mutex));
	ASSERT((wq->q_flag & QREADR) == 0);
	ASSERT(mir->mir_type == RPC_CLIENT);

	mir_timer_start(wq, mir, mir->mir_idle_timeout);
}

/*
 * Client side only.  Stops the idle timer so that rpcmod does not send
 * T_ORDREL_REQs on endpoints that aren't connected.
 */
static void
mir_clnt_idle_do_stop(queue_t *wq)
{
	mir_t	*mir = (mir_t *)wq->q_ptr;

	RPCLOG(1, "mir_clnt_idle_do_stop: wq 0x%p\n", (void *)wq);
	ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));
	mutex_enter(&mir->mir_mutex);
	mir_clnt_idle_stop(wq, mir);
	mutex_exit(&mir->mir_mutex);
}
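
/*
 * Illustrative model only, not part of rpcmod: the decision that
 * mir_svc_release() above makes twice -- a slot is eligible for idle
 * processing when the reference being released is the last one
 * (mir_ref_cnt == 1 before the decrement) and the read-side service
 * procedure holds no work (mir_inrservice == 0).  Names are simplified.
 */
#if 0
struct slot {
	int	ref_cnt;	/* requests outstanding in KRPC */
	int	inrservice;	/* read service procedure has queued work */
};

static int
last_ref_and_quiet(const struct slot *s)
{
	/*
	 * Evaluated with the slot lock held, before the decrement;
	 * when true, the held indication is sent up and the idle
	 * timer is restarted.
	 */
	return (s->ref_cnt == 1 && s->inrservice == 0);
}
#endif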

/*
 * Timer handler.  It handles the idle timeout and memory shortage
 * problems.
 */
static void
mir_timer(void *arg)
{
	queue_t	*wq = (queue_t *)arg;
	mir_t	*mir = (mir_t *)wq->q_ptr;
	boolean_t notify;
	clock_t	now;

	mutex_enter(&mir->mir_mutex);

	/*
	 * mir_timer_call is set only while mir_timer_start() or
	 * mir_timer_stop() is in progress, and mir_timer() can only run
	 * concurrently with them when the timer is being stopped.  So
	 * just return.
	 */
	if (mir->mir_timer_call) {
		mutex_exit(&mir->mir_mutex);
		return;
	}
	mir->mir_timer_id = 0;

	switch (mir->mir_type) {
	case RPC_CLIENT:

		/*
		 * For clients, the timer fires at clnt_idle_timeout
		 * intervals.  If the activity marker (mir_clntreq) is
		 * zero, then the stream has been idle since the last
		 * timer event and we notify KRPC.  If mir_clntreq is
		 * non-zero, then the stream is active and we just
		 * restart the timer for another interval.  mir_clntreq
		 * is set to 1 in mir_wput for every request passed
		 * downstream.
		 *
		 * If this was a memory-shortage timer, reset the idle
		 * timeout regardless; mir_clntreq will not be a valid
		 * indicator.
		 *
		 * The timer is initially started in mir_wput during
		 * RPC_CLIENT ioctl processing.
		 *
		 * The timer interval can be changed for individual
		 * streams with the ND variable "mir_idle_timeout".
		 */
		now = ddi_get_lbolt();
		if (mir->mir_clntreq > 0 && mir->mir_use_timestamp +
		    MSEC_TO_TICK(mir->mir_idle_timeout) - now >= 0) {
			clock_t tout;

			tout = mir->mir_idle_timeout -
			    TICK_TO_MSEC(now - mir->mir_use_timestamp);
			if (tout < 0)
				tout = 1000;
#if 0
			printf("mir_timer[%d < %d + %d]: reset client timer "
			    "to %d (ms)\n", TICK_TO_MSEC(now),
			    TICK_TO_MSEC(mir->mir_use_timestamp),
			    mir->mir_idle_timeout, tout);
#endif
			mir->mir_clntreq = 0;
			mir_timer_start(wq, mir, tout);
			mutex_exit(&mir->mir_mutex);
			return;
		}
#if 0
		printf("mir_timer[%d]: doing client timeout\n", now / hz);
#endif
		/*
		 * We are disconnecting, but not necessarily
		 * closing.  By not closing, we will fail to
		 * pick up a possibly changed global timeout value,
		 * unless we store it now.
		 */
		mir->mir_idle_timeout = clnt_idle_timeout;
		mir_clnt_idle_start(wq, mir);

		mutex_exit(&mir->mir_mutex);
		/*
		 * We pass T_ORDREL_REQ as an integer value
		 * to KRPC as the indication that the stream
		 * is idle.  This is not a T_ORDREL_REQ message,
		 * it is just a convenient value since we call
		 * the same KRPC routine for T_ORDREL_INDs and
		 * T_DISCON_INDs.
		 */
		clnt_dispatch_notifyall(wq, T_ORDREL_REQ, 0);
		return;

	case RPC_SERVER:

		/*
		 * For servers, the timer is only running when the stream
		 * is really idle or memory is short.  The timer is started
		 * by mir_wput when mir_type is set to RPC_SERVER and
		 * by mir_svc_idle_start whenever the stream goes idle
		 * (mir_ref_cnt == 0).  The timer is cancelled in
		 * mir_rput whenever a new inbound request is passed to KRPC
		 * and the stream was previously idle.
		 *
		 * The timer interval can be changed for individual
		 * streams with the ND variable "mir_idle_timeout".
		 *
		 * If the stream is not idle, do nothing.
		 */
		if (!MIR_SVC_QUIESCED(mir)) {
			mutex_exit(&mir->mir_mutex);
			return;
		}

		notify = !mir->mir_inrservice;
		mutex_exit(&mir->mir_mutex);

		/*
		 * If there is no packet queued up in the read queue, the
		 * stream is really idle, so notify nfsd to close it.
		 */
		if (notify) {
			RPCLOG(16, "mir_timer: telling stream head listener "
			    "to close stream (0x%p)\n", (void *)RD(wq));
			(void) mir_svc_policy_notify(RD(wq), 1);
		}
		return;
	default:
		RPCLOG(1, "mir_timer: unexpected mir_type %d\n",
		    mir->mir_type);
		mutex_exit(&mir->mir_mutex);
		return;
	}
}
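
/*
 * Illustrative sketch only: the re-arm computation mir_timer() performs
 * for an active client stream.  With hz = 100 (10 ms per lbolt tick),
 * an idle timeout of 30000 ms and a last use 2000 ticks (20000 ms) ago
 * leave 10000 ms, so the timer is re-armed for 10 seconds rather than a
 * full interval.  The helper name and parameters are hypothetical.
 */
#if 0
static long
remaining_timeout_ms(long idle_timeout_ms, long now_ticks,
    long use_timestamp_ticks, long ms_per_tick)
{
	long elapsed_ms = (now_ticks - use_timestamp_ticks) * ms_per_tick;
	long tout = idle_timeout_ms - elapsed_ms;

	/* Like mir_timer(), fall back to one second if this goes negative. */
	return (tout < 0 ? 1000 : tout);
}
#endif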

/*
 * Called by the RPC package to send either a call or a return, or a
 * transport connection request.  Adds the record marking header.
 */
static void
mir_wput(queue_t *q, mblk_t *mp)
{
	uint_t	frag_header;
	mir_t	*mir = (mir_t *)q->q_ptr;
	uchar_t	*rptr = mp->b_rptr;

	if (!mir) {
		freemsg(mp);
		return;
	}

	if (mp->b_datap->db_type != M_DATA) {
		mir_wput_other(q, mp);
		return;
	}

	if (mir->mir_ordrel_pending == 1) {
		freemsg(mp);
		RPCLOG(16, "mir_wput wq 0x%p: got data after T_ORDREL_REQ\n",
		    (void *)q);
		return;
	}

	/*
	 * The whole message goes out as a single record fragment:
	 * its length with the last-fragment flag set.
	 */
	frag_header = (uint_t)DLEN(mp);
	frag_header |= MIR_LASTFRAG;

	/* Stick in the 4-byte record marking header. */
	if ((rptr - mp->b_datap->db_base) < sizeof (uint32_t) ||
	    !IS_P2ALIGNED(mp->b_rptr, sizeof (uint32_t))) {
		/*
		 * Since we know that M_DATA messages are created exclusively
		 * by KRPC, we expect that KRPC will leave room for our header
		 * and align the data on a 4-byte boundary, which is normal
		 * for XDR.  If KRPC (or someone else) does not cooperate,
		 * then we just throw away the message.
		 */
		RPCLOG(1, "mir_wput: KRPC did not leave space for record "
		    "fragment header (%d bytes left)\n",
		    (int)(rptr - mp->b_datap->db_base));
		freemsg(mp);
		return;
	}
	rptr -= sizeof (uint32_t);
	*(uint32_t *)rptr = htonl(frag_header);
	mp->b_rptr = rptr;

	mutex_enter(&mir->mir_mutex);
	if (mir->mir_type == RPC_CLIENT) {
		/*
		 * For the client, set mir_clntreq to indicate that the
		 * connection is active.
		 */
		mir->mir_clntreq = 1;
		mir->mir_use_timestamp = ddi_get_lbolt();
	}

	/*
	 * If we haven't already queued some data and the downstream module
	 * can accept more data, send it on, otherwise we queue the message
	 * and take other actions depending on mir_type.
	 */
	if (!mir->mir_inwservice && MIR_WCANPUTNEXT(mir, q)) {
		mutex_exit(&mir->mir_mutex);

		/*
		 * Now we pass the RPC message downstream.
		 */
		putnext(q, mp);
		return;
	}

	switch (mir->mir_type) {
	case RPC_CLIENT:
		/*
		 * Check for a previous duplicate request on the
		 * queue.  If there is one, then we throw away
		 * the current message and let the previous one
		 * go through.  If we can't find a duplicate, then
		 * send this one.  This tap dance is an effort
		 * to reduce traffic and processing requirements
		 * under load conditions.
		 */
		if (mir_clnt_dup_request(q, mp)) {
			mutex_exit(&mir->mir_mutex);
			freemsg(mp);
			return;
		}
		break;
	case RPC_SERVER:
		/*
		 * Set mir_hold_inbound so that new inbound RPC
		 * messages will be held until the client catches
		 * up on the earlier replies.  This flag is cleared
		 * in mir_wsrv after flow control is relieved;
		 * the read-side queue is also enabled at that time.
		 */
		mir->mir_hold_inbound = 1;
		break;
	default:
		RPCLOG(1, "mir_wput: unexpected mir_type %d\n", mir->mir_type);
		break;
	}
	mir->mir_inwservice = 1;
	(void) putq(q, mp);
	mutex_exit(&mir->mir_mutex);
}
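
/*
 * Illustrative sketch only: building the record marking header the way
 * mir_wput() does.  The message length is OR-ed with the last-fragment
 * bit and stored in network byte order in the four bytes of headroom
 * that KRPC leaves in front of the XDR data.  mir_wput() verifies the
 * 4-byte alignment and stores the word directly; this sketch uses
 * memcpy to stay alignment-neutral.  Names are hypothetical.
 */
#if 0
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>		/* htonl() */

#define	RM_LASTFRAG	0x80000000u

static void
rm_prepend(unsigned char *headroom, uint32_t payload_len)
{
	uint32_t rm = htonl(payload_len | RM_LASTFRAG);

	(void) memcpy(headroom, &rm, sizeof (rm));
}
#endif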

static void
mir_wput_other(queue_t *q, mblk_t *mp)
{
	mir_t	*mir = (mir_t *)q->q_ptr;
	struct iocblk	*iocp;
	uchar_t	*rptr = mp->b_rptr;
	bool_t	flush_in_svc = FALSE;

	ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));
	switch (mp->b_datap->db_type) {
	case M_IOCTL:
		iocp = (struct iocblk *)rptr;
		switch (iocp->ioc_cmd) {
		case RPC_CLIENT:
			mutex_enter(&mir->mir_mutex);
			if (mir->mir_type != 0 &&
			    mir->mir_type != iocp->ioc_cmd) {
			ioc_eperm:
				mutex_exit(&mir->mir_mutex);
				iocp->ioc_error = EPERM;
				iocp->ioc_count = 0;
				mp->b_datap->db_type = M_IOCACK;
				qreply(q, mp);
				return;
			}

			mir->mir_type = iocp->ioc_cmd;

			/*
			 * Clear mir_hold_inbound which was set to 1 by
			 * mir_open.  This flag is not used on client
			 * streams.
			 */
			mir->mir_hold_inbound = 0;
			mir->mir_max_msg_sizep = &clnt_max_msg_size;

			/*
			 * Start the idle timer.  See mir_timer() for more
			 * information on how client timers work.
			 */
			mir->mir_idle_timeout = clnt_idle_timeout;
			mir_clnt_idle_start(q, mir);
			mutex_exit(&mir->mir_mutex);

			mp->b_datap->db_type = M_IOCACK;
			qreply(q, mp);
			return;
		case RPC_SERVER:
			mutex_enter(&mir->mir_mutex);
			if (mir->mir_type != 0 &&
			    mir->mir_type != iocp->ioc_cmd)
				goto ioc_eperm;

			/*
			 * We don't clear mir_hold_inbound here because
			 * mir_hold_inbound is used in the flow control
			 * model.  If we cleared it here, we would commit
			 * a small violation of the model, and the transport
			 * might immediately block downstream flow.
			 */

			mir->mir_type = iocp->ioc_cmd;
			mir->mir_max_msg_sizep = &svc_max_msg_size;

			/*
			 * Start the idle timer.  See mir_timer() for more
			 * information on how server timers work.
			 *
			 * Note that it is important to start the idle timer
			 * here so that connections time out even if we
			 * never receive any data on them.
			 */
			mir->mir_idle_timeout = svc_idle_timeout;
			RPCLOG(16, "mir_wput_other starting idle timer on 0x%p "
			    "because we got RPC_SERVER ioctl\n", (void *)q);
			mir_svc_idle_start(q, mir);
			mutex_exit(&mir->mir_mutex);

			mp->b_datap->db_type = M_IOCACK;
			qreply(q, mp);
			return;
		default:
			break;
		}
		break;

	case M_PROTO:
		if (mir->mir_type == RPC_CLIENT) {
			/*
			 * We are likely being called from the context of a
			 * service procedure, so we need to enqueue.  However,
			 * enqueuing may put our message behind data messages,
			 * so flush the data first.
			 */
			flush_in_svc = TRUE;
		}
		if ((mp->b_wptr - rptr) < sizeof (uint32_t) ||
		    !IS_P2ALIGNED(rptr, sizeof (uint32_t)))
			break;

		switch (((union T_primitives *)rptr)->type) {
		case T_DATA_REQ:
			/* Don't pass T_DATA_REQ messages downstream. */
			freemsg(mp);
			return;
		case T_ORDREL_REQ:
			RPCLOG(8, "mir_wput_other wq 0x%p: got T_ORDREL_REQ\n",
			    (void *)q);
			mutex_enter(&mir->mir_mutex);
			if (mir->mir_type != RPC_SERVER) {
				/*
				 * We are likely being called from
				 * clnt_dispatch_notifyall().  Sending a
				 * T_ORDREL_REQ will result in some kind of
				 * _IND message being sent back up, which
				 * in turn will be another call to
				 * clnt_dispatch_notifyall().  To keep the
				 * stack lean, queue this message.
				 */
				mir->mir_inwservice = 1;
				(void) putq(q, mp);
				mutex_exit(&mir->mir_mutex);
				return;
			}

			/*
			 * Mark the structure such that we don't accept any
			 * more requests from the client.  We could defer
			 * this until we actually send the orderly release
			 * request downstream, but all that does is delay
			 * the closing of this stream.
			 */
			RPCLOG(16, "mir_wput_other wq 0x%p: got T_ORDREL_REQ, "
			    "so calling mir_svc_start_close\n", (void *)q);

			mir_svc_start_close(q, mir);

			/*
			 * If we have sent down a T_ORDREL_REQ, don't send
			 * any more.
			 */
			if (mir->mir_ordrel_pending) {
				freemsg(mp);
				mutex_exit(&mir->mir_mutex);
				return;
			}

			/*
			 * If the stream is not idle, then we hold the
			 * orderly release until it becomes idle.  This
			 * ensures that KRPC will be able to reply to
			 * all requests that we have passed to it.
			 *
			 * We also queue the request if there is data already
			 * queued, because we cannot allow the T_ORDREL_REQ
			 * to go before data.  When we had a separate reply
			 * count, this was not a problem, because the
			 * reply count was reconciled when mir_wsrv()
			 * completed.
			 */
			if (!MIR_SVC_QUIESCED(mir) ||
			    mir->mir_inwservice == 1) {
				mir->mir_inwservice = 1;
				(void) putq(q, mp);

				RPCLOG(16, "mir_wput_other: queuing "
				    "T_ORDREL_REQ on 0x%p\n", (void *)q);

				mutex_exit(&mir->mir_mutex);
				return;
			}

			/*
			 * Mark the structure so that we know we sent
			 * an orderly release request, and reset the idle
			 * timer.
			 */
			mir->mir_ordrel_pending = 1;

			RPCLOG(16, "mir_wput_other: calling mir_svc_idle_start"
			    " on 0x%p because we got T_ORDREL_REQ\n",
			    (void *)q);

			mir_svc_idle_start(q, mir);
			mutex_exit(&mir->mir_mutex);

			/*
			 * When we break, we will putnext the T_ORDREL_REQ.
			 */
			break;

		case T_CONN_REQ:
			mutex_enter(&mir->mir_mutex);
			if (mir->mir_head_mp != NULL) {
				freemsg(mir->mir_head_mp);
				mir->mir_head_mp = NULL;
				mir->mir_tail_mp = NULL;
			}
			mir->mir_frag_len = -(int32_t)sizeof (uint32_t);
			/*
			 * Restart the timer in case mir_clnt_idle_do_stop()
			 * was called.
			 */
			mir->mir_idle_timeout = clnt_idle_timeout;
			mir_clnt_idle_stop(q, mir);
			mir_clnt_idle_start(q, mir);
			mutex_exit(&mir->mir_mutex);
			break;

		default:
			/*
			 * T_DISCON_REQ is one of the interesting default
			 * cases here.  Ideally, an M_FLUSH would be done
			 * before T_DISCON_REQ is done.  However, that is
			 * somewhat cumbersome for clnt_cots.c to do.  So
			 * we queue the T_DISCON_REQ and let the service
			 * procedure flush all M_DATA.
			 */
			break;
		}
		/* FALLTHROUGH */
	default:
		if (mp->b_datap->db_type >= QPCTL) {
			if (mp->b_datap->db_type == M_FLUSH) {
				if (mir->mir_type == RPC_CLIENT &&
				    *mp->b_rptr & FLUSHW) {
					RPCLOG(32, "mir_wput_other: flushing "
					    "wq 0x%p\n", (void *)q);
					if (*mp->b_rptr & FLUSHBAND) {
						flushband(q, *(mp->b_rptr + 1),
						    FLUSHDATA);
					} else {
						flushq(q, FLUSHDATA);
					}
				} else {
					RPCLOG(32, "mir_wput_other: ignoring "
					    "M_FLUSH on wq 0x%p\n", (void *)q);
				}
			}
			break;
		}

		mutex_enter(&mir->mir_mutex);
		if (mir->mir_inwservice == 0 && MIR_WCANPUTNEXT(mir, q)) {
			mutex_exit(&mir->mir_mutex);
			break;
		}
		mir->mir_inwservice = 1;
		mir->mir_inwflushdata = flush_in_svc;
		(void) putq(q, mp);
		mutex_exit(&mir->mir_mutex);
		qenable(q);

		return;
	}
	putnext(q, mp);
}

static void
mir_wsrv(queue_t *q)
{
	mblk_t	*mp;
	mir_t	*mir;
	bool_t	flushdata;

	mir = (mir_t *)q->q_ptr;
	mutex_enter(&mir->mir_mutex);

	flushdata = mir->mir_inwflushdata;
	mir->mir_inwflushdata = 0;

	while (mp = getq(q)) {
		if (mp->b_datap->db_type == M_DATA) {
			/*
			 * Do not send any more data if we have sent
			 * a T_ORDREL_REQ.
			 */
			if (flushdata || mir->mir_ordrel_pending == 1) {
				freemsg(mp);
				continue;
			}

			/*
			 * Make sure that the stream can really handle more
			 * data.
			 */
			if (!MIR_WCANPUTNEXT(mir, q)) {
				(void) putbq(q, mp);
				mutex_exit(&mir->mir_mutex);
				return;
			}

			/*
			 * Now we pass the RPC message downstream.
			 */
			mutex_exit(&mir->mir_mutex);
			putnext(q, mp);
			mutex_enter(&mir->mir_mutex);
			continue;
		}

		/*
		 * This is not an RPC message, pass it downstream
		 * (ignoring flow control) if the server side is not sending
		 * a T_ORDREL_REQ downstream.
		 */
		if (mir->mir_type != RPC_SERVER ||
		    ((union T_primitives *)mp->b_rptr)->type !=
		    T_ORDREL_REQ) {
			mutex_exit(&mir->mir_mutex);
			putnext(q, mp);
			mutex_enter(&mir->mir_mutex);
			continue;
		}

		if (mir->mir_ordrel_pending == 1) {
			/*
			 * Don't send two T_ORDRELs.
			 */
			freemsg(mp);
			continue;
		}

		/*
		 * Mark the structure so that we know we sent an orderly
		 * release request.  We will check to see if the slot is
		 * idle at the end of this routine, and if so, reset the
		 * idle timer to handle orderly release timeouts.
		 */
		mir->mir_ordrel_pending = 1;
		RPCLOG(16, "mir_wsrv: sending ordrel req on q 0x%p\n",
		    (void *)q);
		/*
		 * Send the orderly release downstream.  If there are other
		 * pending replies we won't be able to send them.  However,
		 * the only reason we should send the orderly release is if
		 * we were idle, or if an unusual event occurred.
		 */
		mutex_exit(&mir->mir_mutex);
		putnext(q, mp);
		mutex_enter(&mir->mir_mutex);
	}

	if (q->q_first == NULL)
		/*
		 * If we call mir_svc_idle_start() below, then
		 * clearing mir_inwservice here will also result in
		 * any thread waiting in mir_close() being signaled.
		 */
		mir->mir_inwservice = 0;

	if (mir->mir_type != RPC_SERVER) {
		mutex_exit(&mir->mir_mutex);
		return;
	}

	/*
	 * If idle, we call mir_svc_idle_start() to start the timer (or
	 * wake up a close).  Also make sure not to start the idle timer
	 * on the listener stream; doing so can cause nfsd to send an
	 * orderly release command on the listener stream.
	 */
	if (MIR_SVC_QUIESCED(mir) && !(mir->mir_listen_stream)) {
		RPCLOG(16, "mir_wsrv: calling mir_svc_idle_start on 0x%p "
		    "because mir slot is idle\n", (void *)q);
		mir_svc_idle_start(q, mir);
	}

	/*
	 * If outbound flow control has been relieved, then allow new
	 * inbound requests to be processed.
	 */
	if (mir->mir_hold_inbound) {
		mir->mir_hold_inbound = 0;
		qenable(RD(q));
	}
	mutex_exit(&mir->mir_mutex);
}
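
/*
 * Illustrative sketch only: the canonical STREAMS write-service drain
 * pattern that mir_wsrv() above follows.  Each queued message is taken
 * off the queue; if the downstream module is flow controlled, the
 * message is put back (preserving order) and the drain stops, otherwise
 * it is passed along.  mir_wsrv() layers the T_ORDREL_REQ and flush
 * handling on top of this shape.  Relies on the DDI/STREAMS environment
 * this file already includes; the function name is hypothetical.
 */
#if 0
static void
generic_wsrv(queue_t *q)
{
	mblk_t	*mp;

	while ((mp = getq(q)) != NULL) {
		if (!canputnext(q)) {
			(void) putbq(q, mp);	/* retry when back-enabled */
			return;
		}
		putnext(q, mp);
	}
}
#endif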

static void
mir_disconnect(queue_t *q, mir_t *mir)
{
	ASSERT(MUTEX_HELD(&mir->mir_mutex));

	switch (mir->mir_type) {
	case RPC_CLIENT:
		/*
		 * We are disconnecting, but not necessarily
		 * closing.  By not closing, we will fail to
		 * pick up a possibly changed global timeout value,
		 * unless we store it now.
		 */
		mir->mir_idle_timeout = clnt_idle_timeout;
		mir_clnt_idle_start(WR(q), mir);
		mutex_exit(&mir->mir_mutex);

		/*
		 * T_DISCON_REQ is passed to KRPC as an integer value
		 * (this is not a TPI message).  It is used as a
		 * convenient value to indicate a sanity check
		 * failure -- the same KRPC routine is also called
		 * for T_DISCON_INDs and T_ORDREL_INDs.
		 */
		clnt_dispatch_notifyall(WR(q), T_DISCON_REQ, 0);
		break;

	case RPC_SERVER:
		mir->mir_svc_no_more_msgs = 1;
		mir_svc_idle_stop(WR(q), mir);
		mutex_exit(&mir->mir_mutex);
		RPCLOG(16, "mir_disconnect: telling "
		    "stream head listener to disconnect stream "
		    "(0x%p)\n", (void *)q);
		(void) mir_svc_policy_notify(q, 2);
		break;

	default:
		mutex_exit(&mir->mir_mutex);
		break;
	}
}

/*
 * Sanity check the message length, and if it's too large, shut down the
 * connection.  Returns 1 if the connection is shut down; 0 otherwise.
 */
static int
mir_check_len(queue_t *q, int32_t frag_len, mblk_t *head_mp)
{
	mir_t	*mir = q->q_ptr;
	uint_t	maxsize = 0;

	if (mir->mir_max_msg_sizep != NULL)
		maxsize = *mir->mir_max_msg_sizep;

	if (maxsize == 0 || frag_len <= (int)maxsize)
		return (0);

	freemsg(head_mp);
	mir->mir_head_mp = NULL;
	mir->mir_tail_mp = NULL;
	mir->mir_frag_header = 0;
	mir->mir_frag_len = -(int32_t)sizeof (uint32_t);
	if (mir->mir_type != RPC_SERVER || mir->mir_setup_complete) {
		cmn_err(CE_NOTE,
		    "KRPC: record fragment from %s of size (%d) exceeds "
		    "maximum (%u). Disconnecting",
		    (mir->mir_type == RPC_CLIENT) ? "server" :
		    (mir->mir_type == RPC_SERVER) ? "client" :
		    "test tool", frag_len, maxsize);
	}

	mir_disconnect(q, mir);
	return (1);
}
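
/*
 * Illustrative sketch only: the acceptance test at the heart of
 * mir_check_len() above.  A configured maximum of zero disables the
 * check; otherwise any fragment longer than the maximum is treated as
 * a protocol error and the connection is torn down.  The helper name
 * is hypothetical.
 */
#if 0
static int
frag_len_acceptable(int32_t frag_len, uint_t maxsize)
{
	/* maxsize == 0 means "no limit configured". */
	return (maxsize == 0 || frag_len <= (int)maxsize);
}
#endif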