/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/* Copyright (c) 1990 Mentat Inc. */

/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Kernel RPC filtering module
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/tihdr.h>
#include <sys/timod.h>
#include <sys/tiuser.h>
#include <sys/debug.h>
#include <sys/signal.h>
#include <sys/pcb.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/cred.h>
#include <sys/policy.h>
#include <sys/inline.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/file.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/t_lock.h>
#include <sys/ddi.h>
#include <sys/vtrace.h>
#include <sys/callb.h>

#include <sys/strlog.h>
#include <rpc/rpc_com.h>
#include <inet/common.h>
#include <rpc/types.h>
#include <sys/time.h>
#include <rpc/xdr.h>
#include <rpc/auth.h>
#include <rpc/clnt.h>
#include <rpc/rpc_msg.h>
#include <rpc/clnt.h>
#include <rpc/svc.h>
#include <rpc/rpcsys.h>
#include <rpc/rpc_rdma.h>

/*
 * This is the loadable module wrapper.
 */
#include <sys/conf.h>
#include <sys/modctl.h>
#include <sys/syscall.h>

extern struct streamtab rpcinfo;

static struct fmodsw fsw = {
	"rpcmod",
	&rpcinfo,
	D_NEW|D_MP,
};

/*
 * Module linkage information for the kernel.
 */

static struct modlstrmod modlstrmod = {
	&mod_strmodops, "rpc interface str mod", &fsw
};

/*
 * For the RPC system call.
 */
static struct sysent rpcsysent = {
	2,
	SE_32RVAL1 | SE_ARGC | SE_NOUNLOAD,
	rpcsys
};

static struct modlsys modlsys = {
	&mod_syscallops,
	"RPC syscall",
	&rpcsysent
};

#ifdef _SYSCALL32_IMPL
static struct modlsys modlsys32 = {
	&mod_syscallops32,
	"32-bit RPC syscall",
	&rpcsysent
};
#endif /* _SYSCALL32_IMPL */

static struct modlinkage modlinkage = {
	MODREV_1,
	{
		&modlsys,
#ifdef _SYSCALL32_IMPL
		&modlsys32,
#endif
		&modlstrmod,
		NULL
	}
};

int
_init(void)
{
	int error = 0;
	callb_id_t cid;
	int status;

	svc_init();
	clnt_init();
	cid = callb_add(connmgr_cpr_reset, 0, CB_CL_CPR_RPC, "rpc");

	if (error = mod_install(&modlinkage)) {
		/*
		 * Could not install module, cleanup previous
		 * initialization work.
148 */ 149 clnt_fini(); 150 if (cid != NULL) 151 (void) callb_delete(cid); 152 153 return (error); 154 } 155 156 /* 157 * Load up the RDMA plugins and initialize the stats. Even if the 158 * plugins loadup fails, but rpcmod was successfully installed the 159 * counters still get initialized. 160 */ 161 rw_init(&rdma_lock, NULL, RW_DEFAULT, NULL); 162 mutex_init(&rdma_modload_lock, NULL, MUTEX_DEFAULT, NULL); 163 mt_kstat_init(); 164 165 /* 166 * Get our identification into ldi. This is used for loading 167 * other modules, e.g. rpcib. 168 */ 169 status = ldi_ident_from_mod(&modlinkage, &rpcmod_li); 170 if (status != 0) { 171 cmn_err(CE_WARN, "ldi_ident_from_mod fails with %d", status); 172 rpcmod_li = NULL; 173 } 174 175 return (error); 176 } 177 178 /* 179 * The unload entry point fails, because we advertise entry points into 180 * rpcmod from the rest of kRPC: rpcmod_release(). 181 */ 182 int 183 _fini(void) 184 { 185 return (EBUSY); 186 } 187 188 int 189 _info(struct modinfo *modinfop) 190 { 191 return (mod_info(&modlinkage, modinfop)); 192 } 193 194 extern int nulldev(); 195 196 #define RPCMOD_ID 2049 197 198 int rmm_open(), rmm_close(); 199 200 /* 201 * To save instructions, since STREAMS ignores the return value 202 * from these functions, they are defined as void here. Kind of icky, but... 203 */ 204 void rmm_rput(queue_t *, mblk_t *); 205 void rmm_wput(queue_t *, mblk_t *); 206 void rmm_rsrv(queue_t *); 207 void rmm_wsrv(queue_t *); 208 209 int rpcmodopen(), rpcmodclose(); 210 void rpcmodrput(), rpcmodwput(); 211 void rpcmodrsrv(), rpcmodwsrv(); 212 213 static void rpcmodwput_other(queue_t *, mblk_t *); 214 static int mir_close(queue_t *q); 215 static int mir_open(queue_t *q, dev_t *devp, int flag, int sflag, 216 cred_t *credp); 217 static void mir_rput(queue_t *q, mblk_t *mp); 218 static void mir_rsrv(queue_t *q); 219 static void mir_wput(queue_t *q, mblk_t *mp); 220 static void mir_wsrv(queue_t *q); 221 222 static struct module_info rpcmod_info = 223 {RPCMOD_ID, "rpcmod", 0, INFPSZ, 256*1024, 1024}; 224 225 /* 226 * Read side has no service procedure. 227 */ 228 static struct qinit rpcmodrinit = { 229 (int (*)())rmm_rput, 230 (int (*)())rmm_rsrv, 231 rmm_open, 232 rmm_close, 233 nulldev, 234 &rpcmod_info, 235 NULL 236 }; 237 238 /* 239 * The write put procedure is simply putnext to conserve stack space. 240 * The write service procedure is not used to queue data, but instead to 241 * synchronize with flow control. 242 */ 243 static struct qinit rpcmodwinit = { 244 (int (*)())rmm_wput, 245 (int (*)())rmm_wsrv, 246 rmm_open, 247 rmm_close, 248 nulldev, 249 &rpcmod_info, 250 NULL 251 }; 252 struct streamtab rpcinfo = { &rpcmodrinit, &rpcmodwinit, NULL, NULL }; 253 254 struct xprt_style_ops { 255 int (*xo_open)(); 256 int (*xo_close)(); 257 void (*xo_wput)(); 258 void (*xo_wsrv)(); 259 void (*xo_rput)(); 260 void (*xo_rsrv)(); 261 }; 262 263 static struct xprt_style_ops xprt_clts_ops = { 264 rpcmodopen, 265 rpcmodclose, 266 rpcmodwput, 267 rpcmodwsrv, 268 rpcmodrput, 269 NULL 270 }; 271 272 static struct xprt_style_ops xprt_cots_ops = { 273 mir_open, 274 mir_close, 275 mir_wput, 276 mir_wsrv, 277 mir_rput, 278 mir_rsrv 279 }; 280 281 /* 282 * Per rpcmod "slot" data structure. q->q_ptr points to one of these. 283 */ 284 struct rpcm { 285 void *rm_krpc_cell; /* Reserved for use by KRPC */ 286 struct xprt_style_ops *rm_ops; 287 int rm_type; /* Client or server side stream */ 288 #define RM_CLOSING 0x1 /* somebody is trying to close slot */ 289 uint_t rm_state; /* state of the slot. 
/*
 * Per rpcmod "slot" data structure. q->q_ptr points to one of these.
 */
struct rpcm {
	void		*rm_krpc_cell;	/* Reserved for use by KRPC */
	struct		xprt_style_ops	*rm_ops;
	int		rm_type;	/* Client or server side stream */
#define	RM_CLOSING	0x1		/* somebody is trying to close slot */
	uint_t		rm_state;	/* state of the slot, see above */
	uint_t		rm_ref;		/* cnt of external references to slot */
	kmutex_t	rm_lock;	/* mutex protecting above fields */
	kcondvar_t	rm_cwait;	/* condition for closing */
	zoneid_t	rm_zoneid;	/* zone which pushed rpcmod */
};

struct temp_slot {
	void *cell;
	struct xprt_style_ops *ops;
	int type;
	mblk_t *info_ack;
	kmutex_t lock;
	kcondvar_t wait;
};

typedef struct mir_s {
	void	*mir_krpc_cell;	/* Reserved for KRPC use. This field */
					/* must be first in the structure. */
	struct xprt_style_ops	*rm_ops;
	int	mir_type;		/* Client or server side stream */

	mblk_t	*mir_head_mp;		/* RPC msg in progress */
		/*
		 * mir_head_mp points to the first mblk being collected in
		 * the current RPC message.  Record headers are removed
		 * before data is linked into mir_head_mp.
		 */
	mblk_t	*mir_tail_mp;		/* Last mblk in mir_head_mp */
		/*
		 * mir_tail_mp points to the last mblk in the message
		 * chain starting at mir_head_mp.  It is only valid
		 * if mir_head_mp is non-NULL and is used to add new
		 * data blocks to the end of chain quickly.
		 */

	int32_t	mir_frag_len;		/* Bytes seen in the current frag */
		/*
		 * mir_frag_len starts at -4 for beginning of each fragment.
		 * When this length is negative, it indicates the number of
		 * bytes that rpcmod needs to complete the record marker
		 * header.  When it is positive or zero, it holds the number
		 * of bytes that have arrived for the current fragment and
		 * are held in mir_head_mp.
		 */

	int32_t	mir_frag_header;
		/*
		 * Fragment header as collected for the current fragment.
		 * It holds the last-fragment indicator and the number
		 * of bytes in the fragment.
		 */

	unsigned int
		mir_ordrel_pending : 1,	/* Sent T_ORDREL_REQ */
		mir_hold_inbound : 1,	/* Hold inbound messages on server */
					/* side until outbound flow control */
					/* is relieved. */
		mir_closing : 1,	/* The stream is being closed */
		mir_inrservice : 1,	/* data queued or rd srv proc running */
		mir_inwservice : 1,	/* data queued or wr srv proc running */
		mir_inwflushdata : 1,	/* flush M_DATAs when srv runs */
		/*
		 * On client streams, mir_clntreq is 0 or 1; it is set
		 * to 1 whenever a new request is sent out (mir_wput)
		 * and cleared when the timer fires (mir_timer).  If
		 * the timer fires with this value equal to 0, then the
		 * stream is considered idle and KRPC is notified.
		 */
		mir_clntreq : 1,
		/*
		 * On server streams, stop accepting messages
		 */
		mir_svc_no_more_msgs : 1,
		mir_listen_stream : 1,	/* listen end point */
		mir_unused : 1,		/* no longer used */
		mir_timer_call : 1,
		mir_junk_fill_thru_bit_31 : 21;

	int	mir_setup_complete;	/* server has initialized everything */
	timeout_id_t mir_timer_id;	/* Timer for idle checks */
	clock_t	mir_idle_timeout;	/* Allowed idle time before shutdown */
		/*
		 * This value is copied from clnt_idle_timeout or
		 * svc_idle_timeout during the appropriate ioctl.
		 * Kept in milliseconds
		 */
	clock_t	mir_use_timestamp;	/* updated on client with each use */
		/*
		 * This value is set to lbolt
		 * every time a client stream sends or receives data.
		 * Even if the timer message arrives, we don't shutdown
		 * client unless:
		 *    lbolt >= MSEC_TO_TICK(mir_idle_timeout)+mir_use_timestamp.
		 * This value is kept in HZ.
384 */ 385 386 uint_t *mir_max_msg_sizep; /* Reference to sanity check size */ 387 /* 388 * This pointer is set to &clnt_max_msg_size or 389 * &svc_max_msg_size during the appropriate ioctl. 390 */ 391 zoneid_t mir_zoneid; /* zone which pushed rpcmod */ 392 /* Server-side fields. */ 393 int mir_ref_cnt; /* Reference count: server side only */ 394 /* counts the number of references */ 395 /* that a kernel RPC server thread */ 396 /* (see svc_run()) has on this rpcmod */ 397 /* slot. Effectively, it is the */ 398 /* number * of unprocessed messages */ 399 /* that have been passed up to the */ 400 /* KRPC layer */ 401 402 mblk_t *mir_svc_pend_mp; /* Pending T_ORDREL_IND or */ 403 /* T_DISCON_IND */ 404 405 /* 406 * these fields are for both client and server, but for debugging, 407 * it is easier to have these last in the structure. 408 */ 409 kmutex_t mir_mutex; /* Mutex and condvar for close */ 410 kcondvar_t mir_condvar; /* synchronization. */ 411 kcondvar_t mir_timer_cv; /* Timer routine sync. */ 412 } mir_t; 413 414 void tmp_rput(queue_t *q, mblk_t *mp); 415 416 struct xprt_style_ops tmpops = { 417 NULL, 418 NULL, 419 putnext, 420 NULL, 421 tmp_rput, 422 NULL 423 }; 424 425 void 426 tmp_rput(queue_t *q, mblk_t *mp) 427 { 428 struct temp_slot *t = (struct temp_slot *)(q->q_ptr); 429 struct T_info_ack *pptr; 430 431 switch (mp->b_datap->db_type) { 432 case M_PCPROTO: 433 pptr = (struct T_info_ack *)mp->b_rptr; 434 switch (pptr->PRIM_type) { 435 case T_INFO_ACK: 436 mutex_enter(&t->lock); 437 t->info_ack = mp; 438 cv_signal(&t->wait); 439 mutex_exit(&t->lock); 440 return; 441 default: 442 break; 443 } 444 default: 445 break; 446 } 447 448 /* 449 * Not an info-ack, so free it. This is ok because we should 450 * not be receiving data until the open finishes: rpcmod 451 * is pushed well before the end-point is bound to an address. 452 */ 453 freemsg(mp); 454 } 455 456 int 457 rmm_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *crp) 458 { 459 mblk_t *bp; 460 struct temp_slot ts, *t; 461 struct T_info_ack *pptr; 462 int error = 0; 463 464 ASSERT(q != NULL); 465 /* 466 * Check for re-opens. 467 */ 468 if (q->q_ptr) { 469 TRACE_1(TR_FAC_KRPC, TR_RPCMODOPEN_END, 470 "rpcmodopen_end:(%s)", "q->qptr"); 471 return (0); 472 } 473 474 t = &ts; 475 bzero(t, sizeof (*t)); 476 q->q_ptr = (void *)t; 477 WR(q)->q_ptr = (void *)t; 478 479 /* 480 * Allocate the required messages upfront. 
481 */ 482 if ((bp = allocb(sizeof (struct T_info_req) + 483 sizeof (struct T_info_ack), BPRI_LO)) == (mblk_t *)NULL) { 484 return (ENOBUFS); 485 } 486 487 mutex_init(&t->lock, NULL, MUTEX_DEFAULT, NULL); 488 cv_init(&t->wait, NULL, CV_DEFAULT, NULL); 489 490 t->ops = &tmpops; 491 492 qprocson(q); 493 bp->b_datap->db_type = M_PCPROTO; 494 *(int32_t *)bp->b_wptr = (int32_t)T_INFO_REQ; 495 bp->b_wptr += sizeof (struct T_info_req); 496 putnext(WR(q), bp); 497 498 mutex_enter(&t->lock); 499 while (t->info_ack == NULL) { 500 if (cv_wait_sig(&t->wait, &t->lock) == 0) { 501 error = EINTR; 502 break; 503 } 504 } 505 mutex_exit(&t->lock); 506 507 if (error) 508 goto out; 509 510 pptr = (struct T_info_ack *)t->info_ack->b_rptr; 511 512 if (pptr->SERV_type == T_CLTS) { 513 if ((error = rpcmodopen(q, devp, flag, sflag, crp)) == 0) 514 ((struct rpcm *)q->q_ptr)->rm_ops = &xprt_clts_ops; 515 } else { 516 if ((error = mir_open(q, devp, flag, sflag, crp)) == 0) 517 ((mir_t *)q->q_ptr)->rm_ops = &xprt_cots_ops; 518 } 519 520 out: 521 if (error) 522 qprocsoff(q); 523 524 freemsg(t->info_ack); 525 mutex_destroy(&t->lock); 526 cv_destroy(&t->wait); 527 528 return (error); 529 } 530 531 void 532 rmm_rput(queue_t *q, mblk_t *mp) 533 { 534 (*((struct temp_slot *)q->q_ptr)->ops->xo_rput)(q, mp); 535 } 536 537 void 538 rmm_rsrv(queue_t *q) 539 { 540 (*((struct temp_slot *)q->q_ptr)->ops->xo_rsrv)(q); 541 } 542 543 void 544 rmm_wput(queue_t *q, mblk_t *mp) 545 { 546 (*((struct temp_slot *)q->q_ptr)->ops->xo_wput)(q, mp); 547 } 548 549 void 550 rmm_wsrv(queue_t *q) 551 { 552 (*((struct temp_slot *)q->q_ptr)->ops->xo_wsrv)(q); 553 } 554 555 int 556 rmm_close(queue_t *q, int flag, cred_t *crp) 557 { 558 return ((*((struct temp_slot *)q->q_ptr)->ops->xo_close)(q, flag, crp)); 559 } 560 561 /* 562 * rpcmodopen - open routine gets called when the module gets pushed 563 * onto the stream. 564 */ 565 /*ARGSUSED*/ 566 int 567 rpcmodopen(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *crp) 568 { 569 struct rpcm *rmp; 570 571 extern void (*rpc_rele)(queue_t *, mblk_t *); 572 static void rpcmod_release(queue_t *, mblk_t *); 573 574 TRACE_0(TR_FAC_KRPC, TR_RPCMODOPEN_START, "rpcmodopen_start:"); 575 576 /* 577 * Initialize entry points to release a rpcmod slot (and an input 578 * message if supplied) and to send an output message to the module 579 * below rpcmod. 580 */ 581 if (rpc_rele == NULL) 582 rpc_rele = rpcmod_release; 583 584 /* 585 * Only sufficiently privileged users can use this module, and it 586 * is assumed that they will use this module properly, and NOT send 587 * bulk data from downstream. 588 */ 589 if (secpolicy_rpcmod_open(crp) != 0) 590 return (EPERM); 591 592 /* 593 * Allocate slot data structure. 594 */ 595 rmp = kmem_zalloc(sizeof (*rmp), KM_SLEEP); 596 597 mutex_init(&rmp->rm_lock, NULL, MUTEX_DEFAULT, NULL); 598 cv_init(&rmp->rm_cwait, NULL, CV_DEFAULT, NULL); 599 rmp->rm_zoneid = rpc_zoneid(); 600 /* 601 * slot type will be set by kRPC client and server ioctl's 602 */ 603 rmp->rm_type = 0; 604 605 q->q_ptr = (void *)rmp; 606 WR(q)->q_ptr = (void *)rmp; 607 608 TRACE_1(TR_FAC_KRPC, TR_RPCMODOPEN_END, "rpcmodopen_end:(%s)", "end"); 609 return (0); 610 } 611 612 /* 613 * rpcmodclose - This routine gets called when the module gets popped 614 * off of the stream. 615 */ 616 /*ARGSUSED*/ 617 int 618 rpcmodclose(queue_t *q, int flag, cred_t *crp) 619 { 620 struct rpcm *rmp; 621 622 ASSERT(q != NULL); 623 rmp = (struct rpcm *)q->q_ptr; 624 625 /* 626 * Mark our state as closing. 
627 */ 628 mutex_enter(&rmp->rm_lock); 629 rmp->rm_state |= RM_CLOSING; 630 631 /* 632 * Check and see if there are any messages on the queue. If so, send 633 * the messages, regardless whether the downstream module is ready to 634 * accept data. 635 */ 636 if (rmp->rm_type == RPC_SERVER) { 637 flushq(q, FLUSHDATA); 638 639 qenable(WR(q)); 640 641 if (rmp->rm_ref) { 642 mutex_exit(&rmp->rm_lock); 643 /* 644 * call into SVC to clean the queue 645 */ 646 svc_queueclean(q); 647 mutex_enter(&rmp->rm_lock); 648 649 /* 650 * Block while there are kRPC threads with a reference 651 * to this message. 652 */ 653 while (rmp->rm_ref) 654 cv_wait(&rmp->rm_cwait, &rmp->rm_lock); 655 } 656 657 mutex_exit(&rmp->rm_lock); 658 659 /* 660 * It is now safe to remove this queue from the stream. No kRPC 661 * threads have a reference to the stream, and none ever will, 662 * because RM_CLOSING is set. 663 */ 664 qprocsoff(q); 665 666 /* Notify kRPC that this stream is going away. */ 667 svc_queueclose(q); 668 } else { 669 mutex_exit(&rmp->rm_lock); 670 qprocsoff(q); 671 } 672 673 q->q_ptr = NULL; 674 WR(q)->q_ptr = NULL; 675 mutex_destroy(&rmp->rm_lock); 676 cv_destroy(&rmp->rm_cwait); 677 kmem_free(rmp, sizeof (*rmp)); 678 return (0); 679 } 680 681 #ifdef DEBUG 682 int rpcmod_send_msg_up = 0; 683 int rpcmod_send_uderr = 0; 684 int rpcmod_send_dup = 0; 685 int rpcmod_send_dup_cnt = 0; 686 #endif 687 688 /* 689 * rpcmodrput - Module read put procedure. This is called from 690 * the module, driver, or stream head downstream. 691 */ 692 void 693 rpcmodrput(queue_t *q, mblk_t *mp) 694 { 695 struct rpcm *rmp; 696 union T_primitives *pptr; 697 int hdrsz; 698 699 TRACE_0(TR_FAC_KRPC, TR_RPCMODRPUT_START, "rpcmodrput_start:"); 700 701 ASSERT(q != NULL); 702 rmp = (struct rpcm *)q->q_ptr; 703 704 if (rmp->rm_type == 0) { 705 freemsg(mp); 706 return; 707 } 708 709 #ifdef DEBUG 710 if (rpcmod_send_msg_up > 0) { 711 mblk_t *nmp = copymsg(mp); 712 if (nmp) { 713 putnext(q, nmp); 714 rpcmod_send_msg_up--; 715 } 716 } 717 if ((rpcmod_send_uderr > 0) && mp->b_datap->db_type == M_PROTO) { 718 mblk_t *nmp; 719 struct T_unitdata_ind *data; 720 struct T_uderror_ind *ud; 721 int d; 722 data = (struct T_unitdata_ind *)mp->b_rptr; 723 if (data->PRIM_type == T_UNITDATA_IND) { 724 d = sizeof (*ud) - sizeof (*data); 725 nmp = allocb(mp->b_wptr - mp->b_rptr + d, BPRI_HI); 726 if (nmp) { 727 ud = (struct T_uderror_ind *)nmp->b_rptr; 728 ud->PRIM_type = T_UDERROR_IND; 729 ud->DEST_length = data->SRC_length; 730 ud->DEST_offset = data->SRC_offset + d; 731 ud->OPT_length = data->OPT_length; 732 ud->OPT_offset = data->OPT_offset + d; 733 ud->ERROR_type = ENETDOWN; 734 if (data->SRC_length) { 735 bcopy(mp->b_rptr + 736 data->SRC_offset, 737 nmp->b_rptr + 738 ud->DEST_offset, 739 data->SRC_length); 740 } 741 if (data->OPT_length) { 742 bcopy(mp->b_rptr + 743 data->OPT_offset, 744 nmp->b_rptr + 745 ud->OPT_offset, 746 data->OPT_length); 747 } 748 nmp->b_wptr += d; 749 nmp->b_wptr += (mp->b_wptr - mp->b_rptr); 750 nmp->b_datap->db_type = M_PROTO; 751 putnext(q, nmp); 752 rpcmod_send_uderr--; 753 } 754 } 755 } 756 #endif 757 switch (mp->b_datap->db_type) { 758 default: 759 putnext(q, mp); 760 break; 761 762 case M_PROTO: 763 case M_PCPROTO: 764 ASSERT((mp->b_wptr - mp->b_rptr) >= sizeof (int32_t)); 765 pptr = (union T_primitives *)mp->b_rptr; 766 767 /* 768 * Forward this message to krpc if it is data. 769 */ 770 if (pptr->type == T_UNITDATA_IND) { 771 mblk_t *nmp; 772 773 /* 774 * Check if the module is being popped. 
775 */ 776 mutex_enter(&rmp->rm_lock); 777 if (rmp->rm_state & RM_CLOSING) { 778 mutex_exit(&rmp->rm_lock); 779 putnext(q, mp); 780 break; 781 } 782 783 switch (rmp->rm_type) { 784 case RPC_CLIENT: 785 mutex_exit(&rmp->rm_lock); 786 hdrsz = mp->b_wptr - mp->b_rptr; 787 788 /* 789 * Make sure the header is sane. 790 */ 791 if (hdrsz < TUNITDATAINDSZ || 792 hdrsz < (pptr->unitdata_ind.OPT_length + 793 pptr->unitdata_ind.OPT_offset) || 794 hdrsz < (pptr->unitdata_ind.SRC_length + 795 pptr->unitdata_ind.SRC_offset)) { 796 freemsg(mp); 797 return; 798 } 799 800 /* 801 * Call clnt_clts_dispatch_notify, so that it 802 * can pass the message to the proper caller. 803 * Don't discard the header just yet since the 804 * client may need the sender's address. 805 */ 806 clnt_clts_dispatch_notify(mp, hdrsz, 807 rmp->rm_zoneid); 808 return; 809 case RPC_SERVER: 810 /* 811 * rm_krpc_cell is exclusively used by the kRPC 812 * CLTS server 813 */ 814 if (rmp->rm_krpc_cell) { 815 #ifdef DEBUG 816 /* 817 * Test duplicate request cache and 818 * rm_ref count handling by sending a 819 * duplicate every so often, if 820 * desired. 821 */ 822 if (rpcmod_send_dup && 823 rpcmod_send_dup_cnt++ % 824 rpcmod_send_dup) 825 nmp = copymsg(mp); 826 else 827 nmp = NULL; 828 #endif 829 /* 830 * Raise the reference count on this 831 * module to prevent it from being 832 * popped before krpc generates the 833 * reply. 834 */ 835 rmp->rm_ref++; 836 mutex_exit(&rmp->rm_lock); 837 838 /* 839 * Submit the message to krpc. 840 */ 841 svc_queuereq(q, mp); 842 #ifdef DEBUG 843 /* 844 * Send duplicate if we created one. 845 */ 846 if (nmp) { 847 mutex_enter(&rmp->rm_lock); 848 rmp->rm_ref++; 849 mutex_exit(&rmp->rm_lock); 850 svc_queuereq(q, nmp); 851 } 852 #endif 853 } else { 854 mutex_exit(&rmp->rm_lock); 855 freemsg(mp); 856 } 857 return; 858 default: 859 mutex_exit(&rmp->rm_lock); 860 freemsg(mp); 861 return; 862 } /* end switch(rmp->rm_type) */ 863 } else if (pptr->type == T_UDERROR_IND) { 864 mutex_enter(&rmp->rm_lock); 865 hdrsz = mp->b_wptr - mp->b_rptr; 866 867 /* 868 * Make sure the header is sane 869 */ 870 if (hdrsz < TUDERRORINDSZ || 871 hdrsz < (pptr->uderror_ind.OPT_length + 872 pptr->uderror_ind.OPT_offset) || 873 hdrsz < (pptr->uderror_ind.DEST_length + 874 pptr->uderror_ind.DEST_offset)) { 875 mutex_exit(&rmp->rm_lock); 876 freemsg(mp); 877 return; 878 } 879 880 /* 881 * In the case where a unit data error has been 882 * received, all we need to do is clear the message from 883 * the queue. 884 */ 885 mutex_exit(&rmp->rm_lock); 886 freemsg(mp); 887 RPCLOG(32, "rpcmodrput: unitdata error received at " 888 "%ld\n", gethrestime_sec()); 889 return; 890 } /* end else if (pptr->type == T_UDERROR_IND) */ 891 892 putnext(q, mp); 893 break; 894 } /* end switch (mp->b_datap->db_type) */ 895 896 TRACE_0(TR_FAC_KRPC, TR_RPCMODRPUT_END, 897 "rpcmodrput_end:"); 898 /* 899 * Return codes are not looked at by the STREAMS framework. 900 */ 901 } 902 903 /* 904 * write put procedure 905 */ 906 void 907 rpcmodwput(queue_t *q, mblk_t *mp) 908 { 909 struct rpcm *rmp; 910 911 ASSERT(q != NULL); 912 913 switch (mp->b_datap->db_type) { 914 case M_PROTO: 915 case M_PCPROTO: 916 break; 917 default: 918 rpcmodwput_other(q, mp); 919 return; 920 } 921 922 /* 923 * Check to see if we can send the message downstream. 924 */ 925 if (canputnext(q)) { 926 putnext(q, mp); 927 return; 928 } 929 930 rmp = (struct rpcm *)q->q_ptr; 931 ASSERT(rmp != NULL); 932 933 /* 934 * The first canputnext failed. 
/*
 * write put procedure
 */
void
rpcmodwput(queue_t *q, mblk_t *mp)
{
	struct rpcm	*rmp;

	ASSERT(q != NULL);

	switch (mp->b_datap->db_type) {
	case M_PROTO:
	case M_PCPROTO:
		break;
	default:
		rpcmodwput_other(q, mp);
		return;
	}

	/*
	 * Check to see if we can send the message downstream.
	 */
	if (canputnext(q)) {
		putnext(q, mp);
		return;
	}

	rmp = (struct rpcm *)q->q_ptr;
	ASSERT(rmp != NULL);

	/*
	 * The first canputnext failed.  Try again except this time with the
	 * lock held, so that we can check the state of the stream to see if
	 * it is closing.  If either of these conditions evaluates to true
	 * then send the message.
	 */
	mutex_enter(&rmp->rm_lock);
	if (canputnext(q) || (rmp->rm_state & RM_CLOSING)) {
		mutex_exit(&rmp->rm_lock);
		putnext(q, mp);
	} else {
		/*
		 * canputnext failed again and the stream is not closing.
		 * Place the message on the queue and let the service
		 * procedure handle the message.
		 */
		mutex_exit(&rmp->rm_lock);
		(void) putq(q, mp);
	}
}

static void
rpcmodwput_other(queue_t *q, mblk_t *mp)
{
	struct rpcm	*rmp;
	struct iocblk	*iocp;

	rmp = (struct rpcm *)q->q_ptr;
	ASSERT(rmp != NULL);

	switch (mp->b_datap->db_type) {
	case M_IOCTL:
		iocp = (struct iocblk *)mp->b_rptr;
		ASSERT(iocp != NULL);
		switch (iocp->ioc_cmd) {
		case RPC_CLIENT:
		case RPC_SERVER:
			mutex_enter(&rmp->rm_lock);
			rmp->rm_type = iocp->ioc_cmd;
			mutex_exit(&rmp->rm_lock);
			mp->b_datap->db_type = M_IOCACK;
			qreply(q, mp);
			return;
		default:
			/*
			 * pass the ioctl downstream and hope someone
			 * down there knows how to handle it.
			 */
			putnext(q, mp);
			return;
		}
	default:
		break;
	}
	/*
	 * This is something we definitely do not know how to handle, just
	 * pass the message downstream
	 */
	putnext(q, mp);
}

/*
 * Module write service procedure. This is called by downstream modules
 * for back enabling during flow control.
 */
void
rpcmodwsrv(queue_t *q)
{
	struct rpcm	*rmp;
	mblk_t		*mp = NULL;

	rmp = (struct rpcm *)q->q_ptr;
	ASSERT(rmp != NULL);

	/*
	 * Get messages that may be queued and send them down stream
	 */
	while ((mp = getq(q)) != NULL) {
		/*
		 * Optimize the service procedure for the server-side, by
		 * avoiding a call to canputnext().
		 */
		if (rmp->rm_type == RPC_SERVER || canputnext(q)) {
			putnext(q, mp);
			continue;
		}
		(void) putbq(q, mp);
		return;
	}
}

static void
rpcmod_release(queue_t *q, mblk_t *bp)
{
	struct rpcm *rmp;

	/*
	 * For now, just free the message.
	 */
	if (bp)
		freemsg(bp);
	rmp = (struct rpcm *)q->q_ptr;

	mutex_enter(&rmp->rm_lock);
	rmp->rm_ref--;

	if (rmp->rm_ref == 0 && (rmp->rm_state & RM_CLOSING)) {
		cv_broadcast(&rmp->rm_cwait);
	}

	mutex_exit(&rmp->rm_lock);
}

/*
 * This part of rpcmod is pushed on a connection-oriented transport for use
 * by RPC.  It serves to bypass the Stream head, implements
 * the record marking protocol, and dispatches incoming RPC messages.
 */

/* Default idle timer values */
#define	MIR_CLNT_IDLE_TIMEOUT	(5 * (60 * 1000L))	/* 5 minutes */
#define	MIR_SVC_IDLE_TIMEOUT	(6 * (60 * 1000L))	/* 6 minutes */
#define	MIR_SVC_ORDREL_TIMEOUT	(10 * (60 * 1000L))	/* 10 minutes */
#define	MIR_LASTFRAG	0x80000000	/* Record marker */
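/*
 * Illustrative sketch (not compiled): a condensed user-space model of the
 * record marking protocol that mir_do_rput() below implements.  Each
 * fragment on a connection-oriented transport is preceded by a 4-byte
 * big-endian word; bit 31 (MIR_LASTFRAG) flags the final fragment and the
 * low 31 bits give the fragment length.  frag_len starts at -4 (bytes of
 * the mark still owed), header bytes are shifted in one at a time, then
 * data bytes are counted until the fragment is satisfied, and the message
 * completes on a fragment whose mark has the last-frag bit set.  The wire
 * bytes are invented for the example.
 */
#if 0
#include <stdio.h>
#include <stdint.h>

#define	EX_LASTFRAG	0x80000000u

int
main(void)
{
	/* Two fragments: "ab" (more to come), then "cd" (last). */
	const unsigned char wire[] = {
	    0x00, 0x00, 0x00, 0x02, 'a', 'b',
	    0x80, 0x00, 0x00, 0x02, 'c', 'd',
	};
	int32_t frag_len = -4;		/* need 4 record-mark bytes */
	uint32_t frag_header = 0;
	size_t i;

	for (i = 0; i < sizeof (wire); i++) {
		if (frag_len < 0) {	/* still collecting the mark */
			frag_header = (frag_header << 8) | wire[i];
			frag_len++;
			continue;
		}
		putchar(wire[i]);	/* a data byte of the message */
		frag_len++;
		if ((uint32_t)frag_len == (frag_header & ~EX_LASTFRAG)) {
			if (frag_header & EX_LASTFRAG)
				printf(" <- complete RPC message\n");
			frag_len = -4;	/* reset for the next fragment */
			frag_header = 0;
		}
	}
	return (0);
}
#endif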
#define	DLEN(mp) (mp->b_cont ? msgdsize(mp) : (mp->b_wptr - mp->b_rptr))

#define	MIR_SVC_QUIESCED(mir)	\
	(mir->mir_ref_cnt == 0 && mir->mir_inrservice == 0)

#define	MIR_CLEAR_INRSRV(mir_ptr)	{	\
	(mir_ptr)->mir_inrservice = 0;	\
	if ((mir_ptr)->mir_type == RPC_SERVER &&	\
		(mir_ptr)->mir_closing)	\
		cv_signal(&(mir_ptr)->mir_condvar);	\
}

/*
 * Don't block service procedure (and mir_close) if
 * we are in the process of closing.
 */
#define	MIR_WCANPUTNEXT(mir_ptr, write_q)	\
	(canputnext(write_q) || ((mir_ptr)->mir_svc_no_more_msgs == 1))

static int	mir_clnt_dup_request(queue_t *q, mblk_t *mp);
static void	mir_rput_proto(queue_t *q, mblk_t *mp);
static int	mir_svc_policy_notify(queue_t *q, int event);
static void	mir_svc_release(queue_t *wq, mblk_t *mp);
static void	mir_svc_start(queue_t *wq);
static void	mir_svc_idle_start(queue_t *, mir_t *);
static void	mir_svc_idle_stop(queue_t *, mir_t *);
static void	mir_svc_start_close(queue_t *, mir_t *);
static void	mir_clnt_idle_do_stop(queue_t *);
static void	mir_clnt_idle_stop(queue_t *, mir_t *);
static void	mir_clnt_idle_start(queue_t *, mir_t *);
static void	mir_wput(queue_t *q, mblk_t *mp);
static void	mir_wput_other(queue_t *q, mblk_t *mp);
static void	mir_wsrv(queue_t *q);
static void	mir_disconnect(queue_t *, mir_t *ir);
static int	mir_check_len(queue_t *, int32_t, mblk_t *);
static void	mir_timer(void *);

extern void	(*mir_rele)(queue_t *, mblk_t *);
extern void	(*mir_start)(queue_t *);
extern void	(*clnt_stop_idle)(queue_t *);

clock_t	clnt_idle_timeout = MIR_CLNT_IDLE_TIMEOUT;
clock_t	svc_idle_timeout = MIR_SVC_IDLE_TIMEOUT;

/*
 * Timeout for subsequent notifications of idle connection.  This is
 * typically used to clean up after a wedged orderly release.
 */
clock_t	svc_ordrel_timeout = MIR_SVC_ORDREL_TIMEOUT;	/* milliseconds */

extern	uint_t	*clnt_max_msg_sizep;
extern	uint_t	*svc_max_msg_sizep;
uint_t	clnt_max_msg_size = RPC_MAXDATASIZE;
uint_t	svc_max_msg_size = RPC_MAXDATASIZE;
uint_t	mir_krpc_cell_null;

static void
mir_timer_stop(mir_t *mir)
{
	timeout_id_t tid;

	ASSERT(MUTEX_HELD(&mir->mir_mutex));

	/*
	 * Since the mir_mutex lock needs to be released to call
	 * untimeout(), we need to make sure that no other thread
	 * can start/stop the timer (changing mir_timer_id) during
	 * that time.  The mir_timer_call bit and the mir_timer_cv
	 * condition variable are used to synchronize this.  Setting
	 * mir_timer_call also tells mir_timer() (refer to the comments
	 * in mir_timer()) that it does not need to do anything.
1129 */ 1130 while (mir->mir_timer_call) 1131 cv_wait(&mir->mir_timer_cv, &mir->mir_mutex); 1132 mir->mir_timer_call = B_TRUE; 1133 1134 if ((tid = mir->mir_timer_id) != 0) { 1135 mir->mir_timer_id = 0; 1136 mutex_exit(&mir->mir_mutex); 1137 (void) untimeout(tid); 1138 mutex_enter(&mir->mir_mutex); 1139 } 1140 mir->mir_timer_call = B_FALSE; 1141 cv_broadcast(&mir->mir_timer_cv); 1142 } 1143 1144 static void 1145 mir_timer_start(queue_t *q, mir_t *mir, clock_t intrvl) 1146 { 1147 timeout_id_t tid; 1148 1149 ASSERT(MUTEX_HELD(&mir->mir_mutex)); 1150 1151 while (mir->mir_timer_call) 1152 cv_wait(&mir->mir_timer_cv, &mir->mir_mutex); 1153 mir->mir_timer_call = B_TRUE; 1154 1155 if ((tid = mir->mir_timer_id) != 0) { 1156 mutex_exit(&mir->mir_mutex); 1157 (void) untimeout(tid); 1158 mutex_enter(&mir->mir_mutex); 1159 } 1160 /* Only start the timer when it is not closing. */ 1161 if (!mir->mir_closing) { 1162 mir->mir_timer_id = timeout(mir_timer, q, 1163 MSEC_TO_TICK(intrvl)); 1164 } 1165 mir->mir_timer_call = B_FALSE; 1166 cv_broadcast(&mir->mir_timer_cv); 1167 } 1168 1169 static int 1170 mir_clnt_dup_request(queue_t *q, mblk_t *mp) 1171 { 1172 mblk_t *mp1; 1173 uint32_t new_xid; 1174 uint32_t old_xid; 1175 1176 ASSERT(MUTEX_HELD(&((mir_t *)q->q_ptr)->mir_mutex)); 1177 new_xid = BE32_TO_U32(&mp->b_rptr[4]); 1178 /* 1179 * This loop is a bit tacky -- it walks the STREAMS list of 1180 * flow-controlled messages. 1181 */ 1182 if ((mp1 = q->q_first) != NULL) { 1183 do { 1184 old_xid = BE32_TO_U32(&mp1->b_rptr[4]); 1185 if (new_xid == old_xid) 1186 return (1); 1187 } while ((mp1 = mp1->b_next) != NULL); 1188 } 1189 return (0); 1190 } 1191 1192 static int 1193 mir_close(queue_t *q) 1194 { 1195 mir_t *mir; 1196 mblk_t *mp; 1197 bool_t queue_cleaned = FALSE; 1198 1199 RPCLOG(32, "rpcmod: mir_close of q 0x%p\n", (void *)q); 1200 mir = (mir_t *)q->q_ptr; 1201 ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex)); 1202 mutex_enter(&mir->mir_mutex); 1203 if ((mp = mir->mir_head_mp) != NULL) { 1204 mir->mir_head_mp = (mblk_t *)0; 1205 freemsg(mp); 1206 } 1207 /* 1208 * Set mir_closing so we get notified when MIR_SVC_QUIESCED() 1209 * is TRUE. And mir_timer_start() won't start the timer again. 1210 */ 1211 mir->mir_closing = B_TRUE; 1212 mir_timer_stop(mir); 1213 1214 if (mir->mir_type == RPC_SERVER) { 1215 flushq(q, FLUSHDATA); /* Ditch anything waiting on read q */ 1216 1217 /* 1218 * This will prevent more requests from arriving and 1219 * will force rpcmod to ignore flow control. 1220 */ 1221 mir_svc_start_close(WR(q), mir); 1222 1223 while ((!MIR_SVC_QUIESCED(mir)) || mir->mir_inwservice == 1) { 1224 1225 if (mir->mir_ref_cnt && !mir->mir_inrservice && 1226 (queue_cleaned == FALSE)) { 1227 /* 1228 * call into SVC to clean the queue 1229 */ 1230 mutex_exit(&mir->mir_mutex); 1231 svc_queueclean(q); 1232 queue_cleaned = TRUE; 1233 mutex_enter(&mir->mir_mutex); 1234 continue; 1235 } 1236 1237 /* 1238 * Bugid 1253810 - Force the write service 1239 * procedure to send its messages, regardless 1240 * whether the downstream module is ready 1241 * to accept data. 1242 */ 1243 if (mir->mir_inwservice == 1) 1244 qenable(WR(q)); 1245 1246 cv_wait(&mir->mir_condvar, &mir->mir_mutex); 1247 } 1248 1249 mutex_exit(&mir->mir_mutex); 1250 qprocsoff(q); 1251 1252 /* Notify KRPC that this stream is going away. 
static int
mir_close(queue_t *q)
{
	mir_t	*mir;
	mblk_t	*mp;
	bool_t queue_cleaned = FALSE;

	RPCLOG(32, "rpcmod: mir_close of q 0x%p\n", (void *)q);
	mir = (mir_t *)q->q_ptr;
	ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));
	mutex_enter(&mir->mir_mutex);
	if ((mp = mir->mir_head_mp) != NULL) {
		mir->mir_head_mp = (mblk_t *)0;
		freemsg(mp);
	}
	/*
	 * Set mir_closing so we get notified when MIR_SVC_QUIESCED()
	 * is TRUE.  And mir_timer_start() won't start the timer again.
	 */
	mir->mir_closing = B_TRUE;
	mir_timer_stop(mir);

	if (mir->mir_type == RPC_SERVER) {
		flushq(q, FLUSHDATA);	/* Ditch anything waiting on read q */

		/*
		 * This will prevent more requests from arriving and
		 * will force rpcmod to ignore flow control.
		 */
		mir_svc_start_close(WR(q), mir);

		while ((!MIR_SVC_QUIESCED(mir)) || mir->mir_inwservice == 1) {

			if (mir->mir_ref_cnt && !mir->mir_inrservice &&
			    (queue_cleaned == FALSE)) {
				/*
				 * call into SVC to clean the queue
				 */
				mutex_exit(&mir->mir_mutex);
				svc_queueclean(q);
				queue_cleaned = TRUE;
				mutex_enter(&mir->mir_mutex);
				continue;
			}

			/*
			 * Bugid 1253810 - Force the write service
			 * procedure to send its messages, regardless
			 * of whether the downstream module is ready
			 * to accept data.
			 */
			if (mir->mir_inwservice == 1)
				qenable(WR(q));

			cv_wait(&mir->mir_condvar, &mir->mir_mutex);
		}

		mutex_exit(&mir->mir_mutex);
		qprocsoff(q);

		/* Notify KRPC that this stream is going away. */
		svc_queueclose(q);
	} else {
		mutex_exit(&mir->mir_mutex);
		qprocsoff(q);
	}

	mutex_destroy(&mir->mir_mutex);
	cv_destroy(&mir->mir_condvar);
	cv_destroy(&mir->mir_timer_cv);
	kmem_free(mir, sizeof (mir_t));
	return (0);
}

/*
 * This is server side only (RPC_SERVER).
 *
 * Exit idle mode.
 */
static void
mir_svc_idle_stop(queue_t *q, mir_t *mir)
{
	ASSERT(MUTEX_HELD(&mir->mir_mutex));
	ASSERT((q->q_flag & QREADR) == 0);
	ASSERT(mir->mir_type == RPC_SERVER);
	RPCLOG(16, "rpcmod: mir_svc_idle_stop of q 0x%p\n", (void *)q);

	mir_timer_stop(mir);
}

/*
 * This is server side only (RPC_SERVER).
 *
 * Start idle processing, which will include setting idle timer if the
 * stream is not being closed.
 */
static void
mir_svc_idle_start(queue_t *q, mir_t *mir)
{
	ASSERT(MUTEX_HELD(&mir->mir_mutex));
	ASSERT((q->q_flag & QREADR) == 0);
	ASSERT(mir->mir_type == RPC_SERVER);
	RPCLOG(16, "rpcmod: mir_svc_idle_start q 0x%p\n", (void *)q);

	/*
	 * Don't re-start idle timer if we are closing queues.
	 */
	if (mir->mir_closing) {
		RPCLOG(16, "mir_svc_idle_start - closing: 0x%p\n",
		    (void *)q);

		/*
		 * We will call mir_svc_idle_start() whenever MIR_SVC_QUIESCED()
		 * is true.  When it is true, and we are in the process of
		 * closing the stream, signal any thread waiting in
		 * mir_close().
		 */
		if (mir->mir_inwservice == 0)
			cv_signal(&mir->mir_condvar);

	} else {
		RPCLOG(16, "mir_svc_idle_start - reset %s timer\n",
		    mir->mir_ordrel_pending ? "ordrel" : "normal");
		/*
		 * Normal condition, start the idle timer.  If an orderly
		 * release has been sent, set the timeout to wait for the
		 * client to close its side of the connection.  Otherwise,
		 * use the normal idle timeout.
		 */
		mir_timer_start(q, mir, mir->mir_ordrel_pending ?
		    svc_ordrel_timeout : mir->mir_idle_timeout);
	}
}
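/*
 * Illustrative sketch (not compiled): the idle test described for
 * mir_use_timestamp in the mir_t definition above.  MSEC_TO_TICK converts
 * the millisecond timeout to clock ticks; the stream counts as idle only
 * when "now" (lbolt) has passed the last-use timestamp by at least the
 * timeout.  The tick rate and values below are invented for the example.
 */
#if 0
#include <stdio.h>

#define	EX_HZ			100	/* assumed tick rate */
#define	EX_MSEC_TO_TICK(ms)	((ms) * EX_HZ / 1000)

int
main(void)
{
	long lbolt = 120000;			/* current tick count */
	long mir_use_timestamp = 90000;		/* last use, in ticks */
	long mir_idle_timeout = 5 * 60 * 1000;	/* 5 minutes, in msec */

	if (lbolt >= EX_MSEC_TO_TICK(mir_idle_timeout) + mir_use_timestamp)
		printf("idle: shut the connection down\n");
	else
		printf("still in use\n");
	return (0);
}
#endif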
/* ARGSUSED */
static int
mir_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
{
	mir_t	*mir;

	RPCLOG(32, "rpcmod: mir_open of q 0x%p\n", (void *)q);
	/* Set variables used directly by KRPC. */
	if (!mir_rele)
		mir_rele = mir_svc_release;
	if (!mir_start)
		mir_start = mir_svc_start;
	if (!clnt_stop_idle)
		clnt_stop_idle = mir_clnt_idle_do_stop;
	if (!clnt_max_msg_sizep)
		clnt_max_msg_sizep = &clnt_max_msg_size;
	if (!svc_max_msg_sizep)
		svc_max_msg_sizep = &svc_max_msg_size;

	/* Allocate a zero'ed out mir structure for this stream. */
	mir = kmem_zalloc(sizeof (mir_t), KM_SLEEP);

	/*
	 * We set hold inbound here so that incoming messages will
	 * be held on the read-side queue until the stream is completely
	 * initialized with a RPC_CLIENT or RPC_SERVER ioctl.  During
	 * the ioctl processing, the flag is cleared and any messages that
	 * arrived between the open and the ioctl are delivered to KRPC.
	 *
	 * Early data should never arrive on a client stream since
	 * servers only respond to our requests and we do not send any
	 * until after the stream is initialized.  Early data is
	 * very common on a server stream where the client will start
	 * sending data as soon as the connection is made (and this
	 * is especially true with TCP where the protocol accepts the
	 * connection before nfsd or KRPC is notified about it).
	 */

	mir->mir_hold_inbound = 1;

	/*
	 * Start the record marker looking for a 4-byte header.  When
	 * this length is negative, it indicates that rpcmod is looking
	 * for bytes to consume for the record marker header.  When it
	 * is positive, it holds the number of bytes that have arrived
	 * for the current fragment and are being held in mir_head_mp.
	 */

	mir->mir_frag_len = -(int32_t)sizeof (uint32_t);

	mir->mir_zoneid = rpc_zoneid();
	mutex_init(&mir->mir_mutex, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&mir->mir_condvar, NULL, CV_DRIVER, NULL);
	cv_init(&mir->mir_timer_cv, NULL, CV_DRIVER, NULL);

	q->q_ptr = (char *)mir;
	WR(q)->q_ptr = (char *)mir;

	/*
	 * We noenable the read-side queue because we don't want it
	 * automatically enabled by putq.  We enable it explicitly
	 * in mir_wsrv when appropriate.  (See additional comments on
	 * flow control at the beginning of mir_rsrv.)
	 */
	noenable(q);

	qprocson(q);
	return (0);
}

/*
 * Read-side put routine for both the client and server side.  Does the
 * record marking for incoming RPC messages, and when complete, dispatches
 * the message to either the client or server.
 */
static void
mir_do_rput(queue_t *q, mblk_t *mp, int srv)
{
	mblk_t	*cont_mp;
	int	excess;
	int32_t	frag_len;
	int32_t	frag_header;
	mblk_t	*head_mp;
	int	len;
	mir_t	*mir;
	mblk_t	*mp1;
	unsigned char	*rptr;
	mblk_t	*tail_mp;
	unsigned char	*wptr;
	boolean_t	stop_timer = B_FALSE;

	mir = (mir_t *)q->q_ptr;
	ASSERT(mir != NULL);

	/*
	 * If the stream has not been set up as a RPC_CLIENT or RPC_SERVER
	 * with the corresponding ioctl, then don't accept
	 * any inbound data.  This should never happen for streams
	 * created by nfsd or client-side KRPC because they are careful
	 * to set the mode of the stream before doing anything else.
	 */
	if (mir->mir_type == 0) {
		freemsg(mp);
		return;
	}

	ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));

	switch (mp->b_datap->db_type) {
	case M_DATA:
		break;
	case M_PROTO:
	case M_PCPROTO:
		rptr = mp->b_rptr;
		if (mp->b_wptr - rptr < sizeof (uint32_t)) {
			RPCLOG(1, "mir_rput: runt TPI message (%d bytes)\n",
			    (int)(mp->b_wptr - rptr));
			freemsg(mp);
			return;
		}
		if (((union T_primitives *)rptr)->type != T_DATA_IND) {
			mir_rput_proto(q, mp);
			return;
		}

		/* Throw away the T_DATA_IND block and continue with data. */
		mp1 = mp;
		mp = mp->b_cont;
		freeb(mp1);
		break;
	case M_SETOPTS:
		/*
		 * If a module on the stream is trying to set the Stream head's
		 * high water mark, then set our hiwater to the requested
		 * value.  We are the "stream head" for all inbound
		 * data messages since messages are passed directly to KRPC.
1462 */ 1463 if ((mp->b_wptr - mp->b_rptr) >= sizeof (struct stroptions)) { 1464 struct stroptions *stropts; 1465 1466 stropts = (struct stroptions *)mp->b_rptr; 1467 if ((stropts->so_flags & SO_HIWAT) && 1468 !(stropts->so_flags & SO_BAND)) { 1469 (void) strqset(q, QHIWAT, 0, stropts->so_hiwat); 1470 } 1471 } 1472 putnext(q, mp); 1473 return; 1474 case M_FLUSH: 1475 RPCLOG(32, "mir_do_rput: ignoring M_FLUSH on q 0x%p. ", 1476 (void *)q); 1477 RPCLOG(32, "M_FLUSH is %x\n", (uint_t)*mp->b_rptr); 1478 1479 putnext(q, mp); 1480 return; 1481 default: 1482 putnext(q, mp); 1483 return; 1484 } 1485 1486 mutex_enter(&mir->mir_mutex); 1487 1488 /* 1489 * If this connection is closing, don't accept any new messages. 1490 */ 1491 if (mir->mir_svc_no_more_msgs) { 1492 ASSERT(mir->mir_type == RPC_SERVER); 1493 mutex_exit(&mir->mir_mutex); 1494 freemsg(mp); 1495 return; 1496 } 1497 1498 /* Get local copies for quicker access. */ 1499 frag_len = mir->mir_frag_len; 1500 frag_header = mir->mir_frag_header; 1501 head_mp = mir->mir_head_mp; 1502 tail_mp = mir->mir_tail_mp; 1503 1504 /* Loop, processing each message block in the mp chain separately. */ 1505 do { 1506 /* 1507 * cont_mp is used in the do/while condition below to 1508 * walk to the next block in the STREAMS message. 1509 * mp->b_cont may be nil'ed during processing so we 1510 * can't rely on it to find the next block. 1511 */ 1512 cont_mp = mp->b_cont; 1513 1514 /* 1515 * Get local copies of rptr and wptr for our processing. 1516 * These always point into "mp" (the current block being 1517 * processed), but rptr is updated as we consume any 1518 * record header in this message, and wptr is updated to 1519 * point to the end of the data for the current fragment, 1520 * if it ends in this block. The main point is that 1521 * they are not always the same as b_rptr and b_wptr. 1522 * b_rptr and b_wptr will be updated when appropriate. 1523 */ 1524 rptr = mp->b_rptr; 1525 wptr = mp->b_wptr; 1526 same_mblk:; 1527 len = (int)(wptr - rptr); 1528 if (len <= 0) { 1529 /* 1530 * If we have processed all of the data in the message 1531 * or the block is empty to begin with, then we're 1532 * done with this block and can go on to cont_mp, 1533 * if there is one. 1534 * 1535 * First, we check to see if the current block is 1536 * now zero-length and, if so, we free it. 1537 * This happens when either the block was empty 1538 * to begin with or we consumed all of the data 1539 * for the record marking header. 1540 */ 1541 if (rptr <= mp->b_rptr) { 1542 /* 1543 * If head_mp is non-NULL, add cont_mp to the 1544 * mblk list. XXX But there is a possibility 1545 * that tail_mp = mp or even head_mp = mp XXX 1546 */ 1547 if (head_mp) { 1548 if (head_mp == mp) 1549 head_mp = NULL; 1550 else if (tail_mp != mp) { 1551 ASSERT((tail_mp->b_cont == NULL) || (tail_mp->b_cont == mp)); 1552 tail_mp->b_cont = cont_mp; 1553 /* 1554 * It's possible that, because 1555 * of a very short mblk (0-3 1556 * bytes), we've ended up here 1557 * and that cont_mp could be 1558 * NULL (if we're at the end 1559 * of an mblk chain). If so, 1560 * don't set tail_mp to 1561 * cont_mp, because the next 1562 * time we access it, we'll 1563 * dereference a NULL pointer 1564 * and crash. Just leave 1565 * tail_mp pointing at the 1566 * current end of chain. 
1567 */ 1568 if (cont_mp) 1569 tail_mp = cont_mp; 1570 } else { 1571 mblk_t *smp = head_mp; 1572 1573 while ((smp->b_cont != NULL) && 1574 (smp->b_cont != mp)) 1575 smp = smp->b_cont; 1576 smp->b_cont = cont_mp; 1577 /* 1578 * Don't set tail_mp to cont_mp 1579 * if it's NULL. Instead, set 1580 * tail_mp to smp, which is the 1581 * end of the chain starting 1582 * at head_mp. 1583 */ 1584 if (cont_mp) 1585 tail_mp = cont_mp; 1586 else 1587 tail_mp = smp; 1588 } 1589 } 1590 freeb(mp); 1591 } 1592 continue; 1593 } 1594 1595 /* 1596 * frag_len starts at -4 and is incremented past the record 1597 * marking header to 0, and then becomes positive as real data 1598 * bytes are received for the message. While frag_len is less 1599 * than zero, we need more bytes for the record marking 1600 * header. 1601 */ 1602 if (frag_len < 0) { 1603 uchar_t *up = rptr; 1604 /* 1605 * Collect as many bytes as we need for the record 1606 * marking header and that are available in this block. 1607 */ 1608 do { 1609 --len; 1610 frag_len++; 1611 frag_header <<= 8; 1612 frag_header += (*up++ & 0xFF); 1613 } while (len > 0 && frag_len < 0); 1614 1615 if (rptr == mp->b_rptr) { 1616 /* 1617 * The record header is located at the 1618 * beginning of the block, so just walk 1619 * b_rptr past it. 1620 */ 1621 mp->b_rptr = rptr = up; 1622 } else { 1623 /* 1624 * The record header is located in the middle 1625 * of a block, so copy any remaining data up. 1626 * This happens when an RPC message is 1627 * fragmented into multiple pieces and 1628 * a middle (or end) fragment immediately 1629 * follows a previous fragment in the same 1630 * message block. 1631 */ 1632 wptr = &rptr[len]; 1633 mp->b_wptr = wptr; 1634 if (len) { 1635 RPCLOG(32, "mir_do_rput: copying %d " 1636 "bytes of data up", len); 1637 RPCLOG(32, " db_ref %d\n", 1638 (uint_t)mp->b_datap->db_ref); 1639 bcopy(up, rptr, len); 1640 } 1641 } 1642 1643 /* 1644 * If we haven't received the complete record header 1645 * yet, then loop around to get the next block in the 1646 * STREAMS message. The logic at same_mblk label will 1647 * free the current block if it has become empty. 1648 */ 1649 if (frag_len < 0) { 1650 RPCLOG(32, "mir_do_rput: frag_len is still < 0 " 1651 "(%d)", len); 1652 goto same_mblk; 1653 } 1654 1655 #ifdef RPCDEBUG 1656 if ((frag_header & MIR_LASTFRAG) == 0) { 1657 RPCLOG0(32, "mir_do_rput: multi-fragment " 1658 "record\n"); 1659 } 1660 { 1661 uint_t l = frag_header & ~MIR_LASTFRAG; 1662 1663 if (l != 0 && mir->mir_max_msg_sizep && 1664 l >= *mir->mir_max_msg_sizep) { 1665 RPCLOG(32, "mir_do_rput: fragment size" 1666 " (%d) > maximum", l); 1667 RPCLOG(32, " (%u)\n", 1668 *mir->mir_max_msg_sizep); 1669 } 1670 } 1671 #endif 1672 /* 1673 * At this point we have retrieved the complete record 1674 * header for this fragment. If the current block is 1675 * empty, then we need to free it and walk to the next 1676 * block. 1677 */ 1678 if (mp->b_rptr >= wptr) { 1679 /* 1680 * If this is not the last fragment or if we 1681 * have not received all the data for this 1682 * RPC message, then loop around to the next 1683 * block. 1684 */ 1685 if (!(frag_header & MIR_LASTFRAG) || 1686 (frag_len - 1687 (frag_header & ~MIR_LASTFRAG)) || 1688 !head_mp) 1689 goto same_mblk; 1690 1691 /* 1692 * Quick walk to next block in the 1693 * STREAMS message. 1694 */ 1695 freeb(mp); 1696 continue; 1697 } 1698 } 1699 1700 /* 1701 * We've collected the complete record header. The data 1702 * in the current block is added to the end of the RPC 1703 * message. 
		 * Note that tail_mp is the same as mp after
		 * this linkage.
		 */
		if (!head_mp)
			head_mp = mp;
		else if (tail_mp != mp) {
			ASSERT((tail_mp->b_cont == NULL) ||
			    (tail_mp->b_cont == mp));
			tail_mp->b_cont = mp;
		}
		tail_mp = mp;

		/*
		 * Add the length of this block to the accumulated
		 * fragment length.
		 */
		frag_len += len;
		excess = frag_len - (frag_header & ~MIR_LASTFRAG);
		/*
		 * If we have not received all the data for this fragment,
		 * then walk to the next block.
		 */
		if (excess < 0)
			continue;

		/*
		 * We've received a complete fragment, so reset frag_len
		 * for the next one.
		 */
		frag_len = -(int32_t)sizeof (uint32_t);

		/*
		 * Update rptr to point to the beginning of the next
		 * fragment in this block.  If there are no more bytes
		 * in the block (excess is 0), then rptr will be equal
		 * to wptr.
		 */
		rptr = wptr - excess;

		/*
		 * Now we check to see if this fragment is the last one in
		 * the RPC message.
		 */
		if (!(frag_header & MIR_LASTFRAG)) {
			/*
			 * This isn't the last one, so start processing the
			 * next fragment.
			 */
			frag_header = 0;

			/*
			 * If excess is 0, the next fragment
			 * starts at the beginning of the next block --
			 * we "continue" to the end of the while loop and
			 * walk to cont_mp.
			 */
			if (excess == 0)
				continue;
			RPCLOG0(32, "mir_do_rput: multi-fragment message with "
			    "two or more fragments in one mblk\n");

			/*
			 * If excess is non-0, then the next fragment starts
			 * in this block.  rptr points to the beginning
			 * of the next fragment and we "goto same_mblk"
			 * to continue processing.
			 */
			goto same_mblk;
		}

		/*
		 * We've got a complete RPC message.  Before passing it
		 * upstream, check to see if there is extra data in this
		 * message block. If so, then we separate the excess
		 * from the complete message. The excess data is processed
		 * after the current message goes upstream.
		 */
		if (excess > 0) {
			RPCLOG(32, "mir_do_rput: end of record, but excess "
			    "data (%d bytes) in this mblk. dupb/copyb "
			    "needed\n", excess);

			/* Duplicate only the overlapping block. */
			mp1 = dupb(tail_mp);

			/*
			 * dupb() might have failed due to ref count wrap around
			 * so try a copyb().
			 */
			if (mp1 == NULL)
				mp1 = copyb(tail_mp);

			/*
			 * Do not use bufcall() to schedule a "buffer
			 * availability event."  The reason is that
			 * bufcall() has problems.  For example, if memory
			 * runs out, bufcall() itself will fail since it
			 * needs to allocate memory.  The most appropriate
			 * action right now is to disconnect this connection
			 * as the system is under stress.  We should try to
			 * free up resources.
			 */
			if (mp1 == NULL) {
				freemsg(head_mp);
				RPCLOG0(1, "mir_do_rput: dupb/copyb failed\n");
				mir->mir_frag_header = 0;
				mir->mir_frag_len = -(int)sizeof (uint32_t);
				mir->mir_head_mp = NULL;
				mir->mir_tail_mp = NULL;

				mir_disconnect(q, mir);
				return;
			}

			/*
			 * The new message block is linked with the
			 * continuation block in cont_mp.  We then point
			 * cont_mp to the new block so that we will
			 * process it next.
1822 */ 1823 mp1->b_cont = cont_mp; 1824 cont_mp = mp1; 1825 /* 1826 * Data in the new block begins at the 1827 * next fragment (rptr). 1828 */ 1829 cont_mp->b_rptr += (rptr - tail_mp->b_rptr); 1830 ASSERT(cont_mp->b_rptr >= cont_mp->b_datap->db_base); 1831 ASSERT(cont_mp->b_rptr <= cont_mp->b_wptr); 1832 1833 /* Data in the current fragment ends at rptr. */ 1834 tail_mp->b_wptr = rptr; 1835 ASSERT(tail_mp->b_wptr <= tail_mp->b_datap->db_lim); 1836 ASSERT(tail_mp->b_wptr >= tail_mp->b_rptr); 1837 1838 } 1839 1840 /* tail_mp is the last block with data for this RPC message. */ 1841 tail_mp->b_cont = NULL; 1842 1843 /* Pass the RPC message to the current consumer. */ 1844 switch (mir->mir_type) { 1845 case RPC_CLIENT: 1846 if (clnt_dispatch_notify(head_mp, mir->mir_zoneid)) { 1847 /* 1848 * Mark this stream as active. This marker 1849 * is used in mir_timer(). 1850 */ 1851 1852 mir->mir_clntreq = 1; 1853 mir->mir_use_timestamp = lbolt; 1854 } else 1855 freemsg(head_mp); 1856 break; 1857 1858 case RPC_SERVER: 1859 /* 1860 * Check for flow control before passing the 1861 * message to KRPC. 1862 */ 1863 1864 if (!mir->mir_hold_inbound) { 1865 if (mir->mir_krpc_cell) { 1866 /* 1867 * If the reference count is 0 1868 * (not including this request), 1869 * then the stream is transitioning 1870 * from idle to non-idle. In this case, 1871 * we cancel the idle timer. 1872 */ 1873 if (mir->mir_ref_cnt++ == 0) 1874 stop_timer = B_TRUE; 1875 if (mir_check_len(q, 1876 (int32_t)msgdsize(mp), mp)) 1877 return; 1878 svc_queuereq(q, head_mp); /* to KRPC */ 1879 } else { 1880 /* 1881 * Count # of times this happens. Should 1882 * be never, but experience shows 1883 * otherwise. 1884 */ 1885 mir_krpc_cell_null++; 1886 freemsg(head_mp); 1887 } 1888 1889 } else { 1890 /* 1891 * If the outbound side of the stream is 1892 * flow controlled, then hold this message 1893 * until client catches up. mir_hold_inbound 1894 * is set in mir_wput and cleared in mir_wsrv. 1895 */ 1896 if (srv) 1897 (void) putbq(q, head_mp); 1898 else 1899 (void) putq(q, head_mp); 1900 mir->mir_inrservice = B_TRUE; 1901 } 1902 break; 1903 default: 1904 RPCLOG(1, "mir_rput: unknown mir_type %d\n", 1905 mir->mir_type); 1906 freemsg(head_mp); 1907 break; 1908 } 1909 1910 /* 1911 * Reset head_mp and frag_header since we're starting on a 1912 * new RPC fragment and message. 1913 */ 1914 head_mp = NULL; 1915 tail_mp = NULL; 1916 frag_header = 0; 1917 } while ((mp = cont_mp) != NULL); 1918 1919 /* 1920 * Do a sanity check on the message length. If this message is 1921 * getting excessively large, shut down the connection. 1922 */ 1923 if (head_mp != NULL && mir->mir_setup_complete && 1924 mir_check_len(q, frag_len, head_mp)) 1925 return; 1926 1927 /* Save our local copies back in the mir structure. */ 1928 mir->mir_frag_header = frag_header; 1929 mir->mir_frag_len = frag_len; 1930 mir->mir_head_mp = head_mp; 1931 mir->mir_tail_mp = tail_mp; 1932 1933 /* 1934 * The timer is stopped after the whole message chain is processed. 1935 * The reason is that stopping the timer releases the mir_mutex 1936 * lock temporarily. This means that the request can be serviced 1937 * while we are still processing the message chain. This is not 1938 * good. So we stop the timer here instead. 
1939 * 1940 * Note that if the timer fires before we stop it, it will not 1941 * do any harm as MIR_SVC_QUIESCED() is false and mir_timer() 1942 * will just return; 1943 */ 1944 if (stop_timer) { 1945 RPCLOG(16, "mir_do_rput stopping idle timer on 0x%p because " 1946 "ref cnt going to non zero\n", (void *) WR(q)); 1947 mir_svc_idle_stop(WR(q), mir); 1948 } 1949 mutex_exit(&mir->mir_mutex); 1950 } 1951 1952 static void 1953 mir_rput(queue_t *q, mblk_t *mp) 1954 { 1955 mir_do_rput(q, mp, 0); 1956 } 1957 1958 static void 1959 mir_rput_proto(queue_t *q, mblk_t *mp) 1960 { 1961 mir_t *mir = (mir_t *)q->q_ptr; 1962 uint32_t type; 1963 uint32_t reason = 0; 1964 1965 ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex)); 1966 1967 type = ((union T_primitives *)mp->b_rptr)->type; 1968 switch (mir->mir_type) { 1969 case RPC_CLIENT: 1970 switch (type) { 1971 case T_DISCON_IND: 1972 reason = ((struct T_discon_ind *) 1973 (mp->b_rptr))->DISCON_reason; 1974 /*FALLTHROUGH*/ 1975 case T_ORDREL_IND: 1976 mutex_enter(&mir->mir_mutex); 1977 if (mir->mir_head_mp) { 1978 freemsg(mir->mir_head_mp); 1979 mir->mir_head_mp = (mblk_t *)0; 1980 mir->mir_tail_mp = (mblk_t *)0; 1981 } 1982 /* 1983 * We are disconnecting, but not necessarily 1984 * closing. By not closing, we will fail to 1985 * pick up a possibly changed global timeout value, 1986 * unless we store it now. 1987 */ 1988 mir->mir_idle_timeout = clnt_idle_timeout; 1989 mir_clnt_idle_stop(WR(q), mir); 1990 1991 /* 1992 * Even though we are unconnected, we still 1993 * leave the idle timer going on the client. The 1994 * reason for is that if we've disconnected due 1995 * to a server-side disconnect, reset, or connection 1996 * timeout, there is a possibility the client may 1997 * retry the RPC request. This retry needs to done on 1998 * the same bound address for the server to interpret 1999 * it as such. However, we don't want 2000 * to wait forever for that possibility. If the 2001 * end-point stays unconnected for mir_idle_timeout 2002 * units of time, then that is a signal to the 2003 * connection manager to give up waiting for the 2004 * application (eg. NFS) to send a retry. 
2005 */ 2006 mir_clnt_idle_start(WR(q), mir); 2007 mutex_exit(&mir->mir_mutex); 2008 clnt_dispatch_notifyall(WR(q), type, reason); 2009 freemsg(mp); 2010 return; 2011 case T_ERROR_ACK: 2012 { 2013 struct T_error_ack *terror; 2014 2015 terror = (struct T_error_ack *)mp->b_rptr; 2016 RPCLOG(1, "mir_rput_proto T_ERROR_ACK for queue 0x%p", 2017 (void *)q); 2018 RPCLOG(1, " ERROR_prim: %s,", 2019 rpc_tpiprim2name(terror->ERROR_prim)); 2020 RPCLOG(1, " TLI_error: %s,", 2021 rpc_tpierr2name(terror->TLI_error)); 2022 RPCLOG(1, " UNIX_error: %d\n", terror->UNIX_error); 2023 if (terror->ERROR_prim == T_DISCON_REQ) { 2024 clnt_dispatch_notifyall(WR(q), type, reason); 2025 freemsg(mp); 2026 return; 2027 } else { 2028 if (clnt_dispatch_notifyconn(WR(q), mp)) 2029 return; 2030 } 2031 break; 2032 } 2033 case T_OK_ACK: 2034 { 2035 struct T_ok_ack *tok = (struct T_ok_ack *)mp->b_rptr; 2036 2037 if (tok->CORRECT_prim == T_DISCON_REQ) { 2038 clnt_dispatch_notifyall(WR(q), type, reason); 2039 freemsg(mp); 2040 return; 2041 } else { 2042 if (clnt_dispatch_notifyconn(WR(q), mp)) 2043 return; 2044 } 2045 break; 2046 } 2047 case T_CONN_CON: 2048 case T_INFO_ACK: 2049 case T_OPTMGMT_ACK: 2050 if (clnt_dispatch_notifyconn(WR(q), mp)) 2051 return; 2052 break; 2053 case T_BIND_ACK: 2054 break; 2055 default: 2056 RPCLOG(1, "mir_rput: unexpected message %d " 2057 "for KRPC client\n", 2058 ((union T_primitives *)mp->b_rptr)->type); 2059 break; 2060 } 2061 break; 2062 2063 case RPC_SERVER: 2064 switch (type) { 2065 case T_BIND_ACK: 2066 { 2067 struct T_bind_ack *tbind; 2068 2069 /* 2070 * If this is a listening stream, then shut 2071 * off the idle timer. 2072 */ 2073 tbind = (struct T_bind_ack *)mp->b_rptr; 2074 if (tbind->CONIND_number > 0) { 2075 mutex_enter(&mir->mir_mutex); 2076 mir_svc_idle_stop(WR(q), mir); 2077 2078 /* 2079 * mark this as a listen endpoint 2080 * for special handling. 2081 */ 2082 2083 mir->mir_listen_stream = 1; 2084 mutex_exit(&mir->mir_mutex); 2085 } 2086 break; 2087 } 2088 case T_DISCON_IND: 2089 case T_ORDREL_IND: 2090 RPCLOG(16, "mir_rput_proto: got %s indication\n", 2091 type == T_DISCON_IND ? "disconnect" 2092 : "orderly release"); 2093 2094 /* 2095 * For listen endpoint just pass 2096 * on the message. 2097 */ 2098 2099 if (mir->mir_listen_stream) 2100 break; 2101 2102 mutex_enter(&mir->mir_mutex); 2103 2104 /* 2105 * If client wants to break off connection, record 2106 * that fact. 2107 */ 2108 mir_svc_start_close(WR(q), mir); 2109 2110 /* 2111 * If we are idle, then send the orderly release 2112 * or disconnect indication to nfsd. 2113 */ 2114 if (MIR_SVC_QUIESCED(mir)) { 2115 mutex_exit(&mir->mir_mutex); 2116 break; 2117 } 2118 2119 RPCLOG(16, "mir_rput_proto: not idle, so " 2120 "disconnect/ord rel indication not passed " 2121 "upstream on 0x%p\n", (void *)q); 2122 2123 /* 2124 * Hold the indication until we get idle 2125 * If there already is an indication stored, 2126 * replace it if the new one is a disconnect. The 2127 * reasoning is that disconnection takes less time 2128 * to process, and once a client decides to 2129 * disconnect, we should do that. 2130 */ 2131 if (mir->mir_svc_pend_mp) { 2132 if (type == T_DISCON_IND) { 2133 RPCLOG(16, "mir_rput_proto: replacing" 2134 " held disconnect/ord rel" 2135 " indication with disconnect on" 2136 " 0x%p\n", (void *)q); 2137 2138 freemsg(mir->mir_svc_pend_mp); 2139 mir->mir_svc_pend_mp = mp; 2140 } else { 2141 RPCLOG(16, "mir_rput_proto: already " 2142 "held a disconnect/ord rel " 2143 "indication. 

/*
 * The server-side read queues are used to hold inbound messages while
 * outbound flow control is exerted. When outbound flow control is
 * relieved, mir_wsrv qenables the read-side queue. Read-side queues
 * are not enabled by STREAMS and are explicitly noenable'ed in mir_open.
 *
 * For the server side, we have two types of messages queued. The first type
 * are messages that are ready to be XDR decoded and then sent to the
 * RPC program's dispatch routine. The second type are "raw" messages that
 * haven't been processed, i.e. assembled from RPC record fragments into
 * full requests. The only time we will see the second type of message
 * queued is if we have a memory allocation failure while processing a
 * raw message. The field mir_first_non_processed_mblk will mark the
 * first such raw message. So the flow for the server side is:
 *
 * - send processed queued messages to kRPC until we run out or find
 *   one that needs additional processing because we were short on memory
 *   earlier
 * - process a message that was deferred because of lack of
 *   memory
 * - continue processing messages until the queue empties or we
 *   have to stop because of lack of memory
 * - during each of the above phases, if the queue is empty and
 *   there are no pending messages that were passed to the RPC
 *   layer, send upstream the pending disconnect/ordrel indication if
 *   there is one
 *
 * The read-side queue is also enabled by a bufcall callback if dupmsg
 * fails in mir_rput.
 */
static void
mir_rsrv(queue_t *q)
{
	mir_t	*mir;
	mblk_t	*mp;
	mblk_t	*cmp = NULL;
	boolean_t stop_timer = B_FALSE;

	mir = (mir_t *)q->q_ptr;
	mutex_enter(&mir->mir_mutex);

	mp = NULL;
	switch (mir->mir_type) {
	case RPC_SERVER:
		if (mir->mir_ref_cnt == 0)
			mir->mir_hold_inbound = 0;
		if (mir->mir_hold_inbound) {

			ASSERT(cmp == NULL);
			if (q->q_first == NULL) {

				MIR_CLEAR_INRSRV(mir);

				if (MIR_SVC_QUIESCED(mir)) {
					cmp = mir->mir_svc_pend_mp;
					mir->mir_svc_pend_mp = NULL;
				}
			}

			mutex_exit(&mir->mir_mutex);

			if (cmp != NULL) {
				RPCLOG(16, "mir_rsrv: line %d: sending a held "
				    "disconnect/ord rel indication upstream\n",
				    __LINE__);
				putnext(q, cmp);
			}

			return;
		}
		while (mp = getq(q)) {
			if (mir->mir_krpc_cell &&
			    (mir->mir_svc_no_more_msgs == 0)) {
				/*
				 * If we were idle, turn off the idle timer
				 * since we aren't idle any more.
				 */
				if (mir->mir_ref_cnt++ == 0)
					stop_timer = B_TRUE;
				if (mir_check_len(q,
				    (int32_t)msgdsize(mp), mp))
					return;
				svc_queuereq(q, mp);
			} else {
				/*
				 * Count the number of times this happens.
				 * It should never happen, but experience
				 * shows otherwise.
				 */
				if (mir->mir_krpc_cell == NULL)
					mir_krpc_cell_null++;
				freemsg(mp);
			}
		}
		break;
	case RPC_CLIENT:
		break;
	default:
		RPCLOG(1, "mir_rsrv: unexpected mir_type %d\n", mir->mir_type);

		if (q->q_first == NULL)
			MIR_CLEAR_INRSRV(mir);

		mutex_exit(&mir->mir_mutex);

		return;
	}

	/*
	 * The timer is stopped here, after all the messages are processed,
	 * rather than inside the loop above.  Stopping the timer releases
	 * the mir_mutex lock temporarily, which would allow a request to be
	 * serviced while we are still draining the message queue.  That is
	 * not good, so we stop the timer here instead.
	 */
	if (stop_timer) {
		RPCLOG(16, "mir_rsrv stopping idle timer on 0x%p because ref "
		    "cnt going to non zero\n", (void *)WR(q));
		mir_svc_idle_stop(WR(q), mir);
	}

	if (q->q_first == NULL) {

		MIR_CLEAR_INRSRV(mir);

		ASSERT(cmp == NULL);
		if (mir->mir_type == RPC_SERVER && MIR_SVC_QUIESCED(mir)) {
			cmp = mir->mir_svc_pend_mp;
			mir->mir_svc_pend_mp = NULL;
		}

		mutex_exit(&mir->mir_mutex);

		if (cmp != NULL) {
			RPCLOG(16, "mir_rsrv: line %d: sending a held "
			    "disconnect/ord rel indication upstream\n",
			    __LINE__);
			putnext(q, cmp);
		}

		return;
	}
	mutex_exit(&mir->mir_mutex);
}
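
/*
 * Request lifecycle note: each message handed to svc_queuereq() above
 * holds a reference (mir_ref_cnt); kRPC drops it by calling
 * mir_svc_release() below when the request completes, which is also
 * what restarts idle processing.
 */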

static int	mir_svc_policy_fails;

/*
 * Called to send an event code to nfsd/lockd so that it initiates a
 * connection close.
 */
static int
mir_svc_policy_notify(queue_t *q, int event)
{
	mblk_t	*mp;
#ifdef DEBUG
	mir_t	*mir = (mir_t *)q->q_ptr;
	ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));
#endif
	ASSERT(q->q_flag & QREADR);

	/*
	 * Create an M_DATA message with the event code and pass it to the
	 * stream head (nfsd or whoever created the stream will consume it).
	 */
	mp = allocb(sizeof (int), BPRI_HI);

	if (!mp) {

		mir_svc_policy_fails++;
		RPCLOG(16, "mir_svc_policy_notify: could not allocate event "
		    "%d\n", event);
		return (ENOMEM);
	}

	U32_TO_BE32(event, mp->b_rptr);
	mp->b_wptr = mp->b_rptr + sizeof (int);
	putnext(q, mp);
	return (0);
}
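
/*
 * A minimal sketch of how the user-level consumer at the stream head
 * (nfsd or lockd) might pull the event code off the stream.  This is a
 * hypothetical illustration only; the fd, helper name, and error
 * handling are assumptions, not part of this module:
 */
#if 0
/* hypothetical user-level consumer; fd is the transport descriptor */
static void
consume_policy_event(int fd)
{
	uint32_t event;

	if (read(fd, &event, sizeof (event)) == sizeof (event))
		event = ntohl(event);	/* written in network byte order above */
}
#endif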

/*
 * Server side: start the close phase. We want to get this rpcmod slot in an
 * idle state before mir_close() is called.
 */
static void
mir_svc_start_close(queue_t *wq, mir_t *mir)
{
	ASSERT(MUTEX_HELD(&mir->mir_mutex));
	ASSERT((wq->q_flag & QREADR) == 0);
	ASSERT(mir->mir_type == RPC_SERVER);

	/*
	 * Do not accept any more messages.
	 */
	mir->mir_svc_no_more_msgs = 1;

	/*
	 * The next two statements will make the read service procedure
	 * invoke svc_queuereq() on everything stuck in the streams read
	 * queue.  This is not strictly necessary, because enabling the
	 * write queue will have the same effect, but why not speed the
	 * process along?
	 */
	mir->mir_hold_inbound = 0;
	qenable(RD(wq));

	/*
	 * Meanwhile force the write service procedure to send the
	 * responses downstream, regardless of flow control.
	 */
	qenable(wq);
}

/*
 * This routine is called directly by KRPC after a request is completed,
 * whether a reply was sent or the request was dropped.
 */
static void
mir_svc_release(queue_t *wq, mblk_t *mp)
{
	mir_t	*mir = (mir_t *)wq->q_ptr;
	mblk_t	*cmp = NULL;

	ASSERT((wq->q_flag & QREADR) == 0);
	if (mp)
		freemsg(mp);

	mutex_enter(&mir->mir_mutex);

	/*
	 * Start idle processing if this is the last reference.
	 */
	if ((mir->mir_ref_cnt == 1) && (mir->mir_inrservice == 0)) {

		RPCLOG(16, "mir_svc_release starting idle timer on 0x%p "
		    "because ref cnt is going to zero\n", (void *)wq);

		cmp = mir->mir_svc_pend_mp;
		mir->mir_svc_pend_mp = NULL;
		mir_svc_idle_start(wq, mir);
	}

	mir->mir_ref_cnt--;
	ASSERT(mir->mir_ref_cnt >= 0);

	/*
	 * Wake up the thread waiting to close.
	 */
	if ((mir->mir_ref_cnt == 0) && mir->mir_closing)
		cv_signal(&mir->mir_condvar);

	mutex_exit(&mir->mir_mutex);

	if (cmp) {
		RPCLOG(16, "mir_svc_release: sending a held "
		    "disconnect/ord rel indication upstream on queue 0x%p\n",
		    (void *)RD(wq));

		putnext(RD(wq), cmp);
	}
}

/*
 * This routine is called by server-side KRPC when it is ready to
 * handle inbound messages on the stream.
 */
static void
mir_svc_start(queue_t *wq)
{
	mir_t	*mir = (mir_t *)wq->q_ptr;

	/*
	 * We no longer need to take the mir_mutex here because the
	 * mir_setup_complete field has been moved out of the bit field
	 * that is protected by the mir_mutex.
	 */
	mir->mir_setup_complete = 1;
	qenable(RD(wq));
}

/*
 * Client side wrapper for stopping the timer with the normal idle timeout.
 */
static void
mir_clnt_idle_stop(queue_t *wq, mir_t *mir)
{
	ASSERT(MUTEX_HELD(&mir->mir_mutex));
	ASSERT((wq->q_flag & QREADR) == 0);
	ASSERT(mir->mir_type == RPC_CLIENT);

	mir_timer_stop(mir);
}

/*
 * Client side wrapper for starting the timer with the normal idle timeout.
 */
static void
mir_clnt_idle_start(queue_t *wq, mir_t *mir)
{
	ASSERT(MUTEX_HELD(&mir->mir_mutex));
	ASSERT((wq->q_flag & QREADR) == 0);
	ASSERT(mir->mir_type == RPC_CLIENT);

	mir_timer_start(wq, mir, mir->mir_idle_timeout);
}

/*
 * Client side only.  Forces rpcmod to stop sending T_ORDREL_REQs on
 * end-points that aren't connected.
 */
static void
mir_clnt_idle_do_stop(queue_t *wq)
{
	mir_t	*mir = (mir_t *)wq->q_ptr;

	RPCLOG(1, "mir_clnt_idle_do_stop: wq 0x%p\n", (void *)wq);
	ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));
	mutex_enter(&mir->mir_mutex);
	mir_clnt_idle_stop(wq, mir);
	mutex_exit(&mir->mir_mutex);
}
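
/*
 * Unlike the two wrappers above, mir_clnt_idle_do_stop() acquires
 * mir_mutex itself: as its ASSERT shows, callers of this entry point
 * hold no rpcmod locks when they force the timer off.
 */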

/*
 * Timer handler.  It handles the idle timeout and memory shortage problems.
 */
static void
mir_timer(void *arg)
{
	queue_t	*wq = (queue_t *)arg;
	mir_t	*mir = (mir_t *)wq->q_ptr;
	boolean_t notify;

	mutex_enter(&mir->mir_mutex);

	/*
	 * mir_timer_call is set only when either mir_timer_[start|stop]
	 * is progressing.  And mir_timer() can only be run while they
	 * are progressing if the timer is being stopped.  So just
	 * return.
	 */
	if (mir->mir_timer_call) {
		mutex_exit(&mir->mir_mutex);
		return;
	}
	mir->mir_timer_id = 0;

	switch (mir->mir_type) {
	case RPC_CLIENT:

		/*
		 * For clients, the timer fires at clnt_idle_timeout
		 * intervals.  If the activity marker (mir_clntreq) is
		 * zero, then the stream has been idle since the last
		 * timer event and we notify KRPC.  If mir_clntreq is
		 * non-zero, then the stream is active and we just
		 * restart the timer for another interval.  mir_clntreq
		 * is set to 1 in mir_wput for every request passed
		 * downstream.
		 *
		 * If this was a memory shortage timer, reset the idle
		 * timeout regardless; mir_clntreq will not be a
		 * valid indicator.
		 *
		 * The timer is initially started in mir_wput during
		 * RPC_CLIENT ioctl processing.
		 *
		 * The timer interval can be changed for individual
		 * streams with the ND variable "mir_idle_timeout".
		 */
		if (mir->mir_clntreq > 0 && mir->mir_use_timestamp +
		    MSEC_TO_TICK(mir->mir_idle_timeout) - lbolt >= 0) {
			clock_t tout;

			tout = mir->mir_idle_timeout -
			    TICK_TO_MSEC(lbolt - mir->mir_use_timestamp);
			if (tout < 0)
				tout = 1000;
#if 0
			printf("mir_timer[%d < %d + %d]: reset client timer "
			    "to %d (ms)\n", TICK_TO_MSEC(lbolt),
			    TICK_TO_MSEC(mir->mir_use_timestamp),
			    mir->mir_idle_timeout, tout);
#endif
			mir->mir_clntreq = 0;
			mir_timer_start(wq, mir, tout);
			mutex_exit(&mir->mir_mutex);
			return;
		}
#if 0
		printf("mir_timer[%d]: doing client timeout\n", lbolt / hz);
#endif
		/*
		 * We are disconnecting, but not necessarily
		 * closing. By not closing, we will fail to
		 * pick up a possibly changed global timeout value,
		 * unless we store it now.
		 */
		mir->mir_idle_timeout = clnt_idle_timeout;
		mir_clnt_idle_start(wq, mir);

		mutex_exit(&mir->mir_mutex);
		/*
		 * We pass T_ORDREL_REQ as an integer value
		 * to KRPC as the indication that the stream
		 * is idle.  This is not a T_ORDREL_REQ message,
		 * it is just a convenient value since we call
		 * the same KRPC routine for T_ORDREL_INDs and
		 * T_DISCON_INDs.
		 */
		clnt_dispatch_notifyall(wq, T_ORDREL_REQ, 0);
		return;

	case RPC_SERVER:

		/*
		 * For servers, the timer is only running when the stream
		 * is really idle or memory is short.  The timer is started
		 * by mir_wput when mir_type is set to RPC_SERVER and
		 * by mir_svc_idle_start whenever the stream goes idle
		 * (mir_ref_cnt == 0).  The timer is cancelled in
		 * mir_rput whenever a new inbound request is passed to KRPC
		 * and the stream was previously idle.
		 *
		 * The timer interval can be changed for individual
		 * streams with the ND variable "mir_idle_timeout".
		 *
		 * If the stream is not idle, do nothing.
		 */
		if (!MIR_SVC_QUIESCED(mir)) {
			mutex_exit(&mir->mir_mutex);
			return;
		}

		notify = !mir->mir_inrservice;
		mutex_exit(&mir->mir_mutex);

		/*
		 * If there is no packet queued up in the read queue, the
		 * stream is really idle so notify nfsd to close it.
		 */
		if (notify) {
			RPCLOG(16, "mir_timer: telling stream head listener "
			    "to close stream (0x%p)\n", (void *)RD(wq));
			(void) mir_svc_policy_notify(RD(wq), 1);
		}
		return;
	default:
		RPCLOG(1, "mir_timer: unexpected mir_type %d\n",
		    mir->mir_type);
		mutex_exit(&mir->mir_mutex);
		return;
	}
}
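
/*
 * Worked example of the client re-arm arithmetic above: with
 * mir_idle_timeout = 300000 ms and the last use (mir_use_timestamp)
 * 100000 ms ago, tout = 300000 - 100000 = 200000, so the timer is
 * re-armed for the 200000 ms remaining in the idle window.  The numbers
 * are illustrative only, not defaults taken from this module.
 */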

/*
 * Called by the RPC package to send either a call or a return, or a
 * transport connection request.  Adds the record marking header.
 */
static void
mir_wput(queue_t *q, mblk_t *mp)
{
	uint_t	frag_header;
	mir_t	*mir = (mir_t *)q->q_ptr;
	uchar_t	*rptr = mp->b_rptr;

	if (!mir) {
		freemsg(mp);
		return;
	}

	if (mp->b_datap->db_type != M_DATA) {
		mir_wput_other(q, mp);
		return;
	}

	if (mir->mir_ordrel_pending == 1) {
		freemsg(mp);
		RPCLOG(16, "mir_wput wq 0x%p: got data after T_ORDREL_REQ\n",
		    (void *)q);
		return;
	}

	frag_header = (uint_t)DLEN(mp);
	frag_header |= MIR_LASTFRAG;

	/* Stick in the 4 byte record marking header. */
	if ((rptr - mp->b_datap->db_base) < sizeof (uint32_t) ||
	    !IS_P2ALIGNED(mp->b_rptr, sizeof (uint32_t))) {
		/*
		 * Since we know that M_DATA messages are created exclusively
		 * by KRPC, we expect that KRPC will leave room for our header
		 * and 4 byte align which is normal for XDR.
		 * If KRPC (or someone else) does not cooperate, then we
		 * just throw away the message.
		 */
		RPCLOG(1, "mir_wput: KRPC did not leave space for record "
		    "fragment header (%d bytes left)\n",
		    (int)(rptr - mp->b_datap->db_base));
		freemsg(mp);
		return;
	}
	rptr -= sizeof (uint32_t);
	*(uint32_t *)rptr = htonl(frag_header);
	mp->b_rptr = rptr;

	mutex_enter(&mir->mir_mutex);
	if (mir->mir_type == RPC_CLIENT) {
		/*
		 * For the client, set mir_clntreq to indicate that the
		 * connection is active.
		 */
		mir->mir_clntreq = 1;
		mir->mir_use_timestamp = lbolt;
	}

	/*
	 * If we haven't already queued some data and the downstream module
	 * can accept more data, send it on, otherwise we queue the message
	 * and take other actions depending on mir_type.
	 */
	if (!mir->mir_inwservice && MIR_WCANPUTNEXT(mir, q)) {
		mutex_exit(&mir->mir_mutex);

		/*
		 * Now we pass the RPC message downstream.
		 */
		putnext(q, mp);
		return;
	}

	switch (mir->mir_type) {
	case RPC_CLIENT:
		/*
		 * Check for a previous duplicate request on the
		 * queue.  If there is one, then we throw away
		 * the current message and let the previous one
		 * go through.  If we can't find a duplicate, then
		 * send this one.  This tap dance is an effort
		 * to reduce traffic and processing requirements
		 * under load conditions.
		 */
		if (mir_clnt_dup_request(q, mp)) {
			mutex_exit(&mir->mir_mutex);
			freemsg(mp);
			return;
		}
		break;
	case RPC_SERVER:
		/*
		 * Set mir_hold_inbound so that new inbound RPC
		 * messages will be held until the client catches
		 * up on the earlier replies.  This flag is cleared
		 * in mir_wsrv after flow control is relieved;
		 * the read-side queue is also enabled at that time.
		 */
		mir->mir_hold_inbound = 1;
		break;
	default:
		RPCLOG(1, "mir_wput: unexpected mir_type %d\n", mir->mir_type);
		break;
	}
	mir->mir_inwservice = 1;
	(void) putq(q, mp);
	mutex_exit(&mir->mir_mutex);
}
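
/*
 * For reference, the 4-byte record mark written by mir_wput() above is
 * the standard RPC record marking header: the high bit flags the last
 * fragment and the low 31 bits carry the fragment length.  A minimal
 * sketch of how a peer might decode it (hypothetical helper, never
 * compiled into this module):
 */
#if 0
static void
decode_record_mark(uint32_t netmark, uint32_t *lenp, int *lastp)
{
	uint32_t mark = ntohl(netmark);	/* mark travels in network order */

	*lastp = (mark & MIR_LASTFRAG) != 0;
	*lenp = mark & ~MIR_LASTFRAG;
}
#endif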

static void
mir_wput_other(queue_t *q, mblk_t *mp)
{
	mir_t	*mir = (mir_t *)q->q_ptr;
	struct iocblk	*iocp;
	uchar_t	*rptr = mp->b_rptr;
	bool_t	flush_in_svc = FALSE;

	ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));
	switch (mp->b_datap->db_type) {
	case M_IOCTL:
		iocp = (struct iocblk *)rptr;
		switch (iocp->ioc_cmd) {
		case RPC_CLIENT:
			mutex_enter(&mir->mir_mutex);
			if (mir->mir_type != 0 &&
			    mir->mir_type != iocp->ioc_cmd) {
			ioc_eperm:
				mutex_exit(&mir->mir_mutex);
				iocp->ioc_error = EPERM;
				iocp->ioc_count = 0;
				mp->b_datap->db_type = M_IOCACK;
				qreply(q, mp);
				return;
			}

			mir->mir_type = iocp->ioc_cmd;

			/*
			 * Clear mir_hold_inbound which was set to 1 by
			 * mir_open.  This flag is not used on client
			 * streams.
			 */
			mir->mir_hold_inbound = 0;
			mir->mir_max_msg_sizep = &clnt_max_msg_size;

			/*
			 * Start the idle timer.  See mir_timer() for more
			 * information on how client timers work.
			 */
			mir->mir_idle_timeout = clnt_idle_timeout;
			mir_clnt_idle_start(q, mir);
			mutex_exit(&mir->mir_mutex);

			mp->b_datap->db_type = M_IOCACK;
			qreply(q, mp);
			return;
		case RPC_SERVER:
			mutex_enter(&mir->mir_mutex);
			if (mir->mir_type != 0 &&
			    mir->mir_type != iocp->ioc_cmd)
				goto ioc_eperm;

			/*
			 * We don't clear mir_hold_inbound here because
			 * mir_hold_inbound is used in the flow control
			 * model.  If we cleared it here, then we'd commit
			 * a small violation of the model where the transport
			 * might immediately block downstream flow.
			 */
			mir->mir_type = iocp->ioc_cmd;
			mir->mir_max_msg_sizep = &svc_max_msg_size;

			/*
			 * Start the idle timer.  See mir_timer() for more
			 * information on how server timers work.
			 *
			 * Note that it is important to start the idle timer
			 * here so that connections time out even if we
			 * never receive any data on them.
			 */
			mir->mir_idle_timeout = svc_idle_timeout;
			RPCLOG(16, "mir_wput_other starting idle timer on "
			    "0x%p because we got RPC_SERVER ioctl\n",
			    (void *)q);
			mir_svc_idle_start(q, mir);
			mutex_exit(&mir->mir_mutex);

			mp->b_datap->db_type = M_IOCACK;
			qreply(q, mp);
			return;
		default:
			break;
		}
		break;

	case M_PROTO:
		if (mir->mir_type == RPC_CLIENT) {
			/*
			 * We are likely being called from the context of a
			 * service procedure, so we need to enqueue.  However,
			 * enqueuing may put our message behind data messages,
			 * so flush the data first.
			 */
			flush_in_svc = TRUE;
		}
		if ((mp->b_wptr - rptr) < sizeof (uint32_t) ||
		    !IS_P2ALIGNED(rptr, sizeof (uint32_t)))
			break;

		switch (((union T_primitives *)rptr)->type) {
		case T_DATA_REQ:
			/* Don't pass T_DATA_REQ messages downstream. */
			freemsg(mp);
			return;
		case T_ORDREL_REQ:
			RPCLOG(8, "mir_wput_other wq 0x%p: got T_ORDREL_REQ\n",
			    (void *)q);
			mutex_enter(&mir->mir_mutex);
			if (mir->mir_type != RPC_SERVER) {
				/*
				 * We are likely being called from
				 * clnt_dispatch_notifyall().  Sending a
				 * T_ORDREL_REQ will result in some kind of
				 * _IND message being sent back, which will
				 * cause another call to
				 * clnt_dispatch_notifyall().  To keep the
				 * stack lean, queue this message.
				 */
				mir->mir_inwservice = 1;
				(void) putq(q, mp);
				mutex_exit(&mir->mir_mutex);
				return;
			}

			/*
			 * Mark the structure such that we don't accept any
			 * more requests from the client.  We could defer
			 * this until we actually send the orderly release
			 * request downstream, but all that does is delay
			 * the closing of this stream.
			 */
			RPCLOG(16, "mir_wput_other wq 0x%p: got T_ORDREL_REQ "
			    "so calling mir_svc_start_close\n", (void *)q);

			mir_svc_start_close(q, mir);

			/*
			 * If we have already sent down a T_ORDREL_REQ, don't
			 * send any more.
			 */
			if (mir->mir_ordrel_pending) {
				freemsg(mp);
				mutex_exit(&mir->mir_mutex);
				return;
			}

			/*
			 * If the stream is not idle, then we hold the
			 * orderly release until it becomes idle.  This
			 * ensures that KRPC will be able to reply to
			 * all requests that we have passed to it.
			 *
			 * We also queue the request if there is data already
			 * queued, because we cannot allow the T_ORDREL_REQ
			 * to go before data.  When we had a separate reply
			 * count, this was not a problem, because the
			 * reply count was reconciled when mir_wsrv()
			 * completed.
			 */
			if (!MIR_SVC_QUIESCED(mir) ||
			    mir->mir_inwservice == 1) {
				mir->mir_inwservice = 1;
				(void) putq(q, mp);

				RPCLOG(16, "mir_wput_other: queuing "
				    "T_ORDREL_REQ on 0x%p\n", (void *)q);

				mutex_exit(&mir->mir_mutex);
				return;
			}

			/*
			 * Mark the structure so that we know we sent an
			 * orderly release request, and reset the idle timer.
			 */
			mir->mir_ordrel_pending = 1;

			RPCLOG(16, "mir_wput_other: calling mir_svc_idle_start"
			    " on 0x%p because we got T_ORDREL_REQ\n",
			    (void *)q);

			mir_svc_idle_start(q, mir);
			mutex_exit(&mir->mir_mutex);

			/*
			 * When we break, we will putnext the T_ORDREL_REQ.
			 */
			break;

		case T_CONN_REQ:
			mutex_enter(&mir->mir_mutex);
			if (mir->mir_head_mp != NULL) {
				freemsg(mir->mir_head_mp);
				mir->mir_head_mp = NULL;
				mir->mir_tail_mp = NULL;
			}
			mir->mir_frag_len = -(int32_t)sizeof (uint32_t);
			/*
			 * Restart the timer in case mir_clnt_idle_do_stop()
			 * was called.
			 */
			mir->mir_idle_timeout = clnt_idle_timeout;
			mir_clnt_idle_stop(q, mir);
			mir_clnt_idle_start(q, mir);
			mutex_exit(&mir->mir_mutex);
			break;

		default:
			/*
			 * T_DISCON_REQ is one of the interesting default
			 * cases here.  Ideally, an M_FLUSH is done before
			 * T_DISCON_REQ is done.  However, that is somewhat
			 * cumbersome for clnt_cots.c to do.  So we queue
			 * T_DISCON_REQ, and let the service procedure
			 * flush all M_DATA.
			 */
			break;
		}
		/*FALLTHROUGH*/
	default:
		if (mp->b_datap->db_type >= QPCTL) {
			if (mp->b_datap->db_type == M_FLUSH) {
				if (mir->mir_type == RPC_CLIENT &&
				    *mp->b_rptr & FLUSHW) {
					RPCLOG(32, "mir_wput_other: flushing "
					    "wq 0x%p\n", (void *)q);
					if (*mp->b_rptr & FLUSHBAND) {
						flushband(q, *(mp->b_rptr + 1),
						    FLUSHDATA);
					} else {
						flushq(q, FLUSHDATA);
					}
				} else {
					RPCLOG(32, "mir_wput_other: ignoring "
					    "M_FLUSH on wq 0x%p\n", (void *)q);
				}
			}
			break;
		}

		mutex_enter(&mir->mir_mutex);
		if (mir->mir_inwservice == 0 && MIR_WCANPUTNEXT(mir, q)) {
			mutex_exit(&mir->mir_mutex);
			break;
		}
		mir->mir_inwservice = 1;
		mir->mir_inwflushdata = flush_in_svc;
		(void) putq(q, mp);
		mutex_exit(&mir->mir_mutex);
		qenable(q);

		return;
	}
	putnext(q, mp);
}
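
/*
 * A minimal sketch of how the entity that builds the stream could mark
 * it as a server endpoint via the M_IOCTL path handled above.  This is
 * a hypothetical illustration; the fd, helper name, and user-level
 * framing are assumptions, and in practice the kRPC setup code drives
 * this path:
 */
#if 0
/* hypothetical user-level setup; fd is an open transport descriptor */
static void
mark_server_stream(int fd)
{
	if (ioctl(fd, I_PUSH, "rpcmod") == 0)
		(void) ioctl(fd, RPC_SERVER, 0);	/* latches mir_type above */
}
#endif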

static void
mir_wsrv(queue_t *q)
{
	mblk_t	*mp;
	mir_t	*mir;
	bool_t	flushdata;

	mir = (mir_t *)q->q_ptr;
	mutex_enter(&mir->mir_mutex);

	flushdata = mir->mir_inwflushdata;
	mir->mir_inwflushdata = 0;

	while (mp = getq(q)) {
		if (mp->b_datap->db_type == M_DATA) {
			/*
			 * Do not send any more data if we have sent
			 * a T_ORDREL_REQ.
			 */
			if (flushdata || mir->mir_ordrel_pending == 1) {
				freemsg(mp);
				continue;
			}

			/*
			 * Make sure that the stream can really handle more
			 * data.
			 */
			if (!MIR_WCANPUTNEXT(mir, q)) {
				(void) putbq(q, mp);
				mutex_exit(&mir->mir_mutex);
				return;
			}

			/*
			 * Now we pass the RPC message downstream.
			 */
			mutex_exit(&mir->mir_mutex);
			putnext(q, mp);
			mutex_enter(&mir->mir_mutex);
			continue;
		}

		/*
		 * This is not an RPC message, pass it downstream
		 * (ignoring flow control) if the server side is not sending
		 * a T_ORDREL_REQ downstream.
		 */
		if (mir->mir_type != RPC_SERVER ||
		    ((union T_primitives *)mp->b_rptr)->type !=
		    T_ORDREL_REQ) {
			mutex_exit(&mir->mir_mutex);
			putnext(q, mp);
			mutex_enter(&mir->mir_mutex);
			continue;
		}

		if (mir->mir_ordrel_pending == 1) {
			/*
			 * Don't send two T_ORDRELs.
			 */
			freemsg(mp);
			continue;
		}

		/*
		 * Mark the structure so that we know we sent an orderly
		 * release request.  We will check to see if the slot is idle
		 * at the end of this routine, and if so, reset the idle
		 * timer to handle orderly release timeouts.
		 */
		mir->mir_ordrel_pending = 1;
		RPCLOG(16, "mir_wsrv: sending ordrel req on q 0x%p\n",
		    (void *)q);
		/*
		 * Send the orderly release downstream.  If there are other
		 * pending replies we won't be able to send them.  However,
		 * the only reason we should send the orderly release is if
		 * we were idle, or if an unusual event occurred.
		 */
		mutex_exit(&mir->mir_mutex);
		putnext(q, mp);
		mutex_enter(&mir->mir_mutex);
	}

	if (q->q_first == NULL)
		/*
		 * If we call mir_svc_idle_start() below, then
		 * clearing mir_inwservice here will also result in
		 * any thread waiting in mir_close() to be signaled.
		 */
		mir->mir_inwservice = 0;

	if (mir->mir_type != RPC_SERVER) {
		mutex_exit(&mir->mir_mutex);
		return;
	}

	/*
	 * If idle, we call mir_svc_idle_start to start the timer (or wake
	 * up a close).  Also make sure not to start the idle timer on the
	 * listener stream.  That would cause nfsd to send an orderly
	 * release command on the listener stream.
	 */
	if (MIR_SVC_QUIESCED(mir) && !(mir->mir_listen_stream)) {
		RPCLOG(16, "mir_wsrv: calling mir_svc_idle_start on 0x%p "
		    "because mir slot is idle\n", (void *)q);
		mir_svc_idle_start(q, mir);
	}

	/*
	 * If outbound flow control has been relieved, then allow new
	 * inbound requests to be processed.
	 */
	if (mir->mir_hold_inbound) {
		mir->mir_hold_inbound = 0;
		qenable(RD(q));
	}
	mutex_exit(&mir->mir_mutex);
}
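
/*
 * Flow-control handshake summary: mir_wput() sets mir_hold_inbound when
 * a server reply cannot be sent downstream; mir_wsrv() above clears it
 * and qenables the read queue once the backlog drains, which is what
 * lets mir_rsrv() resume feeding inbound requests to kRPC.
 */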

static void
mir_disconnect(queue_t *q, mir_t *mir)
{
	ASSERT(MUTEX_HELD(&mir->mir_mutex));

	switch (mir->mir_type) {
	case RPC_CLIENT:
		/*
		 * We are disconnecting, but not necessarily
		 * closing.  By not closing, we will fail to
		 * pick up a possibly changed global timeout value,
		 * unless we store it now.
		 */
		mir->mir_idle_timeout = clnt_idle_timeout;
		mir_clnt_idle_start(WR(q), mir);
		mutex_exit(&mir->mir_mutex);

		/*
		 * T_DISCON_REQ is passed to KRPC as an integer value
		 * (this is not a TPI message).  It is used as a
		 * convenient value to indicate a sanity check
		 * failure -- the same KRPC routine is also called
		 * for T_DISCON_INDs and T_ORDREL_INDs.
		 */
		clnt_dispatch_notifyall(WR(q), T_DISCON_REQ, 0);
		break;

	case RPC_SERVER:
		mir->mir_svc_no_more_msgs = 1;
		mir_svc_idle_stop(WR(q), mir);
		mutex_exit(&mir->mir_mutex);
		RPCLOG(16, "mir_disconnect: telling "
		    "stream head listener to disconnect stream "
		    "(0x%p)\n", (void *)q);
		(void) mir_svc_policy_notify(q, 2);
		break;

	default:
		mutex_exit(&mir->mir_mutex);
		break;
	}
}
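
/*
 * Note the two event codes handed to mir_svc_policy_notify() in this
 * file: mir_timer() sends 1 when an idle stream should be closed, while
 * mir_disconnect() above sends 2 to request a disconnect after a bad
 * record fragment.  The stream head consumer is expected to tell the
 * two apart by value.
 */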

/*
 * Do a sanity check on the length of the fragment.
 * Returns 1 if bad, else 0.
 */
static int
mir_check_len(queue_t *q, int32_t frag_len, mblk_t *head_mp)
{
	mir_t	*mir;

	mir = (mir_t *)q->q_ptr;

	/*
	 * Do a sanity check on the message length.  If this message is
	 * getting excessively large, shut down the connection.
	 */
	if ((frag_len <= 0) || (mir->mir_max_msg_sizep == NULL) ||
	    (frag_len <= *mir->mir_max_msg_sizep)) {
		return (0);
	}

	freemsg(head_mp);
	mir->mir_head_mp = (mblk_t *)0;
	mir->mir_frag_len = -(int)sizeof (uint32_t);
	if (mir->mir_type != RPC_SERVER || mir->mir_setup_complete) {
		cmn_err(CE_NOTE,
		    "KRPC: record fragment from %s of size (%d) exceeds "
		    "maximum (%u). Disconnecting",
		    (mir->mir_type == RPC_CLIENT) ? "server" :
		    (mir->mir_type == RPC_SERVER) ? "client" :
		    "test tool",
		    frag_len, *mir->mir_max_msg_sizep);
	}

	mir_disconnect(q, mir);
	return (1);
}
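
/*
 * Resetting mir_frag_len to -(int)sizeof (uint32_t) above re-arms record
 * assembly: the count must climb back through the 4-byte record mark
 * before any fragment data is measured.  For example, after the reset a
 * newly arrived record mark brings mir_frag_len back to 0, and only
 * bytes beyond it are compared against *mir_max_msg_sizep.
 */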