/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */

/*
 * svc_cots.c
 * Server side for connection-oriented RPC in the kernel.
36 * 37 */ 38 39 #include <sys/param.h> 40 #include <sys/types.h> 41 #include <sys/sysmacros.h> 42 #include <sys/file.h> 43 #include <sys/stream.h> 44 #include <sys/strsubr.h> 45 #include <sys/strsun.h> 46 #include <sys/stropts.h> 47 #include <sys/tiuser.h> 48 #include <sys/timod.h> 49 #include <sys/tihdr.h> 50 #include <sys/fcntl.h> 51 #include <sys/errno.h> 52 #include <sys/kmem.h> 53 #include <sys/systm.h> 54 #include <sys/debug.h> 55 #include <sys/cmn_err.h> 56 #include <sys/kstat.h> 57 #include <sys/vtrace.h> 58 59 #include <rpc/types.h> 60 #include <rpc/xdr.h> 61 #include <rpc/auth.h> 62 #include <rpc/rpc_msg.h> 63 #include <rpc/svc.h> 64 #include <inet/ip.h> 65 66 #define COTS_MAX_ALLOCSIZE 2048 67 #define MSG_OFFSET 128 /* offset of call into the mblk */ 68 #define RM_HDR_SIZE 4 /* record mark header size */ 69 70 /* 71 * Routines exported through ops vector. 72 */ 73 static bool_t svc_cots_krecv(SVCXPRT *, mblk_t *, struct rpc_msg *); 74 static bool_t svc_cots_ksend(SVCXPRT *, struct rpc_msg *); 75 static bool_t svc_cots_kgetargs(SVCXPRT *, xdrproc_t, caddr_t); 76 static bool_t svc_cots_kfreeargs(SVCXPRT *, xdrproc_t, caddr_t); 77 static void svc_cots_kdestroy(SVCMASTERXPRT *); 78 static int svc_cots_kdup(struct svc_req *, caddr_t, int, 79 struct dupreq **, bool_t *); 80 static void svc_cots_kdupdone(struct dupreq *, caddr_t, 81 void (*)(), int, int); 82 static int32_t *svc_cots_kgetres(SVCXPRT *, int); 83 static void svc_cots_kfreeres(SVCXPRT *); 84 static void svc_cots_kclone_destroy(SVCXPRT *); 85 static void svc_cots_kstart(SVCMASTERXPRT *); 86 static void svc_cots_ktattrs(SVCXPRT *, int, void **); 87 88 /* 89 * Server transport operations vector. 
90 */ 91 struct svc_ops svc_cots_op = { 92 svc_cots_krecv, /* Get requests */ 93 svc_cots_kgetargs, /* Deserialize arguments */ 94 svc_cots_ksend, /* Send reply */ 95 svc_cots_kfreeargs, /* Free argument data space */ 96 svc_cots_kdestroy, /* Destroy transport handle */ 97 svc_cots_kdup, /* Check entry in dup req cache */ 98 svc_cots_kdupdone, /* Mark entry in dup req cache as done */ 99 svc_cots_kgetres, /* Get pointer to response buffer */ 100 svc_cots_kfreeres, /* Destroy pre-serialized response header */ 101 svc_cots_kclone_destroy, /* Destroy a clone xprt */ 102 svc_cots_kstart, /* Tell `ready-to-receive' to rpcmod */ 103 NULL, /* Transport specific clone xprt */ 104 svc_cots_ktattrs /* Transport Attributes */ 105 }; 106 107 /* 108 * Master transport private data. 109 * Kept in xprt->xp_p2. 110 */ 111 struct cots_master_data { 112 char *cmd_src_addr; /* client's address */ 113 int cmd_xprt_started; /* flag for clone routine to call */ 114 /* rpcmod's start routine. */ 115 struct rpc_cots_server *cmd_stats; /* stats for zone */ 116 }; 117 118 /* 119 * Transport private data. 120 * Kept in clone_xprt->xp_p2buf. 121 */ 122 typedef struct cots_data { 123 mblk_t *cd_mp; /* pre-allocated reply message */ 124 mblk_t *cd_req_mp; /* request message */ 125 } cots_data_t; 126 127 /* 128 * Server statistics 129 * NOTE: This structure type is duplicated in the NFS fast path. 
130 */ 131 static const struct rpc_cots_server { 132 kstat_named_t rscalls; 133 kstat_named_t rsbadcalls; 134 kstat_named_t rsnullrecv; 135 kstat_named_t rsbadlen; 136 kstat_named_t rsxdrcall; 137 kstat_named_t rsdupchecks; 138 kstat_named_t rsdupreqs; 139 } cots_rsstat_tmpl = { 140 { "calls", KSTAT_DATA_UINT64 }, 141 { "badcalls", KSTAT_DATA_UINT64 }, 142 { "nullrecv", KSTAT_DATA_UINT64 }, 143 { "badlen", KSTAT_DATA_UINT64 }, 144 { "xdrcall", KSTAT_DATA_UINT64 }, 145 { "dupchecks", KSTAT_DATA_UINT64 }, 146 { "dupreqs", KSTAT_DATA_UINT64 } 147 }; 148 149 #define CLONE2STATS(clone_xprt) \ 150 ((struct cots_master_data *)(clone_xprt)->xp_master->xp_p2)->cmd_stats 151 #define RSSTAT_INCR(s, x) \ 152 atomic_add_64(&(s)->x.value.ui64, 1) 153 154 /* 155 * Pointer to a transport specific `ready to receive' function in rpcmod 156 * (set from rpcmod). 157 */ 158 void (*mir_start)(queue_t *); 159 uint_t *svc_max_msg_sizep; 160 161 /* 162 * the address size of the underlying transport can sometimes be 163 * unknown (tinfo->ADDR_size == -1). For this case, it is 164 * necessary to figure out what the size is so the correct amount 165 * of data is allocated. This is an itterative process: 166 * 1. take a good guess (use T_MINADDRSIZE) 167 * 2. try it. 168 * 3. if it works then everything is ok 169 * 4. if the error is ENAMETOLONG, double the guess 170 * 5. go back to step 2. 171 */ 172 #define T_UNKNOWNADDRSIZE (-1) 173 #define T_MINADDRSIZE 32 174 175 /* 176 * Create a transport record. 177 * The transport record, output buffer, and private data structure 178 * are allocated. The output buffer is serialized into using xdrmem. 179 * There is one transport record per user process which implements a 180 * set of services. 
181 */ 182 static kmutex_t cots_kcreate_lock; 183 184 int 185 svc_cots_kcreate(file_t *fp, uint_t max_msgsize, struct T_info_ack *tinfo, 186 SVCMASTERXPRT **nxprt) 187 { 188 struct cots_master_data *cmd; 189 int err, retval; 190 SVCMASTERXPRT *xprt; 191 struct rpcstat *rpcstat; 192 struct T_addr_ack *ack_p; 193 struct strioctl getaddr; 194 195 if (nxprt == NULL) 196 return (EINVAL); 197 198 rpcstat = zone_getspecific(rpcstat_zone_key, curproc->p_zone); 199 ASSERT(rpcstat != NULL); 200 201 xprt = kmem_zalloc(sizeof (SVCMASTERXPRT), KM_SLEEP); 202 203 cmd = kmem_zalloc(sizeof (*cmd) + sizeof (*ack_p) 204 + (2 * sizeof (sin6_t)), KM_SLEEP); 205 206 ack_p = (struct T_addr_ack *)&cmd[1]; 207 208 if ((tinfo->TIDU_size > COTS_MAX_ALLOCSIZE) || 209 (tinfo->TIDU_size <= 0)) 210 xprt->xp_msg_size = COTS_MAX_ALLOCSIZE; 211 else { 212 xprt->xp_msg_size = tinfo->TIDU_size - 213 (tinfo->TIDU_size % BYTES_PER_XDR_UNIT); 214 } 215 216 xprt->xp_ops = &svc_cots_op; 217 xprt->xp_p2 = (caddr_t)cmd; 218 cmd->cmd_xprt_started = 0; 219 cmd->cmd_stats = rpcstat->rpc_cots_server; 220 221 getaddr.ic_cmd = TI_GETINFO; 222 getaddr.ic_timout = -1; 223 getaddr.ic_len = sizeof (*ack_p) + (2 * sizeof (sin6_t)); 224 getaddr.ic_dp = (char *)ack_p; 225 ack_p->PRIM_type = T_ADDR_REQ; 226 227 err = strioctl(fp->f_vnode, I_STR, (intptr_t)&getaddr, 228 0, K_TO_K, CRED(), &retval); 229 if (err) { 230 kmem_free(cmd, sizeof (*cmd) + sizeof (*ack_p) + 231 (2 * sizeof (sin6_t))); 232 kmem_free(xprt, sizeof (SVCMASTERXPRT)); 233 return (err); 234 } 235 236 xprt->xp_rtaddr.maxlen = ack_p->REMADDR_length; 237 xprt->xp_rtaddr.len = ack_p->REMADDR_length; 238 cmd->cmd_src_addr = xprt->xp_rtaddr.buf = 239 (char *)ack_p + ack_p->REMADDR_offset; 240 241 xprt->xp_lcladdr.maxlen = ack_p->LOCADDR_length; 242 xprt->xp_lcladdr.len = ack_p->LOCADDR_length; 243 xprt->xp_lcladdr.buf = (char *)ack_p + ack_p->LOCADDR_offset; 244 245 /* 246 * If the current sanity check size in rpcmod is smaller 247 * than the size needed for 
this xprt, then increase 248 * the sanity check. 249 */ 250 if (max_msgsize != 0 && svc_max_msg_sizep && 251 max_msgsize > *svc_max_msg_sizep) { 252 253 /* This check needs a lock */ 254 mutex_enter(&cots_kcreate_lock); 255 if (svc_max_msg_sizep && max_msgsize > *svc_max_msg_sizep) 256 *svc_max_msg_sizep = max_msgsize; 257 mutex_exit(&cots_kcreate_lock); 258 } 259 260 *nxprt = xprt; 261 262 return (0); 263 } 264 265 /* 266 * Destroy a master transport record. 267 * Frees the space allocated for a transport record. 268 */ 269 static void 270 svc_cots_kdestroy(SVCMASTERXPRT *xprt) 271 { 272 struct cots_master_data *cmd = (struct cots_master_data *)xprt->xp_p2; 273 274 ASSERT(cmd); 275 276 if (xprt->xp_netid) 277 kmem_free(xprt->xp_netid, strlen(xprt->xp_netid) + 1); 278 if (xprt->xp_addrmask.maxlen) 279 kmem_free(xprt->xp_addrmask.buf, xprt->xp_addrmask.maxlen); 280 281 mutex_destroy(&xprt->xp_req_lock); 282 mutex_destroy(&xprt->xp_thread_lock); 283 284 kmem_free(cmd, sizeof (*cmd) + sizeof (struct T_addr_ack) + 285 (2 * sizeof (sin6_t))); 286 287 kmem_free(xprt, sizeof (SVCMASTERXPRT)); 288 } 289 290 /* 291 * svc_tli_kcreate() calls this function at the end to tell 292 * rpcmod that the transport is ready to receive requests. 293 */ 294 static void 295 svc_cots_kstart(SVCMASTERXPRT *xprt) 296 { 297 struct cots_master_data *cmd = (struct cots_master_data *)xprt->xp_p2; 298 299 if (cmd->cmd_xprt_started == 0) { 300 /* 301 * Acquire the xp_req_lock in order to use xp_wq 302 * safely (we don't want to qenable a queue that has 303 * already been closed). 304 */ 305 mutex_enter(&xprt->xp_req_lock); 306 if (cmd->cmd_xprt_started == 0 && 307 xprt->xp_wq != NULL) { 308 (*mir_start)(xprt->xp_wq); 309 cmd->cmd_xprt_started = 1; 310 } 311 mutex_exit(&xprt->xp_req_lock); 312 } 313 } 314 315 /* 316 * Transport-type specific part of svc_xprt_cleanup(). 
317 */ 318 static void 319 svc_cots_kclone_destroy(SVCXPRT *clone_xprt) 320 { 321 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 322 323 if (cd->cd_req_mp) { 324 freemsg(cd->cd_req_mp); 325 cd->cd_req_mp = (mblk_t *)0; 326 } 327 ASSERT(cd->cd_mp == NULL); 328 } 329 330 /* 331 * Transport Attributes. 332 */ 333 static void 334 svc_cots_ktattrs(SVCXPRT *clone_xprt, int attrflag, void **tattr) 335 { 336 *tattr = NULL; 337 338 switch (attrflag) { 339 case SVC_TATTR_ADDRMASK: 340 *tattr = (void *)&clone_xprt->xp_master->xp_addrmask; 341 } 342 } 343 344 /* 345 * Receive rpc requests. 346 * Checks if the message is intact, and deserializes the call packet. 347 */ 348 static bool_t 349 svc_cots_krecv(SVCXPRT *clone_xprt, mblk_t *mp, struct rpc_msg *msg) 350 { 351 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 352 XDR *xdrs = &clone_xprt->xp_xdrin; 353 struct rpc_cots_server *stats = CLONE2STATS(clone_xprt); 354 355 TRACE_0(TR_FAC_KRPC, TR_SVC_COTS_KRECV_START, 356 "svc_cots_krecv_start:"); 357 RPCLOG(4, "svc_cots_krecv_start clone_xprt = %p:\n", 358 (void *)clone_xprt); 359 360 RSSTAT_INCR(stats, rscalls); 361 362 if (mp->b_datap->db_type != M_DATA) { 363 RPCLOG(16, "svc_cots_krecv bad db_type %d\n", 364 mp->b_datap->db_type); 365 goto bad; 366 } 367 368 xdrmblk_init(xdrs, mp, XDR_DECODE, 0); 369 370 TRACE_0(TR_FAC_KRPC, TR_XDR_CALLMSG_START, 371 "xdr_callmsg_start:"); 372 RPCLOG0(4, "xdr_callmsg_start:\n"); 373 if (!xdr_callmsg(xdrs, msg)) { 374 TRACE_1(TR_FAC_KRPC, TR_XDR_CALLMSG_END, 375 "xdr_callmsg_end:(%S)", "bad"); 376 RPCLOG0(1, "svc_cots_krecv xdr_callmsg failure\n"); 377 RSSTAT_INCR(stats, rsxdrcall); 378 goto bad; 379 } 380 TRACE_1(TR_FAC_KRPC, TR_XDR_CALLMSG_END, 381 "xdr_callmsg_end:(%S)", "good"); 382 383 clone_xprt->xp_xid = msg->rm_xid; 384 cd->cd_req_mp = mp; 385 386 TRACE_1(TR_FAC_KRPC, TR_SVC_COTS_KRECV_END, 387 "svc_cots_krecv_end:(%S)", "good"); 388 RPCLOG0(4, "svc_cots_krecv_end:good\n"); 389 return (TRUE); 390 391 bad: 392 if (mp) 393 
freemsg(mp); 394 395 RSSTAT_INCR(stats, rsbadcalls); 396 TRACE_1(TR_FAC_KRPC, TR_SVC_COTS_KRECV_END, 397 "svc_cots_krecv_end:(%S)", "bad"); 398 return (FALSE); 399 } 400 401 /* 402 * Send rpc reply. 403 */ 404 static bool_t 405 svc_cots_ksend(SVCXPRT *clone_xprt, struct rpc_msg *msg) 406 { 407 /* LINTED pointer alignment */ 408 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 409 XDR *xdrs = &(clone_xprt->xp_xdrout); 410 int retval = FALSE; 411 mblk_t *mp; 412 xdrproc_t xdr_results; 413 caddr_t xdr_location; 414 bool_t has_args; 415 416 TRACE_0(TR_FAC_KRPC, TR_SVC_COTS_KSEND_START, 417 "svc_cots_ksend_start:"); 418 419 /* 420 * If there is a result procedure specified in the reply message, 421 * it will be processed in the xdr_replymsg and SVCAUTH_WRAP. 422 * We need to make sure it won't be processed twice, so we null 423 * it for xdr_replymsg here. 424 */ 425 has_args = FALSE; 426 if (msg->rm_reply.rp_stat == MSG_ACCEPTED && 427 msg->rm_reply.rp_acpt.ar_stat == SUCCESS) { 428 if ((xdr_results = msg->acpted_rply.ar_results.proc) != NULL) { 429 has_args = TRUE; 430 xdr_location = msg->acpted_rply.ar_results.where; 431 msg->acpted_rply.ar_results.proc = xdr_void; 432 msg->acpted_rply.ar_results.where = NULL; 433 } 434 } 435 436 mp = cd->cd_mp; 437 if (mp) { 438 /* 439 * The program above pre-allocated an mblk and put 440 * the data in place. 441 */ 442 cd->cd_mp = (mblk_t *)NULL; 443 if (!(xdr_replymsg_body(xdrs, msg) && 444 (!has_args || SVCAUTH_WRAP(&clone_xprt->xp_auth, xdrs, 445 xdr_results, xdr_location)))) { 446 RPCLOG0(1, "svc_cots_ksend: " 447 "xdr_replymsg_body/SVCAUTH_WRAP failed\n"); 448 freemsg(mp); 449 goto out; 450 } 451 } else { 452 int len; 453 int mpsize; 454 455 /* 456 * Leave space for protocol headers. 457 */ 458 len = MSG_OFFSET + clone_xprt->xp_msg_size; 459 460 /* 461 * Allocate an initial mblk for the response data. 
462 */ 463 while (!(mp = allocb(len, BPRI_LO))) { 464 RPCLOG0(16, "svc_cots_ksend: allocb failed failed\n"); 465 if (strwaitbuf(len, BPRI_LO)) { 466 TRACE_1(TR_FAC_KRPC, TR_SVC_COTS_KSEND_END, 467 "svc_cots_ksend_end:(%S)", "strwaitbuf"); 468 RPCLOG0(1, 469 "svc_cots_ksend: strwaitbuf failed\n"); 470 goto out; 471 } 472 } 473 474 /* 475 * Initialize the XDR decode stream. Additional mblks 476 * will be allocated if necessary. They will be TIDU 477 * sized. 478 */ 479 xdrmblk_init(xdrs, mp, XDR_ENCODE, clone_xprt->xp_msg_size); 480 mpsize = MBLKSIZE(mp); 481 ASSERT(mpsize >= len); 482 ASSERT(mp->b_rptr == mp->b_datap->db_base); 483 484 /* 485 * If the size of mblk is not appreciably larger than what we 486 * asked, then resize the mblk to exactly len bytes. Reason for 487 * this: suppose len is 1600 bytes, the tidu is 1460 bytes 488 * (from TCP over ethernet), and the arguments to RPC require 489 * 2800 bytes. Ideally we want the protocol to render two 490 * ~1400 byte segments over the wire. If allocb() gives us a 2k 491 * mblk, and we allocate a second mblk for the rest, the 492 * protocol module may generate 3 segments over the wire: 493 * 1460 bytes for the first, 448 (2048 - 1600) for the 2nd, and 494 * 892 for the 3rd. If we "waste" 448 bytes in the first mblk, 495 * the XDR encoding will generate two ~1400 byte mblks, and the 496 * protocol module is more likely to produce properly sized 497 * segments. 498 */ 499 if ((mpsize >> 1) <= len) { 500 mp->b_rptr += (mpsize - len); 501 } 502 503 /* 504 * Adjust b_rptr to reserve space for the non-data protocol 505 * headers that any downstream modules might like to add, and 506 * for the record marking header. 
507 */ 508 mp->b_rptr += (MSG_OFFSET + RM_HDR_SIZE); 509 510 XDR_SETPOS(xdrs, (uint_t)(mp->b_rptr - mp->b_datap->db_base)); 511 ASSERT(mp->b_wptr == mp->b_rptr); 512 513 msg->rm_xid = clone_xprt->xp_xid; 514 515 TRACE_0(TR_FAC_KRPC, TR_XDR_REPLYMSG_START, 516 "xdr_replymsg_start:"); 517 if (!(xdr_replymsg(xdrs, msg) && 518 (!has_args || SVCAUTH_WRAP(&clone_xprt->xp_auth, xdrs, 519 xdr_results, xdr_location)))) { 520 TRACE_1(TR_FAC_KRPC, TR_XDR_REPLYMSG_END, 521 "xdr_replymsg_end:(%S)", "bad"); 522 freemsg(mp); 523 RPCLOG0(1, "svc_cots_ksend: xdr_replymsg/SVCAUTH_WRAP " 524 "failed\n"); 525 goto out; 526 } 527 TRACE_1(TR_FAC_KRPC, TR_XDR_REPLYMSG_END, 528 "xdr_replymsg_end:(%S)", "good"); 529 } 530 531 put(clone_xprt->xp_wq, mp); 532 retval = TRUE; 533 534 out: 535 /* 536 * This is completely disgusting. If public is set it is 537 * a pointer to a structure whose first field is the address 538 * of the function to free that structure and any related 539 * stuff. (see rrokfree in nfs_xdr.c). 540 */ 541 if (xdrs->x_public) { 542 /* LINTED pointer alignment */ 543 (**((int (**)())xdrs->x_public))(xdrs->x_public); 544 } 545 546 TRACE_1(TR_FAC_KRPC, TR_SVC_COTS_KSEND_END, 547 "svc_cots_ksend_end:(%S)", "done"); 548 return (retval); 549 } 550 551 /* 552 * Deserialize arguments. 553 */ 554 static bool_t 555 svc_cots_kgetargs(SVCXPRT *clone_xprt, xdrproc_t xdr_args, 556 caddr_t args_ptr) 557 { 558 return (SVCAUTH_UNWRAP(&clone_xprt->xp_auth, &clone_xprt->xp_xdrin, 559 xdr_args, args_ptr)); 560 } 561 562 static bool_t 563 svc_cots_kfreeargs(SVCXPRT *clone_xprt, xdrproc_t xdr_args, 564 caddr_t args_ptr) 565 { 566 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 567 mblk_t *mp; 568 bool_t retval; 569 570 /* 571 * It is important to call the XDR routine before 572 * freeing the request mblk. Structures in the 573 * XDR data may point into the mblk and require that 574 * the memory be intact during the free routine. 
575 */ 576 if (args_ptr) { 577 /* LINTED pointer alignment */ 578 XDR *xdrs = &clone_xprt->xp_xdrin; 579 580 xdrs->x_op = XDR_FREE; 581 retval = (*xdr_args)(xdrs, args_ptr); 582 } else 583 retval = TRUE; 584 585 if ((mp = cd->cd_req_mp) != NULL) { 586 cd->cd_req_mp = (mblk_t *)0; 587 freemsg(mp); 588 } 589 590 return (retval); 591 } 592 593 static int32_t * 594 svc_cots_kgetres(SVCXPRT *clone_xprt, int size) 595 { 596 /* LINTED pointer alignment */ 597 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 598 XDR *xdrs = &clone_xprt->xp_xdrout; 599 mblk_t *mp; 600 int32_t *buf; 601 struct rpc_msg rply; 602 int len; 603 int mpsize; 604 605 /* 606 * Leave space for protocol headers. 607 */ 608 len = MSG_OFFSET + clone_xprt->xp_msg_size; 609 610 /* 611 * Allocate an initial mblk for the response data. 612 */ 613 while ((mp = allocb(len, BPRI_LO)) == NULL) { 614 if (strwaitbuf(len, BPRI_LO)) 615 return (FALSE); 616 } 617 618 /* 619 * Initialize the XDR decode stream. Additional mblks 620 * will be allocated if necessary. They will be TIDU 621 * sized. 622 */ 623 xdrmblk_init(xdrs, mp, XDR_ENCODE, clone_xprt->xp_msg_size); 624 mpsize = MBLKSIZE(mp); 625 ASSERT(mpsize >= len); 626 ASSERT(mp->b_rptr == mp->b_datap->db_base); 627 628 /* 629 * If the size of mblk is not appreciably larger than what we 630 * asked, then resize the mblk to exactly len bytes. Reason for 631 * this: suppose len is 1600 bytes, the tidu is 1460 bytes 632 * (from TCP over ethernet), and the arguments to RPC require 633 * 2800 bytes. Ideally we want the protocol to render two 634 * ~1400 byte segments over the wire. If allocb() gives us a 2k 635 * mblk, and we allocate a second mblk for the rest, the 636 * protocol module may generate 3 segments over the wire: 637 * 1460 bytes for the first, 448 (2048 - 1600) for the 2nd, and 638 * 892 for the 3rd. 
If we "waste" 448 bytes in the first mblk, 639 * the XDR encoding will generate two ~1400 byte mblks, and the 640 * protocol module is more likely to produce properly sized 641 * segments. 642 */ 643 if ((mpsize >> 1) <= len) { 644 mp->b_rptr += (mpsize - len); 645 } 646 647 /* 648 * Adjust b_rptr to reserve space for the non-data protocol 649 * headers that any downstream modules might like to add, and 650 * for the record marking header. 651 */ 652 mp->b_rptr += (MSG_OFFSET + RM_HDR_SIZE); 653 654 XDR_SETPOS(xdrs, (uint_t)(mp->b_rptr - mp->b_datap->db_base)); 655 ASSERT(mp->b_wptr == mp->b_rptr); 656 657 /* 658 * Assume a successful RPC since most of them are. 659 */ 660 rply.rm_xid = clone_xprt->xp_xid; 661 rply.rm_direction = REPLY; 662 rply.rm_reply.rp_stat = MSG_ACCEPTED; 663 rply.acpted_rply.ar_verf = clone_xprt->xp_verf; 664 rply.acpted_rply.ar_stat = SUCCESS; 665 666 if (!xdr_replymsg_hdr(xdrs, &rply)) { 667 freeb(mp); 668 return (NULL); 669 } 670 671 672 buf = XDR_INLINE(xdrs, size); 673 if (buf == NULL) { 674 ASSERT(cd->cd_mp == NULL); 675 freemsg(mp); 676 } else { 677 cd->cd_mp = mp; 678 } 679 return (buf); 680 } 681 682 static void 683 svc_cots_kfreeres(SVCXPRT *clone_xprt) 684 { 685 cots_data_t *cd; 686 mblk_t *mp; 687 688 cd = (cots_data_t *)clone_xprt->xp_p2buf; 689 if ((mp = cd->cd_mp) != NULL) { 690 cd->cd_mp = (mblk_t *)NULL; 691 freemsg(mp); 692 } 693 } 694 695 /* 696 * the dup cacheing routines below provide a cache of non-failure 697 * transaction id's. rpc service routines can use this to detect 698 * retransmissions and re-send a non-failure response. 699 */ 700 701 /* 702 * MAXDUPREQS is the number of cached items. It should be adjusted 703 * to the service load so that there is likely to be a response entry 704 * when the first retransmission comes in. 705 */ 706 #define MAXDUPREQS 1024 707 708 /* 709 * This should be appropriately scaled to MAXDUPREQS. 
710 */ 711 #define DRHASHSZ 257 712 713 #if ((DRHASHSZ & (DRHASHSZ - 1)) == 0) 714 #define XIDHASH(xid) ((xid) & (DRHASHSZ - 1)) 715 #else 716 #define XIDHASH(xid) ((xid) % DRHASHSZ) 717 #endif 718 #define DRHASH(dr) XIDHASH((dr)->dr_xid) 719 #define REQTOXID(req) ((req)->rq_xprt->xp_xid) 720 721 static int cotsndupreqs = 0; 722 int cotsmaxdupreqs = MAXDUPREQS; 723 static kmutex_t cotsdupreq_lock; 724 static struct dupreq *cotsdrhashtbl[DRHASHSZ]; 725 static int cotsdrhashstat[DRHASHSZ]; 726 727 static void unhash(struct dupreq *); 728 729 /* 730 * cotsdrmru points to the head of a circular linked list in lru order. 731 * cotsdrmru->dr_next == drlru 732 */ 733 struct dupreq *cotsdrmru; 734 735 /* 736 * PSARC 2003/523 Contract Private Interface 737 * svc_cots_kdup 738 * Changes must be reviewed by Solaris File Sharing 739 * Changes must be communicated to contract-2003-523@sun.com 740 * 741 * svc_cots_kdup searches the request cache and returns 0 if the 742 * request is not found in the cache. If it is found, then it 743 * returns the state of the request (in progress or done) and 744 * the status or attributes that were part of the original reply. 745 * 746 * If DUP_DONE (there is a duplicate) svc_cots_kdup copies over the 747 * value of the response. In that case, also return in *dupcachedp 748 * whether the response free routine is cached in the dupreq - in which case 749 * the caller should not be freeing it, because it will be done later 750 * in the svc_cots_kdup code when the dupreq is reused. 751 */ 752 static int 753 svc_cots_kdup(struct svc_req *req, caddr_t res, int size, struct dupreq **drpp, 754 bool_t *dupcachedp) 755 { 756 struct rpc_cots_server *stats = CLONE2STATS(req->rq_xprt); 757 struct dupreq *dr; 758 uint32_t xid; 759 uint32_t drhash; 760 int status; 761 762 xid = REQTOXID(req); 763 mutex_enter(&cotsdupreq_lock); 764 RSSTAT_INCR(stats, rsdupchecks); 765 /* 766 * Check to see whether an entry already exists in the cache. 
767 */ 768 dr = cotsdrhashtbl[XIDHASH(xid)]; 769 while (dr != NULL) { 770 if (dr->dr_xid == xid && 771 dr->dr_proc == req->rq_proc && 772 dr->dr_prog == req->rq_prog && 773 dr->dr_vers == req->rq_vers && 774 dr->dr_addr.len == req->rq_xprt->xp_rtaddr.len && 775 bcmp((caddr_t)dr->dr_addr.buf, 776 (caddr_t)req->rq_xprt->xp_rtaddr.buf, 777 dr->dr_addr.len) == 0) { 778 status = dr->dr_status; 779 if (status == DUP_DONE) { 780 bcopy(dr->dr_resp.buf, res, size); 781 if (dupcachedp != NULL) 782 *dupcachedp = (dr->dr_resfree != NULL); 783 TRACE_0(TR_FAC_KRPC, TR_SVC_COTS_KDUP_DONE, 784 "svc_cots_kdup: DUP_DONE"); 785 } else { 786 dr->dr_status = DUP_INPROGRESS; 787 *drpp = dr; 788 TRACE_0(TR_FAC_KRPC, 789 TR_SVC_COTS_KDUP_INPROGRESS, 790 "svc_cots_kdup: DUP_INPROGRESS"); 791 } 792 RSSTAT_INCR(stats, rsdupreqs); 793 mutex_exit(&cotsdupreq_lock); 794 return (status); 795 } 796 dr = dr->dr_chain; 797 } 798 799 /* 800 * There wasn't an entry, either allocate a new one or recycle 801 * an old one. 802 */ 803 if (cotsndupreqs < cotsmaxdupreqs) { 804 dr = kmem_alloc(sizeof (*dr), KM_NOSLEEP); 805 if (dr == NULL) { 806 mutex_exit(&cotsdupreq_lock); 807 return (DUP_ERROR); 808 } 809 dr->dr_resp.buf = NULL; 810 dr->dr_resp.maxlen = 0; 811 dr->dr_addr.buf = NULL; 812 dr->dr_addr.maxlen = 0; 813 if (cotsdrmru) { 814 dr->dr_next = cotsdrmru->dr_next; 815 cotsdrmru->dr_next = dr; 816 } else { 817 dr->dr_next = dr; 818 } 819 cotsndupreqs++; 820 } else { 821 dr = cotsdrmru->dr_next; 822 while (dr->dr_status == DUP_INPROGRESS) { 823 dr = dr->dr_next; 824 if (dr == cotsdrmru->dr_next) { 825 cmn_err(CE_WARN, "svc_cots_kdup no slots free"); 826 mutex_exit(&cotsdupreq_lock); 827 return (DUP_ERROR); 828 } 829 } 830 unhash(dr); 831 if (dr->dr_resfree) { 832 (*dr->dr_resfree)(dr->dr_resp.buf); 833 } 834 } 835 dr->dr_resfree = NULL; 836 cotsdrmru = dr; 837 838 dr->dr_xid = REQTOXID(req); 839 dr->dr_prog = req->rq_prog; 840 dr->dr_vers = req->rq_vers; 841 dr->dr_proc = req->rq_proc; 842 if 
(dr->dr_addr.maxlen < req->rq_xprt->xp_rtaddr.len) { 843 if (dr->dr_addr.buf != NULL) 844 kmem_free(dr->dr_addr.buf, dr->dr_addr.maxlen); 845 dr->dr_addr.maxlen = req->rq_xprt->xp_rtaddr.len; 846 dr->dr_addr.buf = kmem_alloc(dr->dr_addr.maxlen, KM_NOSLEEP); 847 if (dr->dr_addr.buf == NULL) { 848 dr->dr_addr.maxlen = 0; 849 dr->dr_status = DUP_DROP; 850 mutex_exit(&cotsdupreq_lock); 851 return (DUP_ERROR); 852 } 853 } 854 dr->dr_addr.len = req->rq_xprt->xp_rtaddr.len; 855 bcopy(req->rq_xprt->xp_rtaddr.buf, dr->dr_addr.buf, dr->dr_addr.len); 856 if (dr->dr_resp.maxlen < size) { 857 if (dr->dr_resp.buf != NULL) 858 kmem_free(dr->dr_resp.buf, dr->dr_resp.maxlen); 859 dr->dr_resp.maxlen = (unsigned int)size; 860 dr->dr_resp.buf = kmem_alloc(size, KM_NOSLEEP); 861 if (dr->dr_resp.buf == NULL) { 862 dr->dr_resp.maxlen = 0; 863 dr->dr_status = DUP_DROP; 864 mutex_exit(&cotsdupreq_lock); 865 return (DUP_ERROR); 866 } 867 } 868 dr->dr_status = DUP_INPROGRESS; 869 870 drhash = (uint32_t)DRHASH(dr); 871 dr->dr_chain = cotsdrhashtbl[drhash]; 872 cotsdrhashtbl[drhash] = dr; 873 cotsdrhashstat[drhash]++; 874 mutex_exit(&cotsdupreq_lock); 875 *drpp = dr; 876 return (DUP_NEW); 877 } 878 879 /* 880 * PSARC 2003/523 Contract Private Interface 881 * svc_cots_kdupdone 882 * Changes must be reviewed by Solaris File Sharing 883 * Changes must be communicated to contract-2003-523@sun.com 884 * 885 * svc_cots_kdupdone marks the request done (DUP_DONE or DUP_DROP) 886 * and stores the response. 887 */ 888 static void 889 svc_cots_kdupdone(struct dupreq *dr, caddr_t res, void (*dis_resfree)(), 890 int size, int status) 891 { 892 ASSERT(dr->dr_resfree == NULL); 893 if (status == DUP_DONE) { 894 bcopy(res, dr->dr_resp.buf, size); 895 dr->dr_resfree = dis_resfree; 896 } 897 dr->dr_status = status; 898 } 899 900 /* 901 * This routine expects that the mutex, cotsdupreq_lock, is already held. 
902 */ 903 static void 904 unhash(struct dupreq *dr) 905 { 906 struct dupreq *drt; 907 struct dupreq *drtprev = NULL; 908 uint32_t drhash; 909 910 ASSERT(MUTEX_HELD(&cotsdupreq_lock)); 911 912 drhash = (uint32_t)DRHASH(dr); 913 drt = cotsdrhashtbl[drhash]; 914 while (drt != NULL) { 915 if (drt == dr) { 916 cotsdrhashstat[drhash]--; 917 if (drtprev == NULL) { 918 cotsdrhashtbl[drhash] = drt->dr_chain; 919 } else { 920 drtprev->dr_chain = drt->dr_chain; 921 } 922 return; 923 } 924 drtprev = drt; 925 drt = drt->dr_chain; 926 } 927 } 928 929 void 930 svc_cots_stats_init(zoneid_t zoneid, struct rpc_cots_server **statsp) 931 { 932 *statsp = (struct rpc_cots_server *)rpcstat_zone_init_common(zoneid, 933 "unix", "rpc_cots_server", (const kstat_named_t *)&cots_rsstat_tmpl, 934 sizeof (cots_rsstat_tmpl)); 935 } 936 937 void 938 svc_cots_stats_fini(zoneid_t zoneid, struct rpc_cots_server **statsp) 939 { 940 rpcstat_zone_fini_common(zoneid, "unix", "rpc_cots_server"); 941 kmem_free(*statsp, sizeof (cots_rsstat_tmpl)); 942 } 943 944 void 945 svc_cots_init(void) 946 { 947 /* 948 * Check to make sure that the cots private data will fit into 949 * the stack buffer allocated by svc_run. The ASSERT is a safety 950 * net if the cots_data_t structure ever changes. 951 */ 952 /*CONSTANTCONDITION*/ 953 ASSERT(sizeof (cots_data_t) <= SVC_P2LEN); 954 955 mutex_init(&cots_kcreate_lock, NULL, MUTEX_DEFAULT, NULL); 956 mutex_init(&cotsdupreq_lock, NULL, MUTEX_DEFAULT, NULL); 957 } 958