1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 /* 30 * Portions of this source code were derived from Berkeley 4.3 BSD 31 * under license from the Regents of the University of California. 32 */ 33 34 /* 35 * svc_cots.c 36 * Server side for connection-oriented RPC in the kernel. 
 *
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/file.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/stropts.h>
#include <sys/tiuser.h>
#include <sys/timod.h>
#include <sys/tihdr.h>
#include <sys/fcntl.h>
#include <sys/errno.h>
#include <sys/kmem.h>
#include <sys/systm.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/kstat.h>
#include <sys/vtrace.h>

#include <rpc/types.h>
#include <rpc/xdr.h>
#include <rpc/auth.h>
#include <rpc/rpc_msg.h>
#include <rpc/svc.h>
#include <inet/ip.h>

/*
 * COTS_MAX_ALLOCSIZE caps xp_msg_size, the chunk size later handed to
 * xdrmblk_init() when replies are encoded: svc_cots_kcreate() uses it
 * whenever the transport's TIDU size is larger than this or not positive.
 */
#define	COTS_MAX_ALLOCSIZE	2048
/*
 * MSG_OFFSET is the room reserved at the front of a reply mblk for
 * protocol headers added by downstream modules; RM_HDR_SIZE is reserved
 * right after it for the record marking header.
 */
#define	MSG_OFFSET		128	/* offset of call into the mblk */
#define	RM_HDR_SIZE		4	/* record mark header size */

/*
 * Routines exported through ops vector.
 * All of them are file-local; the only way to reach them from outside
 * this file is through the svc_cots_op table.
 */
static bool_t	svc_cots_krecv(SVCXPRT *, mblk_t *, struct rpc_msg *);
static bool_t	svc_cots_ksend(SVCXPRT *, struct rpc_msg *);
static bool_t	svc_cots_kgetargs(SVCXPRT *, xdrproc_t, caddr_t);
static bool_t	svc_cots_kfreeargs(SVCXPRT *, xdrproc_t, caddr_t);
static void	svc_cots_kdestroy(SVCMASTERXPRT *);
static int	svc_cots_kdup(struct svc_req *, caddr_t, int,
				struct dupreq **, bool_t *);
static void	svc_cots_kdupdone(struct dupreq *, caddr_t,
				void (*)(), int, int);
static int32_t	*svc_cots_kgetres(SVCXPRT *, int);
static void	svc_cots_kfreeres(SVCXPRT *);
static void	svc_cots_kclone_destroy(SVCXPRT *);
static void	svc_cots_kstart(SVCMASTERXPRT *);

/*
 * Server transport operations vector.
 */
/*
 * The initializer below is positional, so the entries must stay in the
 * member order of struct svc_ops (declared outside this file).
 */
struct svc_ops svc_cots_op = {
	svc_cots_krecv,		/* Get requests */
	svc_cots_kgetargs,	/* Deserialize arguments */
	svc_cots_ksend,		/* Send reply */
	svc_cots_kfreeargs,	/* Free argument data space */
	svc_cots_kdestroy,	/* Destroy transport handle */
	svc_cots_kdup,		/* Check entry in dup req cache */
	svc_cots_kdupdone,	/* Mark entry in dup req cache as done */
	svc_cots_kgetres,	/* Get pointer to response buffer */
	svc_cots_kfreeres,	/* Destroy pre-serialized response header */
	svc_cots_kclone_destroy, /* Destroy a clone xprt */
	svc_cots_kstart,	/* Tell `ready-to-receive' to rpcmod */
	NULL			/* Transport specific clone xprt */
};

/*
 * Master transport private data.
 * Kept in xprt->xp_p2.
 *
 * svc_cots_kcreate() allocates this structure together with a trailing
 * struct T_addr_ack and room for two sin6_t sized addresses in a single
 * kmem allocation; cmd_src_addr points into that trailing area, and
 * svc_cots_kdestroy() frees the whole thing with the same size.
 */
struct cots_master_data {
	char	*cmd_src_addr;	/* client's address */
	int	cmd_xprt_started; /* flag for clone routine to call */
				/* rpcmod's start routine. */
				/* set to 1 by svc_cots_kstart() */
	struct rpc_cots_server *cmd_stats;	/* stats for zone */
};

/*
 * Transport private data.
 * Kept in clone_xprt->xp_p2buf.
 *
 * cd_mp is set by svc_cots_kgetres() and is either consumed by
 * svc_cots_ksend() or released by svc_cots_kfreeres().
 * cd_req_mp is set by svc_cots_krecv() and released by
 * svc_cots_kfreeargs() or svc_cots_kclone_destroy().
 */
typedef struct cots_data {
	mblk_t	*cd_mp;		/* pre-allocated reply message */
	mblk_t	*cd_req_mp;	/* request message */
} cots_data_t;

/*
 * Server statistics
 * NOTE: This structure type is duplicated in the NFS fast path.
 */
/*
 * cots_rsstat_tmpl is the template handed to rpcstat_zone_init_common()
 * by svc_cots_stats_init(); its size is also the size freed by
 * svc_cots_stats_fini().  In this file rscalls, rsbadcalls and rsxdrcall
 * are bumped by svc_cots_krecv(), and rsdupchecks and rsdupreqs by
 * svc_cots_kdup().
 */
static const struct rpc_cots_server {
	kstat_named_t	rscalls;
	kstat_named_t	rsbadcalls;
	kstat_named_t	rsnullrecv;
	kstat_named_t	rsbadlen;
	kstat_named_t	rsxdrcall;
	kstat_named_t	rsdupchecks;
	kstat_named_t	rsdupreqs;
} cots_rsstat_tmpl = {
	{ "calls",	KSTAT_DATA_UINT64 },
	{ "badcalls",	KSTAT_DATA_UINT64 },
	{ "nullrecv",	KSTAT_DATA_UINT64 },
	{ "badlen",	KSTAT_DATA_UINT64 },
	{ "xdrcall",	KSTAT_DATA_UINT64 },
	{ "dupchecks",	KSTAT_DATA_UINT64 },
	{ "dupreqs",	KSTAT_DATA_UINT64 }
};

/*
 * CLONE2STATS maps a clone transport to the statistics block stored in
 * its master's private data (cmd_stats).
 * RSSTAT_INCR atomically adds one to the 64-bit counter `x' of stats
 * block `s'.
 */
#define	CLONE2STATS(clone_xprt)	\
	((struct cots_master_data *)(clone_xprt)->xp_master->xp_p2)->cmd_stats
#define	RSSTAT_INCR(s, x)	\
	atomic_add_64(&(s)->x.value.ui64, 1)

/*
 * Pointer to a transport specific `ready to receive' function in rpcmod
 * (set from rpcmod).
 * svc_max_msg_sizep points at the message size sanity limit that
 * svc_cots_kcreate() raises when a transport asks for a larger maximum;
 * it is tested for NULL before every use in this file.
 */
void	(*mir_start)(queue_t *);
uint_t	*svc_max_msg_sizep;

/*
 * the address size of the underlying transport can sometimes be
 * unknown (tinfo->ADDR_size == -1).  For this case, it is
 * necessary to figure out what the size is so the correct amount
 * of data is allocated.  This is an iterative process:
 *	1. take a good guess (use T_MINADDRSIZE)
 *	2. try it.
 *	3. if it works then everything is ok
 *	4. if the error is ENAMETOOLONG, double the guess
 *	5. go back to step 2.
 *
 * NOTE(review): neither macro below is referenced anywhere else in this
 * file as shown -- confirm whether they are still needed.
 */
#define	T_UNKNOWNADDRSIZE	(-1)
#define	T_MINADDRSIZE	32

/*
 * Create a transport record.
 * The transport record, output buffer, and private data structure
 * are allocated.  The output buffer is serialized into using xdrmem.
 * There is one transport record per user process which implements a
 * set of services.
180 */ 181 static kmutex_t cots_kcreate_lock; 182 183 int 184 svc_cots_kcreate(file_t *fp, uint_t max_msgsize, struct T_info_ack *tinfo, 185 SVCMASTERXPRT **nxprt) 186 { 187 struct cots_master_data *cmd; 188 int err, retval; 189 SVCMASTERXPRT *xprt; 190 struct rpcstat *rpcstat; 191 struct T_addr_ack *ack_p; 192 struct strioctl getaddr; 193 194 if (nxprt == NULL) 195 return (EINVAL); 196 197 rpcstat = zone_getspecific(rpcstat_zone_key, curproc->p_zone); 198 ASSERT(rpcstat != NULL); 199 200 xprt = kmem_zalloc(sizeof (SVCMASTERXPRT), KM_SLEEP); 201 202 cmd = kmem_zalloc(sizeof (*cmd) + sizeof (*ack_p) 203 + (2 * sizeof (sin6_t)), KM_SLEEP); 204 205 ack_p = (struct T_addr_ack *)&cmd[1]; 206 207 if ((tinfo->TIDU_size > COTS_MAX_ALLOCSIZE) || 208 (tinfo->TIDU_size <= 0)) 209 xprt->xp_msg_size = COTS_MAX_ALLOCSIZE; 210 else { 211 xprt->xp_msg_size = tinfo->TIDU_size - 212 (tinfo->TIDU_size % BYTES_PER_XDR_UNIT); 213 } 214 215 xprt->xp_ops = &svc_cots_op; 216 xprt->xp_p2 = (caddr_t)cmd; 217 cmd->cmd_xprt_started = 0; 218 cmd->cmd_stats = rpcstat->rpc_cots_server; 219 220 getaddr.ic_cmd = TI_GETINFO; 221 getaddr.ic_timout = -1; 222 getaddr.ic_len = sizeof (*ack_p) + (2 * sizeof (sin6_t)); 223 getaddr.ic_dp = (char *)ack_p; 224 ack_p->PRIM_type = T_ADDR_REQ; 225 226 err = strioctl(fp->f_vnode, I_STR, (intptr_t)&getaddr, 227 0, K_TO_K, CRED(), &retval); 228 if (err) { 229 kmem_free(cmd, sizeof (*cmd) + sizeof (*ack_p) + 230 (2 * sizeof (sin6_t))); 231 kmem_free(xprt, sizeof (SVCMASTERXPRT)); 232 return (err); 233 } 234 235 xprt->xp_rtaddr.maxlen = ack_p->REMADDR_length; 236 xprt->xp_rtaddr.len = ack_p->REMADDR_length; 237 cmd->cmd_src_addr = xprt->xp_rtaddr.buf = 238 (char *)ack_p + ack_p->REMADDR_offset; 239 240 xprt->xp_lcladdr.maxlen = ack_p->LOCADDR_length; 241 xprt->xp_lcladdr.len = ack_p->LOCADDR_length; 242 xprt->xp_lcladdr.buf = (char *)ack_p + ack_p->LOCADDR_offset; 243 244 /* 245 * If the current sanity check size in rpcmod is smaller 246 * than the size needed for 
this xprt, then increase 247 * the sanity check. 248 */ 249 if (max_msgsize != 0 && svc_max_msg_sizep && 250 max_msgsize > *svc_max_msg_sizep) { 251 252 /* This check needs a lock */ 253 mutex_enter(&cots_kcreate_lock); 254 if (svc_max_msg_sizep && max_msgsize > *svc_max_msg_sizep) 255 *svc_max_msg_sizep = max_msgsize; 256 mutex_exit(&cots_kcreate_lock); 257 } 258 259 *nxprt = xprt; 260 261 return (0); 262 } 263 264 /* 265 * Destroy a master transport record. 266 * Frees the space allocated for a transport record. 267 */ 268 static void 269 svc_cots_kdestroy(SVCMASTERXPRT *xprt) 270 { 271 struct cots_master_data *cmd = (struct cots_master_data *)xprt->xp_p2; 272 273 ASSERT(cmd); 274 275 if (xprt->xp_netid) 276 kmem_free(xprt->xp_netid, strlen(xprt->xp_netid) + 1); 277 if (xprt->xp_addrmask.maxlen) 278 kmem_free(xprt->xp_addrmask.buf, xprt->xp_addrmask.maxlen); 279 280 mutex_destroy(&xprt->xp_req_lock); 281 mutex_destroy(&xprt->xp_thread_lock); 282 283 kmem_free(cmd, sizeof (*cmd) + sizeof (struct T_addr_ack) + 284 (2 * sizeof (sin6_t))); 285 286 kmem_free(xprt, sizeof (SVCMASTERXPRT)); 287 } 288 289 /* 290 * svc_tli_kcreate() calls this function at the end to tell 291 * rpcmod that the transport is ready to receive requests. 292 */ 293 static void 294 svc_cots_kstart(SVCMASTERXPRT *xprt) 295 { 296 struct cots_master_data *cmd = (struct cots_master_data *)xprt->xp_p2; 297 298 if (cmd->cmd_xprt_started == 0) { 299 /* 300 * Acquire the xp_req_lock in order to use xp_wq 301 * safely (we don't want to qenable a queue that has 302 * already been closed). 303 */ 304 mutex_enter(&xprt->xp_req_lock); 305 if (cmd->cmd_xprt_started == 0 && 306 xprt->xp_wq != NULL) { 307 (*mir_start)(xprt->xp_wq); 308 cmd->cmd_xprt_started = 1; 309 } 310 mutex_exit(&xprt->xp_req_lock); 311 } 312 } 313 314 /* 315 * Transport-type specific part of svc_xprt_cleanup(). 
316 */ 317 static void 318 svc_cots_kclone_destroy(SVCXPRT *clone_xprt) 319 { 320 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 321 322 if (cd->cd_req_mp) { 323 freemsg(cd->cd_req_mp); 324 cd->cd_req_mp = (mblk_t *)0; 325 } 326 ASSERT(cd->cd_mp == NULL); 327 } 328 329 /* 330 * Receive rpc requests. 331 * Checks if the message is intact, and deserializes the call packet. 332 */ 333 static bool_t 334 svc_cots_krecv(SVCXPRT *clone_xprt, mblk_t *mp, struct rpc_msg *msg) 335 { 336 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 337 XDR *xdrs = &clone_xprt->xp_xdrin; 338 struct rpc_cots_server *stats = CLONE2STATS(clone_xprt); 339 340 TRACE_0(TR_FAC_KRPC, TR_SVC_COTS_KRECV_START, 341 "svc_cots_krecv_start:"); 342 RPCLOG(4, "svc_cots_krecv_start clone_xprt = %p:\n", 343 (void *)clone_xprt); 344 345 RSSTAT_INCR(stats, rscalls); 346 347 if (mp->b_datap->db_type != M_DATA) { 348 RPCLOG(16, "svc_cots_krecv bad db_type %d\n", 349 mp->b_datap->db_type); 350 goto bad; 351 } 352 353 xdrmblk_init(xdrs, mp, XDR_DECODE, 0); 354 355 TRACE_0(TR_FAC_KRPC, TR_XDR_CALLMSG_START, 356 "xdr_callmsg_start:"); 357 RPCLOG0(4, "xdr_callmsg_start:\n"); 358 if (!xdr_callmsg(xdrs, msg)) { 359 TRACE_1(TR_FAC_KRPC, TR_XDR_CALLMSG_END, 360 "xdr_callmsg_end:(%S)", "bad"); 361 RPCLOG0(1, "svc_cots_krecv xdr_callmsg failure\n"); 362 RSSTAT_INCR(stats, rsxdrcall); 363 goto bad; 364 } 365 TRACE_1(TR_FAC_KRPC, TR_XDR_CALLMSG_END, 366 "xdr_callmsg_end:(%S)", "good"); 367 368 clone_xprt->xp_xid = msg->rm_xid; 369 cd->cd_req_mp = mp; 370 371 TRACE_1(TR_FAC_KRPC, TR_SVC_COTS_KRECV_END, 372 "svc_cots_krecv_end:(%S)", "good"); 373 RPCLOG0(4, "svc_cots_krecv_end:good\n"); 374 return (TRUE); 375 376 bad: 377 if (mp) 378 freemsg(mp); 379 380 RSSTAT_INCR(stats, rsbadcalls); 381 TRACE_1(TR_FAC_KRPC, TR_SVC_COTS_KRECV_END, 382 "svc_cots_krecv_end:(%S)", "bad"); 383 return (FALSE); 384 } 385 386 /* 387 * Send rpc reply. 
388 */ 389 static bool_t 390 svc_cots_ksend(SVCXPRT *clone_xprt, struct rpc_msg *msg) 391 { 392 /* LINTED pointer alignment */ 393 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 394 XDR *xdrs = &(clone_xprt->xp_xdrout); 395 int retval = FALSE; 396 mblk_t *mp; 397 xdrproc_t xdr_results; 398 caddr_t xdr_location; 399 bool_t has_args; 400 401 TRACE_0(TR_FAC_KRPC, TR_SVC_COTS_KSEND_START, 402 "svc_cots_ksend_start:"); 403 404 /* 405 * If there is a result procedure specified in the reply message, 406 * it will be processed in the xdr_replymsg and SVCAUTH_WRAP. 407 * We need to make sure it won't be processed twice, so we null 408 * it for xdr_replymsg here. 409 */ 410 has_args = FALSE; 411 if (msg->rm_reply.rp_stat == MSG_ACCEPTED && 412 msg->rm_reply.rp_acpt.ar_stat == SUCCESS) { 413 if ((xdr_results = msg->acpted_rply.ar_results.proc) != NULL) { 414 has_args = TRUE; 415 xdr_location = msg->acpted_rply.ar_results.where; 416 msg->acpted_rply.ar_results.proc = xdr_void; 417 msg->acpted_rply.ar_results.where = NULL; 418 } 419 } 420 421 mp = cd->cd_mp; 422 if (mp) { 423 /* 424 * The program above pre-allocated an mblk and put 425 * the data in place. 426 */ 427 cd->cd_mp = (mblk_t *)NULL; 428 if (!(xdr_replymsg_body(xdrs, msg) && 429 (!has_args || SVCAUTH_WRAP(&clone_xprt->xp_auth, xdrs, 430 xdr_results, xdr_location)))) { 431 RPCLOG0(1, "svc_cots_ksend: " 432 "xdr_replymsg_body/SVCAUTH_WRAP failed\n"); 433 freemsg(mp); 434 goto out; 435 } 436 } else { 437 int len; 438 int mpsize; 439 440 /* 441 * Leave space for protocol headers. 442 */ 443 len = MSG_OFFSET + clone_xprt->xp_msg_size; 444 445 /* 446 * Allocate an initial mblk for the response data. 
447 */ 448 while (!(mp = allocb(len, BPRI_LO))) { 449 RPCLOG0(16, "svc_cots_ksend: allocb failed failed\n"); 450 if (strwaitbuf(len, BPRI_LO)) { 451 TRACE_1(TR_FAC_KRPC, TR_SVC_COTS_KSEND_END, 452 "svc_cots_ksend_end:(%S)", "strwaitbuf"); 453 RPCLOG0(1, 454 "svc_cots_ksend: strwaitbuf failed\n"); 455 goto out; 456 } 457 } 458 459 /* 460 * Initialize the XDR decode stream. Additional mblks 461 * will be allocated if necessary. They will be TIDU 462 * sized. 463 */ 464 xdrmblk_init(xdrs, mp, XDR_ENCODE, clone_xprt->xp_msg_size); 465 mpsize = MBLKSIZE(mp); 466 ASSERT(mpsize >= len); 467 ASSERT(mp->b_rptr == mp->b_datap->db_base); 468 469 /* 470 * If the size of mblk is not appreciably larger than what we 471 * asked, then resize the mblk to exactly len bytes. Reason for 472 * this: suppose len is 1600 bytes, the tidu is 1460 bytes 473 * (from TCP over ethernet), and the arguments to RPC require 474 * 2800 bytes. Ideally we want the protocol to render two 475 * ~1400 byte segments over the wire. If allocb() gives us a 2k 476 * mblk, and we allocate a second mblk for the rest, the 477 * protocol module may generate 3 segments over the wire: 478 * 1460 bytes for the first, 448 (2048 - 1600) for the 2nd, and 479 * 892 for the 3rd. If we "waste" 448 bytes in the first mblk, 480 * the XDR encoding will generate two ~1400 byte mblks, and the 481 * protocol module is more likely to produce properly sized 482 * segments. 483 */ 484 if ((mpsize >> 1) <= len) { 485 mp->b_rptr += (mpsize - len); 486 } 487 488 /* 489 * Adjust b_rptr to reserve space for the non-data protocol 490 * headers that any downstream modules might like to add, and 491 * for the record marking header. 
492 */ 493 mp->b_rptr += (MSG_OFFSET + RM_HDR_SIZE); 494 495 XDR_SETPOS(xdrs, (uint_t)(mp->b_rptr - mp->b_datap->db_base)); 496 ASSERT(mp->b_wptr == mp->b_rptr); 497 498 msg->rm_xid = clone_xprt->xp_xid; 499 500 TRACE_0(TR_FAC_KRPC, TR_XDR_REPLYMSG_START, 501 "xdr_replymsg_start:"); 502 if (!(xdr_replymsg(xdrs, msg) && 503 (!has_args || SVCAUTH_WRAP(&clone_xprt->xp_auth, xdrs, 504 xdr_results, xdr_location)))) { 505 TRACE_1(TR_FAC_KRPC, TR_XDR_REPLYMSG_END, 506 "xdr_replymsg_end:(%S)", "bad"); 507 freemsg(mp); 508 RPCLOG0(1, "svc_cots_ksend: xdr_replymsg/SVCAUTH_WRAP " 509 "failed\n"); 510 goto out; 511 } 512 TRACE_1(TR_FAC_KRPC, TR_XDR_REPLYMSG_END, 513 "xdr_replymsg_end:(%S)", "good"); 514 } 515 516 put(clone_xprt->xp_wq, mp); 517 retval = TRUE; 518 519 out: 520 /* 521 * This is completely disgusting. If public is set it is 522 * a pointer to a structure whose first field is the address 523 * of the function to free that structure and any related 524 * stuff. (see rrokfree in nfs_xdr.c). 525 */ 526 if (xdrs->x_public) { 527 /* LINTED pointer alignment */ 528 (**((int (**)())xdrs->x_public))(xdrs->x_public); 529 } 530 531 TRACE_1(TR_FAC_KRPC, TR_SVC_COTS_KSEND_END, 532 "svc_cots_ksend_end:(%S)", "done"); 533 return (retval); 534 } 535 536 /* 537 * Deserialize arguments. 538 */ 539 static bool_t 540 svc_cots_kgetargs(SVCXPRT *clone_xprt, xdrproc_t xdr_args, 541 caddr_t args_ptr) 542 { 543 return (SVCAUTH_UNWRAP(&clone_xprt->xp_auth, &clone_xprt->xp_xdrin, 544 xdr_args, args_ptr)); 545 } 546 547 static bool_t 548 svc_cots_kfreeargs(SVCXPRT *clone_xprt, xdrproc_t xdr_args, 549 caddr_t args_ptr) 550 { 551 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 552 mblk_t *mp; 553 bool_t retval; 554 555 /* 556 * It is important to call the XDR routine before 557 * freeing the request mblk. Structures in the 558 * XDR data may point into the mblk and require that 559 * the memory be intact during the free routine. 
560 */ 561 if (args_ptr) { 562 /* LINTED pointer alignment */ 563 XDR *xdrs = &clone_xprt->xp_xdrin; 564 565 xdrs->x_op = XDR_FREE; 566 retval = (*xdr_args)(xdrs, args_ptr); 567 } else 568 retval = TRUE; 569 570 if ((mp = cd->cd_req_mp) != NULL) { 571 cd->cd_req_mp = (mblk_t *)0; 572 freemsg(mp); 573 } 574 575 return (retval); 576 } 577 578 static int32_t * 579 svc_cots_kgetres(SVCXPRT *clone_xprt, int size) 580 { 581 /* LINTED pointer alignment */ 582 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 583 XDR *xdrs = &clone_xprt->xp_xdrout; 584 mblk_t *mp; 585 int32_t *buf; 586 struct rpc_msg rply; 587 int len; 588 int mpsize; 589 590 /* 591 * Leave space for protocol headers. 592 */ 593 len = MSG_OFFSET + clone_xprt->xp_msg_size; 594 595 /* 596 * Allocate an initial mblk for the response data. 597 */ 598 while ((mp = allocb(len, BPRI_LO)) == NULL) { 599 if (strwaitbuf(len, BPRI_LO)) 600 return (FALSE); 601 } 602 603 /* 604 * Initialize the XDR decode stream. Additional mblks 605 * will be allocated if necessary. They will be TIDU 606 * sized. 607 */ 608 xdrmblk_init(xdrs, mp, XDR_ENCODE, clone_xprt->xp_msg_size); 609 mpsize = MBLKSIZE(mp); 610 ASSERT(mpsize >= len); 611 ASSERT(mp->b_rptr == mp->b_datap->db_base); 612 613 /* 614 * If the size of mblk is not appreciably larger than what we 615 * asked, then resize the mblk to exactly len bytes. Reason for 616 * this: suppose len is 1600 bytes, the tidu is 1460 bytes 617 * (from TCP over ethernet), and the arguments to RPC require 618 * 2800 bytes. Ideally we want the protocol to render two 619 * ~1400 byte segments over the wire. If allocb() gives us a 2k 620 * mblk, and we allocate a second mblk for the rest, the 621 * protocol module may generate 3 segments over the wire: 622 * 1460 bytes for the first, 448 (2048 - 1600) for the 2nd, and 623 * 892 for the 3rd. 
If we "waste" 448 bytes in the first mblk, 624 * the XDR encoding will generate two ~1400 byte mblks, and the 625 * protocol module is more likely to produce properly sized 626 * segments. 627 */ 628 if ((mpsize >> 1) <= len) { 629 mp->b_rptr += (mpsize - len); 630 } 631 632 /* 633 * Adjust b_rptr to reserve space for the non-data protocol 634 * headers that any downstream modules might like to add, and 635 * for the record marking header. 636 */ 637 mp->b_rptr += (MSG_OFFSET + RM_HDR_SIZE); 638 639 XDR_SETPOS(xdrs, (uint_t)(mp->b_rptr - mp->b_datap->db_base)); 640 ASSERT(mp->b_wptr == mp->b_rptr); 641 642 /* 643 * Assume a successful RPC since most of them are. 644 */ 645 rply.rm_xid = clone_xprt->xp_xid; 646 rply.rm_direction = REPLY; 647 rply.rm_reply.rp_stat = MSG_ACCEPTED; 648 rply.acpted_rply.ar_verf = clone_xprt->xp_verf; 649 rply.acpted_rply.ar_stat = SUCCESS; 650 651 if (!xdr_replymsg_hdr(xdrs, &rply)) { 652 freeb(mp); 653 return (NULL); 654 } 655 656 657 buf = XDR_INLINE(xdrs, size); 658 if (buf == NULL) { 659 ASSERT(cd->cd_mp == NULL); 660 freemsg(mp); 661 } else { 662 cd->cd_mp = mp; 663 } 664 return (buf); 665 } 666 667 static void 668 svc_cots_kfreeres(SVCXPRT *clone_xprt) 669 { 670 cots_data_t *cd; 671 mblk_t *mp; 672 673 cd = (cots_data_t *)clone_xprt->xp_p2buf; 674 if ((mp = cd->cd_mp) != NULL) { 675 cd->cd_mp = (mblk_t *)NULL; 676 freemsg(mp); 677 } 678 } 679 680 /* 681 * the dup cacheing routines below provide a cache of non-failure 682 * transaction id's. rpc service routines can use this to detect 683 * retransmissions and re-send a non-failure response. 684 */ 685 686 /* 687 * MAXDUPREQS is the number of cached items. It should be adjusted 688 * to the service load so that there is likely to be a response entry 689 * when the first retransmission comes in. 690 */ 691 #define MAXDUPREQS 1024 692 693 /* 694 * This should be appropriately scaled to MAXDUPREQS. 
695 */ 696 #define DRHASHSZ 257 697 698 #if ((DRHASHSZ & (DRHASHSZ - 1)) == 0) 699 #define XIDHASH(xid) ((xid) & (DRHASHSZ - 1)) 700 #else 701 #define XIDHASH(xid) ((xid) % DRHASHSZ) 702 #endif 703 #define DRHASH(dr) XIDHASH((dr)->dr_xid) 704 #define REQTOXID(req) ((req)->rq_xprt->xp_xid) 705 706 static int cotsndupreqs = 0; 707 int cotsmaxdupreqs = MAXDUPREQS; 708 static kmutex_t cotsdupreq_lock; 709 static struct dupreq *cotsdrhashtbl[DRHASHSZ]; 710 static int cotsdrhashstat[DRHASHSZ]; 711 712 static void unhash(struct dupreq *); 713 714 /* 715 * cotsdrmru points to the head of a circular linked list in lru order. 716 * cotsdrmru->dr_next == drlru 717 */ 718 struct dupreq *cotsdrmru; 719 720 /* 721 * PSARC 2003/523 Contract Private Interface 722 * svc_cots_kdup 723 * Changes must be reviewed by Solaris File Sharing 724 * Changes must be communicated to contract-2003-523@sun.com 725 * 726 * svc_cots_kdup searches the request cache and returns 0 if the 727 * request is not found in the cache. If it is found, then it 728 * returns the state of the request (in progress or done) and 729 * the status or attributes that were part of the original reply. 730 * 731 * If DUP_DONE (there is a duplicate) svc_cots_kdup copies over the 732 * value of the response. In that case, also return in *dupcachedp 733 * whether the response free routine is cached in the dupreq - in which case 734 * the caller should not be freeing it, because it will be done later 735 * in the svc_cots_kdup code when the dupreq is reused. 736 */ 737 static int 738 svc_cots_kdup(struct svc_req *req, caddr_t res, int size, struct dupreq **drpp, 739 bool_t *dupcachedp) 740 { 741 struct rpc_cots_server *stats = CLONE2STATS(req->rq_xprt); 742 struct dupreq *dr; 743 uint32_t xid; 744 uint32_t drhash; 745 int status; 746 747 xid = REQTOXID(req); 748 mutex_enter(&cotsdupreq_lock); 749 RSSTAT_INCR(stats, rsdupchecks); 750 /* 751 * Check to see whether an entry already exists in the cache. 
752 */ 753 dr = cotsdrhashtbl[XIDHASH(xid)]; 754 while (dr != NULL) { 755 if (dr->dr_xid == xid && 756 dr->dr_proc == req->rq_proc && 757 dr->dr_prog == req->rq_prog && 758 dr->dr_vers == req->rq_vers && 759 dr->dr_addr.len == req->rq_xprt->xp_rtaddr.len && 760 bcmp((caddr_t)dr->dr_addr.buf, 761 (caddr_t)req->rq_xprt->xp_rtaddr.buf, 762 dr->dr_addr.len) == 0) { 763 status = dr->dr_status; 764 if (status == DUP_DONE) { 765 bcopy(dr->dr_resp.buf, res, size); 766 if (dupcachedp != NULL) 767 *dupcachedp = (dr->dr_resfree != NULL); 768 TRACE_0(TR_FAC_KRPC, TR_SVC_COTS_KDUP_DONE, 769 "svc_cots_kdup: DUP_DONE"); 770 } else { 771 dr->dr_status = DUP_INPROGRESS; 772 *drpp = dr; 773 TRACE_0(TR_FAC_KRPC, 774 TR_SVC_COTS_KDUP_INPROGRESS, 775 "svc_cots_kdup: DUP_INPROGRESS"); 776 } 777 RSSTAT_INCR(stats, rsdupreqs); 778 mutex_exit(&cotsdupreq_lock); 779 return (status); 780 } 781 dr = dr->dr_chain; 782 } 783 784 /* 785 * There wasn't an entry, either allocate a new one or recycle 786 * an old one. 787 */ 788 if (cotsndupreqs < cotsmaxdupreqs) { 789 dr = kmem_alloc(sizeof (*dr), KM_NOSLEEP); 790 if (dr == NULL) { 791 mutex_exit(&cotsdupreq_lock); 792 return (DUP_ERROR); 793 } 794 dr->dr_resp.buf = NULL; 795 dr->dr_resp.maxlen = 0; 796 dr->dr_addr.buf = NULL; 797 dr->dr_addr.maxlen = 0; 798 if (cotsdrmru) { 799 dr->dr_next = cotsdrmru->dr_next; 800 cotsdrmru->dr_next = dr; 801 } else { 802 dr->dr_next = dr; 803 } 804 cotsndupreqs++; 805 } else { 806 dr = cotsdrmru->dr_next; 807 while (dr->dr_status == DUP_INPROGRESS) { 808 dr = dr->dr_next; 809 if (dr == cotsdrmru->dr_next) { 810 cmn_err(CE_WARN, "svc_cots_kdup no slots free"); 811 mutex_exit(&cotsdupreq_lock); 812 return (DUP_ERROR); 813 } 814 } 815 unhash(dr); 816 if (dr->dr_resfree) { 817 (*dr->dr_resfree)(dr->dr_resp.buf); 818 } 819 } 820 dr->dr_resfree = NULL; 821 cotsdrmru = dr; 822 823 dr->dr_xid = REQTOXID(req); 824 dr->dr_prog = req->rq_prog; 825 dr->dr_vers = req->rq_vers; 826 dr->dr_proc = req->rq_proc; 827 if 
(dr->dr_addr.maxlen < req->rq_xprt->xp_rtaddr.len) { 828 if (dr->dr_addr.buf != NULL) 829 kmem_free(dr->dr_addr.buf, dr->dr_addr.maxlen); 830 dr->dr_addr.maxlen = req->rq_xprt->xp_rtaddr.len; 831 dr->dr_addr.buf = kmem_alloc(dr->dr_addr.maxlen, KM_NOSLEEP); 832 if (dr->dr_addr.buf == NULL) { 833 dr->dr_addr.maxlen = 0; 834 dr->dr_status = DUP_DROP; 835 mutex_exit(&cotsdupreq_lock); 836 return (DUP_ERROR); 837 } 838 } 839 dr->dr_addr.len = req->rq_xprt->xp_rtaddr.len; 840 bcopy(req->rq_xprt->xp_rtaddr.buf, dr->dr_addr.buf, dr->dr_addr.len); 841 if (dr->dr_resp.maxlen < size) { 842 if (dr->dr_resp.buf != NULL) 843 kmem_free(dr->dr_resp.buf, dr->dr_resp.maxlen); 844 dr->dr_resp.maxlen = (unsigned int)size; 845 dr->dr_resp.buf = kmem_alloc(size, KM_NOSLEEP); 846 if (dr->dr_resp.buf == NULL) { 847 dr->dr_resp.maxlen = 0; 848 dr->dr_status = DUP_DROP; 849 mutex_exit(&cotsdupreq_lock); 850 return (DUP_ERROR); 851 } 852 } 853 dr->dr_status = DUP_INPROGRESS; 854 855 drhash = (uint32_t)DRHASH(dr); 856 dr->dr_chain = cotsdrhashtbl[drhash]; 857 cotsdrhashtbl[drhash] = dr; 858 cotsdrhashstat[drhash]++; 859 mutex_exit(&cotsdupreq_lock); 860 *drpp = dr; 861 return (DUP_NEW); 862 } 863 864 /* 865 * PSARC 2003/523 Contract Private Interface 866 * svc_cots_kdupdone 867 * Changes must be reviewed by Solaris File Sharing 868 * Changes must be communicated to contract-2003-523@sun.com 869 * 870 * svc_cots_kdupdone marks the request done (DUP_DONE or DUP_DROP) 871 * and stores the response. 872 */ 873 static void 874 svc_cots_kdupdone(struct dupreq *dr, caddr_t res, void (*dis_resfree)(), 875 int size, int status) 876 { 877 ASSERT(dr->dr_resfree == NULL); 878 if (status == DUP_DONE) { 879 bcopy(res, dr->dr_resp.buf, size); 880 dr->dr_resfree = dis_resfree; 881 } 882 dr->dr_status = status; 883 } 884 885 /* 886 * This routine expects that the mutex, cotsdupreq_lock, is already held. 
887 */ 888 static void 889 unhash(struct dupreq *dr) 890 { 891 struct dupreq *drt; 892 struct dupreq *drtprev = NULL; 893 uint32_t drhash; 894 895 ASSERT(MUTEX_HELD(&cotsdupreq_lock)); 896 897 drhash = (uint32_t)DRHASH(dr); 898 drt = cotsdrhashtbl[drhash]; 899 while (drt != NULL) { 900 if (drt == dr) { 901 cotsdrhashstat[drhash]--; 902 if (drtprev == NULL) { 903 cotsdrhashtbl[drhash] = drt->dr_chain; 904 } else { 905 drtprev->dr_chain = drt->dr_chain; 906 } 907 return; 908 } 909 drtprev = drt; 910 drt = drt->dr_chain; 911 } 912 } 913 914 void 915 svc_cots_stats_init(zoneid_t zoneid, struct rpc_cots_server **statsp) 916 { 917 *statsp = (struct rpc_cots_server *)rpcstat_zone_init_common(zoneid, 918 "unix", "rpc_cots_server", (const kstat_named_t *)&cots_rsstat_tmpl, 919 sizeof (cots_rsstat_tmpl)); 920 } 921 922 void 923 svc_cots_stats_fini(zoneid_t zoneid, struct rpc_cots_server **statsp) 924 { 925 rpcstat_zone_fini_common(zoneid, "unix", "rpc_cots_server"); 926 kmem_free(*statsp, sizeof (cots_rsstat_tmpl)); 927 } 928 929 void 930 svc_cots_init(void) 931 { 932 /* 933 * Check to make sure that the cots private data will fit into 934 * the stack buffer allocated by svc_run. The ASSERT is a safety 935 * net if the cots_data_t structure ever changes. 936 */ 937 /*CONSTANTCONDITION*/ 938 ASSERT(sizeof (cots_data_t) <= SVC_P2LEN); 939 940 mutex_init(&cots_kcreate_lock, NULL, MUTEX_DEFAULT, NULL); 941 mutex_init(&cotsdupreq_lock, NULL, MUTEX_DEFAULT, NULL); 942 } 943