1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 /* 30 * Portions of this source code were derived from Berkeley 4.3 BSD 31 * under license from the Regents of the University of California. 32 */ 33 34 /* 35 * svc_cots.c 36 * Server side for connection-oriented RPC in the kernel. 37 * 38 */ 39 40 #include <sys/param.h> 41 #include <sys/types.h> 42 #include <sys/sysmacros.h> 43 #include <sys/file.h> 44 #include <sys/stream.h> 45 #include <sys/strsubr.h> 46 #include <sys/strsun.h> 47 #include <sys/stropts.h> 48 #include <sys/tiuser.h> 49 #include <sys/timod.h> 50 #include <sys/tihdr.h> 51 #include <sys/fcntl.h> 52 #include <sys/errno.h> 53 #include <sys/kmem.h> 54 #include <sys/systm.h> 55 #include <sys/debug.h> 56 #include <sys/cmn_err.h> 57 #include <sys/kstat.h> 58 #include <sys/vtrace.h> 59 60 #include <rpc/types.h> 61 #include <rpc/xdr.h> 62 #include <rpc/auth.h> 63 #include <rpc/rpc_msg.h> 64 #include <rpc/svc.h> 65 #include <inet/ip.h> 66 67 #define COTS_MAX_ALLOCSIZE 2048 68 #define MSG_OFFSET 128 /* offset of call into the mblk */ 69 #define RM_HDR_SIZE 4 /* record mark header size */ 70 71 /* 72 * Routines exported through ops vector. 73 */ 74 static bool_t svc_cots_krecv(SVCXPRT *, mblk_t *, struct rpc_msg *); 75 static bool_t svc_cots_ksend(SVCXPRT *, struct rpc_msg *); 76 static bool_t svc_cots_kgetargs(SVCXPRT *, xdrproc_t, caddr_t); 77 static bool_t svc_cots_kfreeargs(SVCXPRT *, xdrproc_t, caddr_t); 78 static void svc_cots_kdestroy(SVCMASTERXPRT *); 79 static int svc_cots_kdup(struct svc_req *, caddr_t, int, 80 struct dupreq **, bool_t *); 81 static void svc_cots_kdupdone(struct dupreq *, caddr_t, 82 void (*)(), int, int); 83 static int32_t *svc_cots_kgetres(SVCXPRT *, int); 84 static void svc_cots_kfreeres(SVCXPRT *); 85 static void svc_cots_kclone_destroy(SVCXPRT *); 86 static void svc_cots_kstart(SVCMASTERXPRT *); 87 88 /* 89 * Server transport operations vector. 90 */ 91 struct svc_ops svc_cots_op = { 92 svc_cots_krecv, /* Get requests */ 93 svc_cots_kgetargs, /* Deserialize arguments */ 94 svc_cots_ksend, /* Send reply */ 95 svc_cots_kfreeargs, /* Free argument data space */ 96 svc_cots_kdestroy, /* Destroy transport handle */ 97 svc_cots_kdup, /* Check entry in dup req cache */ 98 svc_cots_kdupdone, /* Mark entry in dup req cache as done */ 99 svc_cots_kgetres, /* Get pointer to response buffer */ 100 svc_cots_kfreeres, /* Destroy pre-serialized response header */ 101 svc_cots_kclone_destroy, /* Destroy a clone xprt */ 102 svc_cots_kstart /* Tell `ready-to-receive' to rpcmod */ 103 }; 104 105 /* 106 * Master transport private data. 107 * Kept in xprt->xp_p2. 108 */ 109 struct cots_master_data { 110 char *cmd_src_addr; /* client's address */ 111 int cmd_xprt_started; /* flag for clone routine to call */ 112 /* rpcmod's start routine. */ 113 struct rpc_cots_server *cmd_stats; /* stats for zone */ 114 }; 115 116 /* 117 * Transport private data. 118 * Kept in clone_xprt->xp_p2buf. 119 */ 120 typedef struct cots_data { 121 mblk_t *cd_mp; /* pre-allocated reply message */ 122 mblk_t *cd_req_mp; /* request message */ 123 } cots_data_t; 124 125 /* 126 * Server statistics 127 * NOTE: This structure type is duplicated in the NFS fast path. 128 */ 129 static const struct rpc_cots_server { 130 kstat_named_t rscalls; 131 kstat_named_t rsbadcalls; 132 kstat_named_t rsnullrecv; 133 kstat_named_t rsbadlen; 134 kstat_named_t rsxdrcall; 135 kstat_named_t rsdupchecks; 136 kstat_named_t rsdupreqs; 137 } cots_rsstat_tmpl = { 138 { "calls", KSTAT_DATA_UINT64 }, 139 { "badcalls", KSTAT_DATA_UINT64 }, 140 { "nullrecv", KSTAT_DATA_UINT64 }, 141 { "badlen", KSTAT_DATA_UINT64 }, 142 { "xdrcall", KSTAT_DATA_UINT64 }, 143 { "dupchecks", KSTAT_DATA_UINT64 }, 144 { "dupreqs", KSTAT_DATA_UINT64 } 145 }; 146 147 #define CLONE2STATS(clone_xprt) \ 148 ((struct cots_master_data *)(clone_xprt)->xp_master->xp_p2)->cmd_stats 149 #define RSSTAT_INCR(s, x) \ 150 atomic_add_64(&(s)->x.value.ui64, 1) 151 152 /* 153 * Pointer to a transport specific `ready to receive' function in rpcmod 154 * (set from rpcmod). 155 */ 156 void (*mir_start)(queue_t *); 157 uint_t *svc_max_msg_sizep; 158 159 /* 160 * the address size of the underlying transport can sometimes be 161 * unknown (tinfo->ADDR_size == -1). For this case, it is 162 * necessary to figure out what the size is so the correct amount 163 * of data is allocated. This is an itterative process: 164 * 1. take a good guess (use T_MINADDRSIZE) 165 * 2. try it. 166 * 3. if it works then everything is ok 167 * 4. if the error is ENAMETOLONG, double the guess 168 * 5. go back to step 2. 169 */ 170 #define T_UNKNOWNADDRSIZE (-1) 171 #define T_MINADDRSIZE 32 172 173 /* 174 * Create a transport record. 175 * The transport record, output buffer, and private data structure 176 * are allocated. The output buffer is serialized into using xdrmem. 177 * There is one transport record per user process which implements a 178 * set of services. 179 */ 180 static kmutex_t cots_kcreate_lock; 181 182 int 183 svc_cots_kcreate(file_t *fp, uint_t max_msgsize, struct T_info_ack *tinfo, 184 SVCMASTERXPRT **nxprt) 185 { 186 struct cots_master_data *cmd; 187 int err, retval; 188 SVCMASTERXPRT *xprt; 189 struct rpcstat *rpcstat; 190 struct T_addr_ack *ack_p; 191 struct strioctl getaddr; 192 193 if (nxprt == NULL) 194 return (EINVAL); 195 196 rpcstat = zone_getspecific(rpcstat_zone_key, curproc->p_zone); 197 ASSERT(rpcstat != NULL); 198 199 xprt = kmem_zalloc(sizeof (SVCMASTERXPRT), KM_SLEEP); 200 201 cmd = kmem_zalloc(sizeof (*cmd) + sizeof (*ack_p) 202 + (2 * sizeof (sin6_t)), KM_SLEEP); 203 204 ack_p = (struct T_addr_ack *)&cmd[1]; 205 206 if ((tinfo->TIDU_size > COTS_MAX_ALLOCSIZE) || 207 (tinfo->TIDU_size <= 0)) 208 xprt->xp_msg_size = COTS_MAX_ALLOCSIZE; 209 else { 210 xprt->xp_msg_size = tinfo->TIDU_size - 211 (tinfo->TIDU_size % BYTES_PER_XDR_UNIT); 212 } 213 214 xprt->xp_ops = &svc_cots_op; 215 xprt->xp_p2 = (caddr_t)cmd; 216 cmd->cmd_xprt_started = 0; 217 cmd->cmd_stats = rpcstat->rpc_cots_server; 218 219 getaddr.ic_cmd = TI_GETINFO; 220 getaddr.ic_timout = -1; 221 getaddr.ic_len = sizeof (*ack_p) + (2 * sizeof (sin6_t)); 222 getaddr.ic_dp = (char *)ack_p; 223 ack_p->PRIM_type = T_ADDR_REQ; 224 225 err = strioctl(fp->f_vnode, I_STR, (intptr_t)&getaddr, 226 0, K_TO_K, CRED(), &retval); 227 if (err) { 228 kmem_free(cmd, sizeof (*cmd) + sizeof (*ack_p) + 229 (2 * sizeof (sin6_t))); 230 kmem_free(xprt, sizeof (SVCMASTERXPRT)); 231 return (err); 232 } 233 234 xprt->xp_rtaddr.maxlen = ack_p->REMADDR_length; 235 xprt->xp_rtaddr.len = ack_p->REMADDR_length; 236 cmd->cmd_src_addr = xprt->xp_rtaddr.buf = 237 (char *)ack_p + ack_p->REMADDR_offset; 238 239 xprt->xp_lcladdr.maxlen = ack_p->LOCADDR_length; 240 xprt->xp_lcladdr.len = ack_p->LOCADDR_length; 241 xprt->xp_lcladdr.buf = (char *)ack_p + ack_p->LOCADDR_offset; 242 243 /* 244 * If the current sanity check size in rpcmod is smaller 245 * than the size needed for this xprt, then increase 246 * the sanity check. 247 */ 248 if (max_msgsize != 0 && svc_max_msg_sizep && 249 max_msgsize > *svc_max_msg_sizep) { 250 251 /* This check needs a lock */ 252 mutex_enter(&cots_kcreate_lock); 253 if (svc_max_msg_sizep && max_msgsize > *svc_max_msg_sizep) 254 *svc_max_msg_sizep = max_msgsize; 255 mutex_exit(&cots_kcreate_lock); 256 } 257 258 *nxprt = xprt; 259 260 return (0); 261 } 262 263 /* 264 * Destroy a master transport record. 265 * Frees the space allocated for a transport record. 266 */ 267 static void 268 svc_cots_kdestroy(SVCMASTERXPRT *xprt) 269 { 270 struct cots_master_data *cmd = (struct cots_master_data *)xprt->xp_p2; 271 272 ASSERT(cmd); 273 274 if (xprt->xp_netid) 275 kmem_free(xprt->xp_netid, strlen(xprt->xp_netid) + 1); 276 if (xprt->xp_addrmask.maxlen) 277 kmem_free(xprt->xp_addrmask.buf, xprt->xp_addrmask.maxlen); 278 279 mutex_destroy(&xprt->xp_req_lock); 280 mutex_destroy(&xprt->xp_thread_lock); 281 282 kmem_free(cmd, sizeof (*cmd) + sizeof (struct T_addr_ack) + 283 (2 * sizeof (sin6_t))); 284 285 kmem_free(xprt, sizeof (SVCMASTERXPRT)); 286 } 287 288 /* 289 * svc_tli_kcreate() calls this function at the end to tell 290 * rpcmod that the transport is ready to receive requests. 291 */ 292 static void 293 svc_cots_kstart(SVCMASTERXPRT *xprt) 294 { 295 struct cots_master_data *cmd = (struct cots_master_data *)xprt->xp_p2; 296 297 if (cmd->cmd_xprt_started == 0) { 298 /* 299 * Acquire the xp_req_lock in order to use xp_wq 300 * safely (we don't want to qenable a queue that has 301 * already been closed). 302 */ 303 mutex_enter(&xprt->xp_req_lock); 304 if (cmd->cmd_xprt_started == 0 && 305 xprt->xp_wq != NULL) { 306 (*mir_start)(xprt->xp_wq); 307 cmd->cmd_xprt_started = 1; 308 } 309 mutex_exit(&xprt->xp_req_lock); 310 } 311 } 312 313 /* 314 * Transport-type specific part of svc_xprt_cleanup(). 315 */ 316 static void 317 svc_cots_kclone_destroy(SVCXPRT *clone_xprt) 318 { 319 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 320 321 if (cd->cd_req_mp) { 322 freemsg(cd->cd_req_mp); 323 cd->cd_req_mp = (mblk_t *)0; 324 } 325 ASSERT(cd->cd_mp == NULL); 326 } 327 328 /* 329 * Receive rpc requests. 330 * Checks if the message is intact, and deserializes the call packet. 331 */ 332 static bool_t 333 svc_cots_krecv(SVCXPRT *clone_xprt, mblk_t *mp, struct rpc_msg *msg) 334 { 335 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 336 XDR *xdrs = &clone_xprt->xp_xdrin; 337 struct rpc_cots_server *stats = CLONE2STATS(clone_xprt); 338 339 TRACE_0(TR_FAC_KRPC, TR_SVC_COTS_KRECV_START, 340 "svc_cots_krecv_start:"); 341 RPCLOG(4, "svc_cots_krecv_start clone_xprt = %p:\n", 342 (void *)clone_xprt); 343 344 RSSTAT_INCR(stats, rscalls); 345 346 if (mp->b_datap->db_type != M_DATA) { 347 RPCLOG(16, "svc_cots_krecv bad db_type %d\n", 348 mp->b_datap->db_type); 349 goto bad; 350 } 351 352 xdrmblk_init(xdrs, mp, XDR_DECODE, 0); 353 354 TRACE_0(TR_FAC_KRPC, TR_XDR_CALLMSG_START, 355 "xdr_callmsg_start:"); 356 RPCLOG0(4, "xdr_callmsg_start:\n"); 357 if (!xdr_callmsg(xdrs, msg)) { 358 TRACE_1(TR_FAC_KRPC, TR_XDR_CALLMSG_END, 359 "xdr_callmsg_end:(%S)", "bad"); 360 RPCLOG0(1, "svc_cots_krecv xdr_callmsg failure\n"); 361 RSSTAT_INCR(stats, rsxdrcall); 362 goto bad; 363 } 364 TRACE_1(TR_FAC_KRPC, TR_XDR_CALLMSG_END, 365 "xdr_callmsg_end:(%S)", "good"); 366 367 clone_xprt->xp_xid = msg->rm_xid; 368 cd->cd_req_mp = mp; 369 370 TRACE_1(TR_FAC_KRPC, TR_SVC_COTS_KRECV_END, 371 "svc_cots_krecv_end:(%S)", "good"); 372 RPCLOG0(4, "svc_cots_krecv_end:good\n"); 373 return (TRUE); 374 375 bad: 376 if (mp) 377 freemsg(mp); 378 379 RSSTAT_INCR(stats, rsbadcalls); 380 TRACE_1(TR_FAC_KRPC, TR_SVC_COTS_KRECV_END, 381 "svc_cots_krecv_end:(%S)", "bad"); 382 return (FALSE); 383 } 384 385 /* 386 * Send rpc reply. 387 */ 388 static bool_t 389 svc_cots_ksend(SVCXPRT *clone_xprt, struct rpc_msg *msg) 390 { 391 /* LINTED pointer alignment */ 392 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 393 XDR *xdrs = &(clone_xprt->xp_xdrout); 394 int retval = FALSE; 395 mblk_t *mp; 396 xdrproc_t xdr_results; 397 caddr_t xdr_location; 398 bool_t has_args; 399 400 TRACE_0(TR_FAC_KRPC, TR_SVC_COTS_KSEND_START, 401 "svc_cots_ksend_start:"); 402 403 /* 404 * If there is a result procedure specified in the reply message, 405 * it will be processed in the xdr_replymsg and SVCAUTH_WRAP. 406 * We need to make sure it won't be processed twice, so we null 407 * it for xdr_replymsg here. 408 */ 409 has_args = FALSE; 410 if (msg->rm_reply.rp_stat == MSG_ACCEPTED && 411 msg->rm_reply.rp_acpt.ar_stat == SUCCESS) { 412 if ((xdr_results = msg->acpted_rply.ar_results.proc) != NULL) { 413 has_args = TRUE; 414 xdr_location = msg->acpted_rply.ar_results.where; 415 msg->acpted_rply.ar_results.proc = xdr_void; 416 msg->acpted_rply.ar_results.where = NULL; 417 } 418 } 419 420 mp = cd->cd_mp; 421 if (mp) { 422 /* 423 * The program above pre-allocated an mblk and put 424 * the data in place. 425 */ 426 cd->cd_mp = (mblk_t *)NULL; 427 if (!(xdr_replymsg_body(xdrs, msg) && 428 (!has_args || SVCAUTH_WRAP(&clone_xprt->xp_auth, xdrs, 429 xdr_results, xdr_location)))) { 430 RPCLOG0(1, "svc_cots_ksend: " 431 "xdr_replymsg_body/SVCAUTH_WRAP failed\n"); 432 freemsg(mp); 433 goto out; 434 } 435 } else { 436 int len; 437 int mpsize; 438 439 /* 440 * Leave space for protocol headers. 441 */ 442 len = MSG_OFFSET + clone_xprt->xp_msg_size; 443 444 /* 445 * Allocate an initial mblk for the response data. 446 */ 447 while (!(mp = allocb(len, BPRI_LO))) { 448 RPCLOG0(16, "svc_cots_ksend: allocb failed failed\n"); 449 if (strwaitbuf(len, BPRI_LO)) { 450 TRACE_1(TR_FAC_KRPC, TR_SVC_COTS_KSEND_END, 451 "svc_cots_ksend_end:(%S)", "strwaitbuf"); 452 RPCLOG0(1, 453 "svc_cots_ksend: strwaitbuf failed\n"); 454 goto out; 455 } 456 } 457 458 /* 459 * Initialize the XDR decode stream. Additional mblks 460 * will be allocated if necessary. They will be TIDU 461 * sized. 462 */ 463 xdrmblk_init(xdrs, mp, XDR_ENCODE, clone_xprt->xp_msg_size); 464 mpsize = MBLKSIZE(mp); 465 ASSERT(mpsize >= len); 466 ASSERT(mp->b_rptr == mp->b_datap->db_base); 467 468 /* 469 * If the size of mblk is not appreciably larger than what we 470 * asked, then resize the mblk to exactly len bytes. Reason for 471 * this: suppose len is 1600 bytes, the tidu is 1460 bytes 472 * (from TCP over ethernet), and the arguments to RPC require 473 * 2800 bytes. Ideally we want the protocol to render two 474 * ~1400 byte segments over the wire. If allocb() gives us a 2k 475 * mblk, and we allocate a second mblk for the rest, the 476 * protocol module may generate 3 segments over the wire: 477 * 1460 bytes for the first, 448 (2048 - 1600) for the 2nd, and 478 * 892 for the 3rd. If we "waste" 448 bytes in the first mblk, 479 * the XDR encoding will generate two ~1400 byte mblks, and the 480 * protocol module is more likely to produce properly sized 481 * segments. 482 */ 483 if ((mpsize >> 1) <= len) { 484 mp->b_rptr += (mpsize - len); 485 } 486 487 /* 488 * Adjust b_rptr to reserve space for the non-data protocol 489 * headers that any downstream modules might like to add, and 490 * for the record marking header. 491 */ 492 mp->b_rptr += (MSG_OFFSET + RM_HDR_SIZE); 493 494 XDR_SETPOS(xdrs, (uint_t)(mp->b_rptr - mp->b_datap->db_base)); 495 ASSERT(mp->b_wptr == mp->b_rptr); 496 497 msg->rm_xid = clone_xprt->xp_xid; 498 499 TRACE_0(TR_FAC_KRPC, TR_XDR_REPLYMSG_START, 500 "xdr_replymsg_start:"); 501 if (!(xdr_replymsg(xdrs, msg) && 502 (!has_args || SVCAUTH_WRAP(&clone_xprt->xp_auth, xdrs, 503 xdr_results, xdr_location)))) { 504 TRACE_1(TR_FAC_KRPC, TR_XDR_REPLYMSG_END, 505 "xdr_replymsg_end:(%S)", "bad"); 506 freemsg(mp); 507 RPCLOG0(1, "svc_cots_ksend: xdr_replymsg/SVCAUTH_WRAP " 508 "failed\n"); 509 goto out; 510 } 511 TRACE_1(TR_FAC_KRPC, TR_XDR_REPLYMSG_END, 512 "xdr_replymsg_end:(%S)", "good"); 513 } 514 515 put(clone_xprt->xp_wq, mp); 516 retval = TRUE; 517 518 out: 519 /* 520 * This is completely disgusting. If public is set it is 521 * a pointer to a structure whose first field is the address 522 * of the function to free that structure and any related 523 * stuff. (see rrokfree in nfs_xdr.c). 524 */ 525 if (xdrs->x_public) { 526 /* LINTED pointer alignment */ 527 (**((int (**)())xdrs->x_public))(xdrs->x_public); 528 } 529 530 TRACE_1(TR_FAC_KRPC, TR_SVC_COTS_KSEND_END, 531 "svc_cots_ksend_end:(%S)", "done"); 532 return (retval); 533 } 534 535 /* 536 * Deserialize arguments. 537 */ 538 static bool_t 539 svc_cots_kgetargs(SVCXPRT *clone_xprt, xdrproc_t xdr_args, 540 caddr_t args_ptr) 541 { 542 return (SVCAUTH_UNWRAP(&clone_xprt->xp_auth, &clone_xprt->xp_xdrin, 543 xdr_args, args_ptr)); 544 } 545 546 static bool_t 547 svc_cots_kfreeargs(SVCXPRT *clone_xprt, xdrproc_t xdr_args, 548 caddr_t args_ptr) 549 { 550 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 551 mblk_t *mp; 552 bool_t retval; 553 554 /* 555 * It is important to call the XDR routine before 556 * freeing the request mblk. Structures in the 557 * XDR data may point into the mblk and require that 558 * the memory be intact during the free routine. 559 */ 560 if (args_ptr) { 561 /* LINTED pointer alignment */ 562 XDR *xdrs = &clone_xprt->xp_xdrin; 563 564 xdrs->x_op = XDR_FREE; 565 retval = (*xdr_args)(xdrs, args_ptr); 566 } else 567 retval = TRUE; 568 569 if ((mp = cd->cd_req_mp) != NULL) { 570 cd->cd_req_mp = (mblk_t *)0; 571 freemsg(mp); 572 } 573 574 return (retval); 575 } 576 577 static int32_t * 578 svc_cots_kgetres(SVCXPRT *clone_xprt, int size) 579 { 580 /* LINTED pointer alignment */ 581 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 582 XDR *xdrs = &clone_xprt->xp_xdrout; 583 mblk_t *mp; 584 int32_t *buf; 585 struct rpc_msg rply; 586 int len; 587 int mpsize; 588 589 /* 590 * Leave space for protocol headers. 591 */ 592 len = MSG_OFFSET + clone_xprt->xp_msg_size; 593 594 /* 595 * Allocate an initial mblk for the response data. 596 */ 597 while ((mp = allocb(len, BPRI_LO)) == NULL) { 598 if (strwaitbuf(len, BPRI_LO)) 599 return (FALSE); 600 } 601 602 /* 603 * Initialize the XDR decode stream. Additional mblks 604 * will be allocated if necessary. They will be TIDU 605 * sized. 606 */ 607 xdrmblk_init(xdrs, mp, XDR_ENCODE, clone_xprt->xp_msg_size); 608 mpsize = MBLKSIZE(mp); 609 ASSERT(mpsize >= len); 610 ASSERT(mp->b_rptr == mp->b_datap->db_base); 611 612 /* 613 * If the size of mblk is not appreciably larger than what we 614 * asked, then resize the mblk to exactly len bytes. Reason for 615 * this: suppose len is 1600 bytes, the tidu is 1460 bytes 616 * (from TCP over ethernet), and the arguments to RPC require 617 * 2800 bytes. Ideally we want the protocol to render two 618 * ~1400 byte segments over the wire. If allocb() gives us a 2k 619 * mblk, and we allocate a second mblk for the rest, the 620 * protocol module may generate 3 segments over the wire: 621 * 1460 bytes for the first, 448 (2048 - 1600) for the 2nd, and 622 * 892 for the 3rd. If we "waste" 448 bytes in the first mblk, 623 * the XDR encoding will generate two ~1400 byte mblks, and the 624 * protocol module is more likely to produce properly sized 625 * segments. 626 */ 627 if ((mpsize >> 1) <= len) { 628 mp->b_rptr += (mpsize - len); 629 } 630 631 /* 632 * Adjust b_rptr to reserve space for the non-data protocol 633 * headers that any downstream modules might like to add, and 634 * for the record marking header. 635 */ 636 mp->b_rptr += (MSG_OFFSET + RM_HDR_SIZE); 637 638 XDR_SETPOS(xdrs, (uint_t)(mp->b_rptr - mp->b_datap->db_base)); 639 ASSERT(mp->b_wptr == mp->b_rptr); 640 641 /* 642 * Assume a successful RPC since most of them are. 643 */ 644 rply.rm_xid = clone_xprt->xp_xid; 645 rply.rm_direction = REPLY; 646 rply.rm_reply.rp_stat = MSG_ACCEPTED; 647 rply.acpted_rply.ar_verf = clone_xprt->xp_verf; 648 rply.acpted_rply.ar_stat = SUCCESS; 649 650 if (!xdr_replymsg_hdr(xdrs, &rply)) { 651 freeb(mp); 652 return (NULL); 653 } 654 655 656 buf = XDR_INLINE(xdrs, size); 657 if (buf == NULL) { 658 ASSERT(cd->cd_mp == NULL); 659 freemsg(mp); 660 } else { 661 cd->cd_mp = mp; 662 } 663 return (buf); 664 } 665 666 static void 667 svc_cots_kfreeres(SVCXPRT *clone_xprt) 668 { 669 cots_data_t *cd; 670 mblk_t *mp; 671 672 cd = (cots_data_t *)clone_xprt->xp_p2buf; 673 if ((mp = cd->cd_mp) != NULL) { 674 cd->cd_mp = (mblk_t *)NULL; 675 freemsg(mp); 676 } 677 } 678 679 /* 680 * the dup cacheing routines below provide a cache of non-failure 681 * transaction id's. rpc service routines can use this to detect 682 * retransmissions and re-send a non-failure response. 683 */ 684 685 /* 686 * MAXDUPREQS is the number of cached items. It should be adjusted 687 * to the service load so that there is likely to be a response entry 688 * when the first retransmission comes in. 689 */ 690 #define MAXDUPREQS 1024 691 692 /* 693 * This should be appropriately scaled to MAXDUPREQS. 694 */ 695 #define DRHASHSZ 257 696 697 #if ((DRHASHSZ & (DRHASHSZ - 1)) == 0) 698 #define XIDHASH(xid) ((xid) & (DRHASHSZ - 1)) 699 #else 700 #define XIDHASH(xid) ((xid) % DRHASHSZ) 701 #endif 702 #define DRHASH(dr) XIDHASH((dr)->dr_xid) 703 #define REQTOXID(req) ((req)->rq_xprt->xp_xid) 704 705 static int cotsndupreqs = 0; 706 int cotsmaxdupreqs = MAXDUPREQS; 707 static kmutex_t cotsdupreq_lock; 708 static struct dupreq *cotsdrhashtbl[DRHASHSZ]; 709 static int cotsdrhashstat[DRHASHSZ]; 710 711 static void unhash(struct dupreq *); 712 713 /* 714 * cotsdrmru points to the head of a circular linked list in lru order. 715 * cotsdrmru->dr_next == drlru 716 */ 717 struct dupreq *cotsdrmru; 718 719 /* 720 * PSARC 2003/523 Contract Private Interface 721 * svc_cots_kdup 722 * Changes must be reviewed by Solaris File Sharing 723 * Changes must be communicated to contract-2003-523@sun.com 724 * 725 * svc_cots_kdup searches the request cache and returns 0 if the 726 * request is not found in the cache. If it is found, then it 727 * returns the state of the request (in progress or done) and 728 * the status or attributes that were part of the original reply. 729 * 730 * If DUP_DONE (there is a duplicate) svc_cots_kdup copies over the 731 * value of the response. In that case, also return in *dupcachedp 732 * whether the response free routine is cached in the dupreq - in which case 733 * the caller should not be freeing it, because it will be done later 734 * in the svc_cots_kdup code when the dupreq is reused. 735 */ 736 static int 737 svc_cots_kdup(struct svc_req *req, caddr_t res, int size, struct dupreq **drpp, 738 bool_t *dupcachedp) 739 { 740 struct rpc_cots_server *stats = CLONE2STATS(req->rq_xprt); 741 struct dupreq *dr; 742 uint32_t xid; 743 uint32_t drhash; 744 int status; 745 746 xid = REQTOXID(req); 747 mutex_enter(&cotsdupreq_lock); 748 RSSTAT_INCR(stats, rsdupchecks); 749 /* 750 * Check to see whether an entry already exists in the cache. 751 */ 752 dr = cotsdrhashtbl[XIDHASH(xid)]; 753 while (dr != NULL) { 754 if (dr->dr_xid == xid && 755 dr->dr_proc == req->rq_proc && 756 dr->dr_prog == req->rq_prog && 757 dr->dr_vers == req->rq_vers && 758 dr->dr_addr.len == req->rq_xprt->xp_rtaddr.len && 759 bcmp((caddr_t)dr->dr_addr.buf, 760 (caddr_t)req->rq_xprt->xp_rtaddr.buf, 761 dr->dr_addr.len) == 0) { 762 status = dr->dr_status; 763 if (status == DUP_DONE) { 764 bcopy(dr->dr_resp.buf, res, size); 765 if (dupcachedp != NULL) 766 *dupcachedp = (dr->dr_resfree != NULL); 767 TRACE_0(TR_FAC_KRPC, TR_SVC_COTS_KDUP_DONE, 768 "svc_cots_kdup: DUP_DONE"); 769 } else { 770 dr->dr_status = DUP_INPROGRESS; 771 *drpp = dr; 772 TRACE_0(TR_FAC_KRPC, 773 TR_SVC_COTS_KDUP_INPROGRESS, 774 "svc_cots_kdup: DUP_INPROGRESS"); 775 } 776 RSSTAT_INCR(stats, rsdupreqs); 777 mutex_exit(&cotsdupreq_lock); 778 return (status); 779 } 780 dr = dr->dr_chain; 781 } 782 783 /* 784 * There wasn't an entry, either allocate a new one or recycle 785 * an old one. 786 */ 787 if (cotsndupreqs < cotsmaxdupreqs) { 788 dr = kmem_alloc(sizeof (*dr), KM_NOSLEEP); 789 if (dr == NULL) { 790 mutex_exit(&cotsdupreq_lock); 791 return (DUP_ERROR); 792 } 793 dr->dr_resp.buf = NULL; 794 dr->dr_resp.maxlen = 0; 795 dr->dr_addr.buf = NULL; 796 dr->dr_addr.maxlen = 0; 797 if (cotsdrmru) { 798 dr->dr_next = cotsdrmru->dr_next; 799 cotsdrmru->dr_next = dr; 800 } else { 801 dr->dr_next = dr; 802 } 803 cotsndupreqs++; 804 } else { 805 dr = cotsdrmru->dr_next; 806 while (dr->dr_status == DUP_INPROGRESS) { 807 dr = dr->dr_next; 808 if (dr == cotsdrmru->dr_next) { 809 cmn_err(CE_WARN, "svc_cots_kdup no slots free"); 810 mutex_exit(&cotsdupreq_lock); 811 return (DUP_ERROR); 812 } 813 } 814 unhash(dr); 815 if (dr->dr_resfree) { 816 (*dr->dr_resfree)(dr->dr_resp.buf); 817 } 818 } 819 dr->dr_resfree = NULL; 820 cotsdrmru = dr; 821 822 dr->dr_xid = REQTOXID(req); 823 dr->dr_prog = req->rq_prog; 824 dr->dr_vers = req->rq_vers; 825 dr->dr_proc = req->rq_proc; 826 if (dr->dr_addr.maxlen < req->rq_xprt->xp_rtaddr.len) { 827 if (dr->dr_addr.buf != NULL) 828 kmem_free(dr->dr_addr.buf, dr->dr_addr.maxlen); 829 dr->dr_addr.maxlen = req->rq_xprt->xp_rtaddr.len; 830 dr->dr_addr.buf = kmem_alloc(dr->dr_addr.maxlen, KM_NOSLEEP); 831 if (dr->dr_addr.buf == NULL) { 832 dr->dr_addr.maxlen = 0; 833 dr->dr_status = DUP_DROP; 834 mutex_exit(&cotsdupreq_lock); 835 return (DUP_ERROR); 836 } 837 } 838 dr->dr_addr.len = req->rq_xprt->xp_rtaddr.len; 839 bcopy(req->rq_xprt->xp_rtaddr.buf, dr->dr_addr.buf, dr->dr_addr.len); 840 if (dr->dr_resp.maxlen < size) { 841 if (dr->dr_resp.buf != NULL) 842 kmem_free(dr->dr_resp.buf, dr->dr_resp.maxlen); 843 dr->dr_resp.maxlen = (unsigned int)size; 844 dr->dr_resp.buf = kmem_alloc(size, KM_NOSLEEP); 845 if (dr->dr_resp.buf == NULL) { 846 dr->dr_resp.maxlen = 0; 847 dr->dr_status = DUP_DROP; 848 mutex_exit(&cotsdupreq_lock); 849 return (DUP_ERROR); 850 } 851 } 852 dr->dr_status = DUP_INPROGRESS; 853 854 drhash = (uint32_t)DRHASH(dr); 855 dr->dr_chain = cotsdrhashtbl[drhash]; 856 cotsdrhashtbl[drhash] = dr; 857 cotsdrhashstat[drhash]++; 858 mutex_exit(&cotsdupreq_lock); 859 *drpp = dr; 860 return (DUP_NEW); 861 } 862 863 /* 864 * PSARC 2003/523 Contract Private Interface 865 * svc_cots_kdupdone 866 * Changes must be reviewed by Solaris File Sharing 867 * Changes must be communicated to contract-2003-523@sun.com 868 * 869 * svc_cots_kdupdone marks the request done (DUP_DONE or DUP_DROP) 870 * and stores the response. 871 */ 872 static void 873 svc_cots_kdupdone(struct dupreq *dr, caddr_t res, void (*dis_resfree)(), 874 int size, int status) 875 { 876 ASSERT(dr->dr_resfree == NULL); 877 if (status == DUP_DONE) { 878 bcopy(res, dr->dr_resp.buf, size); 879 dr->dr_resfree = dis_resfree; 880 } 881 dr->dr_status = status; 882 } 883 884 /* 885 * This routine expects that the mutex, cotsdupreq_lock, is already held. 886 */ 887 static void 888 unhash(struct dupreq *dr) 889 { 890 struct dupreq *drt; 891 struct dupreq *drtprev = NULL; 892 uint32_t drhash; 893 894 ASSERT(MUTEX_HELD(&cotsdupreq_lock)); 895 896 drhash = (uint32_t)DRHASH(dr); 897 drt = cotsdrhashtbl[drhash]; 898 while (drt != NULL) { 899 if (drt == dr) { 900 cotsdrhashstat[drhash]--; 901 if (drtprev == NULL) { 902 cotsdrhashtbl[drhash] = drt->dr_chain; 903 } else { 904 drtprev->dr_chain = drt->dr_chain; 905 } 906 return; 907 } 908 drtprev = drt; 909 drt = drt->dr_chain; 910 } 911 } 912 913 void 914 svc_cots_stats_init(zoneid_t zoneid, struct rpc_cots_server **statsp) 915 { 916 *statsp = (struct rpc_cots_server *)rpcstat_zone_init_common(zoneid, 917 "unix", "rpc_cots_server", (const kstat_named_t *)&cots_rsstat_tmpl, 918 sizeof (cots_rsstat_tmpl)); 919 } 920 921 void 922 svc_cots_stats_fini(zoneid_t zoneid, struct rpc_cots_server **statsp) 923 { 924 rpcstat_zone_fini_common(zoneid, "unix", "rpc_cots_server"); 925 kmem_free(*statsp, sizeof (cots_rsstat_tmpl)); 926 } 927 928 void 929 svc_cots_init(void) 930 { 931 /* 932 * Check to make sure that the cots private data will fit into 933 * the stack buffer allocated by svc_run. The ASSERT is a safety 934 * net if the cots_data_t structure ever changes. 935 */ 936 /*CONSTANTCONDITION*/ 937 ASSERT(sizeof (cots_data_t) <= SVC_P2LEN); 938 939 mutex_init(&cots_kcreate_lock, NULL, MUTEX_DEFAULT, NULL); 940 mutex_init(&cotsdupreq_lock, NULL, MUTEX_DEFAULT, NULL); 941 } 942