1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 /* 30 * Portions of this source code were derived from Berkeley 4.3 BSD 31 * under license from the Regents of the University of California. 32 */ 33 34 #pragma ident "%Z%%M% %I% %E% SMI" 35 36 /* 37 * svc_cots.c 38 * Server side for connection-oriented RPC in the kernel. 39 * 40 */ 41 42 #include <sys/param.h> 43 #include <sys/types.h> 44 #include <sys/sysmacros.h> 45 #include <sys/file.h> 46 #include <sys/stream.h> 47 #include <sys/strsubr.h> 48 #include <sys/strsun.h> 49 #include <sys/stropts.h> 50 #include <sys/tiuser.h> 51 #include <sys/timod.h> 52 #include <sys/tihdr.h> 53 #include <sys/fcntl.h> 54 #include <sys/errno.h> 55 #include <sys/kmem.h> 56 #include <sys/systm.h> 57 #include <sys/debug.h> 58 #include <sys/cmn_err.h> 59 #include <sys/kstat.h> 60 #include <sys/vtrace.h> 61 62 #include <rpc/types.h> 63 #include <rpc/xdr.h> 64 #include <rpc/auth.h> 65 #include <rpc/rpc_msg.h> 66 #include <rpc/svc.h> 67 #include <inet/ip.h> 68 69 #define COTS_MAX_ALLOCSIZE 2048 70 #define MSG_OFFSET 128 /* offset of call into the mblk */ 71 #define RM_HDR_SIZE 4 /* record mark header size */ 72 73 /* 74 * Routines exported through ops vector. 75 */ 76 static bool_t svc_cots_krecv(SVCXPRT *, mblk_t *, struct rpc_msg *); 77 static bool_t svc_cots_ksend(SVCXPRT *, struct rpc_msg *); 78 static bool_t svc_cots_kgetargs(SVCXPRT *, xdrproc_t, caddr_t); 79 static bool_t svc_cots_kfreeargs(SVCXPRT *, xdrproc_t, caddr_t); 80 static void svc_cots_kdestroy(SVCMASTERXPRT *); 81 static int svc_cots_kdup(struct svc_req *, caddr_t, int, 82 struct dupreq **, bool_t *); 83 static void svc_cots_kdupdone(struct dupreq *, caddr_t, 84 void (*)(), int, int); 85 static int32_t *svc_cots_kgetres(SVCXPRT *, int); 86 static void svc_cots_kfreeres(SVCXPRT *); 87 static void svc_cots_kclone_destroy(SVCXPRT *); 88 static void svc_cots_kstart(SVCMASTERXPRT *); 89 90 /* 91 * Server transport operations vector. 92 */ 93 struct svc_ops svc_cots_op = { 94 svc_cots_krecv, /* Get requests */ 95 svc_cots_kgetargs, /* Deserialize arguments */ 96 svc_cots_ksend, /* Send reply */ 97 svc_cots_kfreeargs, /* Free argument data space */ 98 svc_cots_kdestroy, /* Destroy transport handle */ 99 svc_cots_kdup, /* Check entry in dup req cache */ 100 svc_cots_kdupdone, /* Mark entry in dup req cache as done */ 101 svc_cots_kgetres, /* Get pointer to response buffer */ 102 svc_cots_kfreeres, /* Destroy pre-serialized response header */ 103 svc_cots_kclone_destroy, /* Destroy a clone xprt */ 104 svc_cots_kstart /* Tell `ready-to-receive' to rpcmod */ 105 }; 106 107 /* 108 * Master transport private data. 109 * Kept in xprt->xp_p2. 110 */ 111 struct cots_master_data { 112 char *cmd_src_addr; /* client's address */ 113 int cmd_xprt_started; /* flag for clone routine to call */ 114 /* rpcmod's start routine. */ 115 struct rpc_cots_server *cmd_stats; /* stats for zone */ 116 }; 117 118 /* 119 * Transport private data. 120 * Kept in clone_xprt->xp_p2buf. 121 */ 122 typedef struct cots_data { 123 mblk_t *cd_mp; /* pre-allocated reply message */ 124 mblk_t *cd_req_mp; /* request message */ 125 } cots_data_t; 126 127 /* 128 * Server statistics 129 * NOTE: This structure type is duplicated in the NFS fast path. 130 */ 131 static const struct rpc_cots_server { 132 kstat_named_t rscalls; 133 kstat_named_t rsbadcalls; 134 kstat_named_t rsnullrecv; 135 kstat_named_t rsbadlen; 136 kstat_named_t rsxdrcall; 137 kstat_named_t rsdupchecks; 138 kstat_named_t rsdupreqs; 139 } cots_rsstat_tmpl = { 140 { "calls", KSTAT_DATA_UINT64 }, 141 { "badcalls", KSTAT_DATA_UINT64 }, 142 { "nullrecv", KSTAT_DATA_UINT64 }, 143 { "badlen", KSTAT_DATA_UINT64 }, 144 { "xdrcall", KSTAT_DATA_UINT64 }, 145 { "dupchecks", KSTAT_DATA_UINT64 }, 146 { "dupreqs", KSTAT_DATA_UINT64 } 147 }; 148 149 #define CLONE2STATS(clone_xprt) \ 150 ((struct cots_master_data *)(clone_xprt)->xp_master->xp_p2)->cmd_stats 151 #define RSSTAT_INCR(s, x) \ 152 atomic_add_64(&(s)->x.value.ui64, 1) 153 154 /* 155 * Pointer to a transport specific `ready to receive' function in rpcmod 156 * (set from rpcmod). 157 */ 158 void (*mir_start)(queue_t *); 159 uint_t *svc_max_msg_sizep; 160 161 /* 162 * the address size of the underlying transport can sometimes be 163 * unknown (tinfo->ADDR_size == -1). For this case, it is 164 * necessary to figure out what the size is so the correct amount 165 * of data is allocated. This is an itterative process: 166 * 1. take a good guess (use T_MINADDRSIZE) 167 * 2. try it. 168 * 3. if it works then everything is ok 169 * 4. if the error is ENAMETOLONG, double the guess 170 * 5. go back to step 2. 171 */ 172 #define T_UNKNOWNADDRSIZE (-1) 173 #define T_MINADDRSIZE 32 174 175 /* 176 * Create a transport record. 177 * The transport record, output buffer, and private data structure 178 * are allocated. The output buffer is serialized into using xdrmem. 179 * There is one transport record per user process which implements a 180 * set of services. 181 */ 182 static kmutex_t cots_kcreate_lock; 183 184 int 185 svc_cots_kcreate(file_t *fp, uint_t max_msgsize, struct T_info_ack *tinfo, 186 SVCMASTERXPRT **nxprt) 187 { 188 struct cots_master_data *cmd; 189 int err, retval; 190 SVCMASTERXPRT *xprt; 191 struct rpcstat *rpcstat; 192 struct T_addr_ack *ack_p; 193 struct strioctl getaddr; 194 195 if (nxprt == NULL) 196 return (EINVAL); 197 198 rpcstat = zone_getspecific(rpcstat_zone_key, curproc->p_zone); 199 ASSERT(rpcstat != NULL); 200 201 xprt = kmem_zalloc(sizeof (SVCMASTERXPRT), KM_SLEEP); 202 203 cmd = kmem_zalloc(sizeof (*cmd) + sizeof (*ack_p) 204 + (2 * sizeof (sin6_t)), KM_SLEEP); 205 206 ack_p = (struct T_addr_ack *)&cmd[1]; 207 208 if ((tinfo->TIDU_size > COTS_MAX_ALLOCSIZE) || 209 (tinfo->TIDU_size <= 0)) 210 xprt->xp_msg_size = COTS_MAX_ALLOCSIZE; 211 else { 212 xprt->xp_msg_size = tinfo->TIDU_size - 213 (tinfo->TIDU_size % BYTES_PER_XDR_UNIT); 214 } 215 216 xprt->xp_ops = &svc_cots_op; 217 xprt->xp_p2 = (caddr_t)cmd; 218 cmd->cmd_xprt_started = 0; 219 cmd->cmd_stats = rpcstat->rpc_cots_server; 220 221 getaddr.ic_cmd = TI_GETINFO; 222 getaddr.ic_timout = -1; 223 getaddr.ic_len = sizeof (*ack_p) + (2 * sizeof (sin6_t)); 224 getaddr.ic_dp = (char *)ack_p; 225 ack_p->PRIM_type = T_ADDR_REQ; 226 227 err = strioctl(fp->f_vnode, I_STR, (intptr_t)&getaddr, 228 0, K_TO_K, CRED(), &retval); 229 if (err) { 230 kmem_free(cmd, sizeof (*cmd) + sizeof (*ack_p) + 231 (2 * sizeof (sin6_t))); 232 kmem_free(xprt, sizeof (SVCMASTERXPRT)); 233 return (err); 234 } 235 236 xprt->xp_rtaddr.maxlen = ack_p->REMADDR_length; 237 xprt->xp_rtaddr.len = ack_p->REMADDR_length; 238 cmd->cmd_src_addr = xprt->xp_rtaddr.buf = 239 (char *)ack_p + ack_p->REMADDR_offset; 240 241 xprt->xp_lcladdr.maxlen = ack_p->LOCADDR_length; 242 xprt->xp_lcladdr.len = ack_p->LOCADDR_length; 243 xprt->xp_lcladdr.buf = (char *)ack_p + ack_p->LOCADDR_offset; 244 245 /* 246 * If the current sanity check size in rpcmod is smaller 247 * than the size needed for this xprt, then increase 248 * the sanity check. 249 */ 250 if (max_msgsize != 0 && svc_max_msg_sizep && 251 max_msgsize > *svc_max_msg_sizep) { 252 253 /* This check needs a lock */ 254 mutex_enter(&cots_kcreate_lock); 255 if (svc_max_msg_sizep && max_msgsize > *svc_max_msg_sizep) 256 *svc_max_msg_sizep = max_msgsize; 257 mutex_exit(&cots_kcreate_lock); 258 } 259 260 *nxprt = xprt; 261 262 return (0); 263 } 264 265 /* 266 * Destroy a master transport record. 267 * Frees the space allocated for a transport record. 268 */ 269 static void 270 svc_cots_kdestroy(SVCMASTERXPRT *xprt) 271 { 272 struct cots_master_data *cmd = (struct cots_master_data *)xprt->xp_p2; 273 274 ASSERT(cmd); 275 276 if (xprt->xp_netid) 277 kmem_free(xprt->xp_netid, strlen(xprt->xp_netid) + 1); 278 if (xprt->xp_addrmask.maxlen) 279 kmem_free(xprt->xp_addrmask.buf, xprt->xp_addrmask.maxlen); 280 281 mutex_destroy(&xprt->xp_req_lock); 282 mutex_destroy(&xprt->xp_thread_lock); 283 284 kmem_free(cmd, sizeof (*cmd) + sizeof (struct T_addr_ack) + 285 (2 * sizeof (sin6_t))); 286 287 kmem_free(xprt, sizeof (SVCMASTERXPRT)); 288 } 289 290 /* 291 * svc_tli_kcreate() calls this function at the end to tell 292 * rpcmod that the transport is ready to receive requests. 293 */ 294 static void 295 svc_cots_kstart(SVCMASTERXPRT *xprt) 296 { 297 struct cots_master_data *cmd = (struct cots_master_data *)xprt->xp_p2; 298 299 if (cmd->cmd_xprt_started == 0) { 300 /* 301 * Acquire the xp_req_lock in order to use xp_wq 302 * safely (we don't want to qenable a queue that has 303 * already been closed). 304 */ 305 mutex_enter(&xprt->xp_req_lock); 306 if (cmd->cmd_xprt_started == 0 && 307 xprt->xp_wq != NULL) { 308 (*mir_start)(xprt->xp_wq); 309 cmd->cmd_xprt_started = 1; 310 } 311 mutex_exit(&xprt->xp_req_lock); 312 } 313 } 314 315 /* 316 * Transport-type specific part of svc_xprt_cleanup(). 317 */ 318 static void 319 svc_cots_kclone_destroy(SVCXPRT *clone_xprt) 320 { 321 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 322 323 if (cd->cd_req_mp) { 324 freemsg(cd->cd_req_mp); 325 cd->cd_req_mp = (mblk_t *)0; 326 } 327 ASSERT(cd->cd_mp == NULL); 328 } 329 330 /* 331 * Receive rpc requests. 332 * Checks if the message is intact, and deserializes the call packet. 333 */ 334 static bool_t 335 svc_cots_krecv(SVCXPRT *clone_xprt, mblk_t *mp, struct rpc_msg *msg) 336 { 337 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 338 XDR *xdrs = &clone_xprt->xp_xdrin; 339 struct rpc_cots_server *stats = CLONE2STATS(clone_xprt); 340 341 TRACE_0(TR_FAC_KRPC, TR_SVC_COTS_KRECV_START, 342 "svc_cots_krecv_start:"); 343 RPCLOG(4, "svc_cots_krecv_start clone_xprt = %p:\n", 344 (void *)clone_xprt); 345 346 RSSTAT_INCR(stats, rscalls); 347 348 if (mp->b_datap->db_type != M_DATA) { 349 RPCLOG(16, "svc_cots_krecv bad db_type %d\n", 350 mp->b_datap->db_type); 351 goto bad; 352 } 353 354 xdrmblk_init(xdrs, mp, XDR_DECODE, 0); 355 356 TRACE_0(TR_FAC_KRPC, TR_XDR_CALLMSG_START, 357 "xdr_callmsg_start:"); 358 RPCLOG0(4, "xdr_callmsg_start:\n"); 359 if (!xdr_callmsg(xdrs, msg)) { 360 TRACE_1(TR_FAC_KRPC, TR_XDR_CALLMSG_END, 361 "xdr_callmsg_end:(%S)", "bad"); 362 RPCLOG0(1, "svc_cots_krecv xdr_callmsg failure\n"); 363 RSSTAT_INCR(stats, rsxdrcall); 364 goto bad; 365 } 366 TRACE_1(TR_FAC_KRPC, TR_XDR_CALLMSG_END, 367 "xdr_callmsg_end:(%S)", "good"); 368 369 clone_xprt->xp_xid = msg->rm_xid; 370 cd->cd_req_mp = mp; 371 372 TRACE_1(TR_FAC_KRPC, TR_SVC_COTS_KRECV_END, 373 "svc_cots_krecv_end:(%S)", "good"); 374 RPCLOG0(4, "svc_cots_krecv_end:good\n"); 375 return (TRUE); 376 377 bad: 378 if (mp) 379 freemsg(mp); 380 381 RSSTAT_INCR(stats, rsbadcalls); 382 TRACE_1(TR_FAC_KRPC, TR_SVC_COTS_KRECV_END, 383 "svc_cots_krecv_end:(%S)", "bad"); 384 return (FALSE); 385 } 386 387 /* 388 * Send rpc reply. 389 */ 390 static bool_t 391 svc_cots_ksend(SVCXPRT *clone_xprt, struct rpc_msg *msg) 392 { 393 /* LINTED pointer alignment */ 394 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 395 XDR *xdrs = &(clone_xprt->xp_xdrout); 396 int retval = FALSE; 397 mblk_t *mp; 398 xdrproc_t xdr_results; 399 caddr_t xdr_location; 400 bool_t has_args; 401 402 TRACE_0(TR_FAC_KRPC, TR_SVC_COTS_KSEND_START, 403 "svc_cots_ksend_start:"); 404 405 /* 406 * If there is a result procedure specified in the reply message, 407 * it will be processed in the xdr_replymsg and SVCAUTH_WRAP. 408 * We need to make sure it won't be processed twice, so we null 409 * it for xdr_replymsg here. 410 */ 411 has_args = FALSE; 412 if (msg->rm_reply.rp_stat == MSG_ACCEPTED && 413 msg->rm_reply.rp_acpt.ar_stat == SUCCESS) { 414 if ((xdr_results = msg->acpted_rply.ar_results.proc) != NULL) { 415 has_args = TRUE; 416 xdr_location = msg->acpted_rply.ar_results.where; 417 msg->acpted_rply.ar_results.proc = xdr_void; 418 msg->acpted_rply.ar_results.where = NULL; 419 } 420 } 421 422 mp = cd->cd_mp; 423 if (mp) { 424 /* 425 * The program above pre-allocated an mblk and put 426 * the data in place. 427 */ 428 cd->cd_mp = (mblk_t *)NULL; 429 if (!(xdr_replymsg_body(xdrs, msg) && 430 (!has_args || SVCAUTH_WRAP(&clone_xprt->xp_auth, xdrs, 431 xdr_results, xdr_location)))) { 432 RPCLOG0(1, "svc_cots_ksend: " 433 "xdr_replymsg_body/SVCAUTH_WRAP failed\n"); 434 freemsg(mp); 435 goto out; 436 } 437 } else { 438 int len; 439 int mpsize; 440 441 /* 442 * Leave space for protocol headers. 443 */ 444 len = MSG_OFFSET + clone_xprt->xp_msg_size; 445 446 /* 447 * Allocate an initial mblk for the response data. 448 */ 449 while (!(mp = allocb(len, BPRI_LO))) { 450 RPCLOG0(16, "svc_cots_ksend: allocb failed failed\n"); 451 if (strwaitbuf(len, BPRI_LO)) { 452 TRACE_1(TR_FAC_KRPC, TR_SVC_COTS_KSEND_END, 453 "svc_cots_ksend_end:(%S)", "strwaitbuf"); 454 RPCLOG0(1, 455 "svc_cots_ksend: strwaitbuf failed\n"); 456 goto out; 457 } 458 } 459 460 /* 461 * Initialize the XDR decode stream. Additional mblks 462 * will be allocated if necessary. They will be TIDU 463 * sized. 464 */ 465 xdrmblk_init(xdrs, mp, XDR_ENCODE, clone_xprt->xp_msg_size); 466 mpsize = MBLKSIZE(mp); 467 ASSERT(mpsize >= len); 468 ASSERT(mp->b_rptr == mp->b_datap->db_base); 469 470 /* 471 * If the size of mblk is not appreciably larger than what we 472 * asked, then resize the mblk to exactly len bytes. Reason for 473 * this: suppose len is 1600 bytes, the tidu is 1460 bytes 474 * (from TCP over ethernet), and the arguments to RPC require 475 * 2800 bytes. Ideally we want the protocol to render two 476 * ~1400 byte segments over the wire. If allocb() gives us a 2k 477 * mblk, and we allocate a second mblk for the rest, the 478 * protocol module may generate 3 segments over the wire: 479 * 1460 bytes for the first, 448 (2048 - 1600) for the 2nd, and 480 * 892 for the 3rd. If we "waste" 448 bytes in the first mblk, 481 * the XDR encoding will generate two ~1400 byte mblks, and the 482 * protocol module is more likely to produce properly sized 483 * segments. 484 */ 485 if ((mpsize >> 1) <= len) { 486 mp->b_rptr += (mpsize - len); 487 } 488 489 /* 490 * Adjust b_rptr to reserve space for the non-data protocol 491 * headers that any downstream modules might like to add, and 492 * for the record marking header. 493 */ 494 mp->b_rptr += (MSG_OFFSET + RM_HDR_SIZE); 495 496 XDR_SETPOS(xdrs, (uint_t)(mp->b_rptr - mp->b_datap->db_base)); 497 ASSERT(mp->b_wptr == mp->b_rptr); 498 499 msg->rm_xid = clone_xprt->xp_xid; 500 501 TRACE_0(TR_FAC_KRPC, TR_XDR_REPLYMSG_START, 502 "xdr_replymsg_start:"); 503 if (!(xdr_replymsg(xdrs, msg) && 504 (!has_args || SVCAUTH_WRAP(&clone_xprt->xp_auth, xdrs, 505 xdr_results, xdr_location)))) { 506 TRACE_1(TR_FAC_KRPC, TR_XDR_REPLYMSG_END, 507 "xdr_replymsg_end:(%S)", "bad"); 508 freemsg(mp); 509 RPCLOG0(1, "svc_cots_ksend: xdr_replymsg/SVCAUTH_WRAP " 510 "failed\n"); 511 goto out; 512 } 513 TRACE_1(TR_FAC_KRPC, TR_XDR_REPLYMSG_END, 514 "xdr_replymsg_end:(%S)", "good"); 515 } 516 517 put(clone_xprt->xp_wq, mp); 518 retval = TRUE; 519 520 out: 521 /* 522 * This is completely disgusting. If public is set it is 523 * a pointer to a structure whose first field is the address 524 * of the function to free that structure and any related 525 * stuff. (see rrokfree in nfs_xdr.c). 526 */ 527 if (xdrs->x_public) { 528 /* LINTED pointer alignment */ 529 (**((int (**)())xdrs->x_public))(xdrs->x_public); 530 } 531 532 TRACE_1(TR_FAC_KRPC, TR_SVC_COTS_KSEND_END, 533 "svc_cots_ksend_end:(%S)", "done"); 534 return (retval); 535 } 536 537 /* 538 * Deserialize arguments. 539 */ 540 static bool_t 541 svc_cots_kgetargs(SVCXPRT *clone_xprt, xdrproc_t xdr_args, 542 caddr_t args_ptr) 543 { 544 return (SVCAUTH_UNWRAP(&clone_xprt->xp_auth, &clone_xprt->xp_xdrin, 545 xdr_args, args_ptr)); 546 } 547 548 static bool_t 549 svc_cots_kfreeargs(SVCXPRT *clone_xprt, xdrproc_t xdr_args, 550 caddr_t args_ptr) 551 { 552 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 553 mblk_t *mp; 554 bool_t retval; 555 556 /* 557 * It is important to call the XDR routine before 558 * freeing the request mblk. Structures in the 559 * XDR data may point into the mblk and require that 560 * the memory be intact during the free routine. 561 */ 562 if (args_ptr) { 563 /* LINTED pointer alignment */ 564 XDR *xdrs = &clone_xprt->xp_xdrin; 565 566 xdrs->x_op = XDR_FREE; 567 retval = (*xdr_args)(xdrs, args_ptr); 568 } else 569 retval = TRUE; 570 571 if ((mp = cd->cd_req_mp) != NULL) { 572 cd->cd_req_mp = (mblk_t *)0; 573 freemsg(mp); 574 } 575 576 return (retval); 577 } 578 579 static int32_t * 580 svc_cots_kgetres(SVCXPRT *clone_xprt, int size) 581 { 582 /* LINTED pointer alignment */ 583 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 584 XDR *xdrs = &clone_xprt->xp_xdrout; 585 mblk_t *mp; 586 int32_t *buf; 587 struct rpc_msg rply; 588 int len; 589 int mpsize; 590 591 /* 592 * Leave space for protocol headers. 593 */ 594 len = MSG_OFFSET + clone_xprt->xp_msg_size; 595 596 /* 597 * Allocate an initial mblk for the response data. 598 */ 599 while ((mp = allocb(len, BPRI_LO)) == NULL) { 600 if (strwaitbuf(len, BPRI_LO)) 601 return (FALSE); 602 } 603 604 /* 605 * Initialize the XDR decode stream. Additional mblks 606 * will be allocated if necessary. They will be TIDU 607 * sized. 608 */ 609 xdrmblk_init(xdrs, mp, XDR_ENCODE, clone_xprt->xp_msg_size); 610 mpsize = MBLKSIZE(mp); 611 ASSERT(mpsize >= len); 612 ASSERT(mp->b_rptr == mp->b_datap->db_base); 613 614 /* 615 * If the size of mblk is not appreciably larger than what we 616 * asked, then resize the mblk to exactly len bytes. Reason for 617 * this: suppose len is 1600 bytes, the tidu is 1460 bytes 618 * (from TCP over ethernet), and the arguments to RPC require 619 * 2800 bytes. Ideally we want the protocol to render two 620 * ~1400 byte segments over the wire. If allocb() gives us a 2k 621 * mblk, and we allocate a second mblk for the rest, the 622 * protocol module may generate 3 segments over the wire: 623 * 1460 bytes for the first, 448 (2048 - 1600) for the 2nd, and 624 * 892 for the 3rd. If we "waste" 448 bytes in the first mblk, 625 * the XDR encoding will generate two ~1400 byte mblks, and the 626 * protocol module is more likely to produce properly sized 627 * segments. 628 */ 629 if ((mpsize >> 1) <= len) { 630 mp->b_rptr += (mpsize - len); 631 } 632 633 /* 634 * Adjust b_rptr to reserve space for the non-data protocol 635 * headers that any downstream modules might like to add, and 636 * for the record marking header. 637 */ 638 mp->b_rptr += (MSG_OFFSET + RM_HDR_SIZE); 639 640 XDR_SETPOS(xdrs, (uint_t)(mp->b_rptr - mp->b_datap->db_base)); 641 ASSERT(mp->b_wptr == mp->b_rptr); 642 643 /* 644 * Assume a successful RPC since most of them are. 645 */ 646 rply.rm_xid = clone_xprt->xp_xid; 647 rply.rm_direction = REPLY; 648 rply.rm_reply.rp_stat = MSG_ACCEPTED; 649 rply.acpted_rply.ar_verf = clone_xprt->xp_verf; 650 rply.acpted_rply.ar_stat = SUCCESS; 651 652 if (!xdr_replymsg_hdr(xdrs, &rply)) { 653 freeb(mp); 654 return (NULL); 655 } 656 657 658 buf = XDR_INLINE(xdrs, size); 659 if (buf == NULL) { 660 ASSERT(cd->cd_mp == NULL); 661 freemsg(mp); 662 } else { 663 cd->cd_mp = mp; 664 } 665 return (buf); 666 } 667 668 static void 669 svc_cots_kfreeres(SVCXPRT *clone_xprt) 670 { 671 cots_data_t *cd; 672 mblk_t *mp; 673 674 cd = (cots_data_t *)clone_xprt->xp_p2buf; 675 if ((mp = cd->cd_mp) != NULL) { 676 cd->cd_mp = (mblk_t *)NULL; 677 freemsg(mp); 678 } 679 } 680 681 /* 682 * the dup cacheing routines below provide a cache of non-failure 683 * transaction id's. rpc service routines can use this to detect 684 * retransmissions and re-send a non-failure response. 685 */ 686 687 /* 688 * MAXDUPREQS is the number of cached items. It should be adjusted 689 * to the service load so that there is likely to be a response entry 690 * when the first retransmission comes in. 691 */ 692 #define MAXDUPREQS 1024 693 694 /* 695 * This should be appropriately scaled to MAXDUPREQS. 696 */ 697 #define DRHASHSZ 257 698 699 #if ((DRHASHSZ & (DRHASHSZ - 1)) == 0) 700 #define XIDHASH(xid) ((xid) & (DRHASHSZ - 1)) 701 #else 702 #define XIDHASH(xid) ((xid) % DRHASHSZ) 703 #endif 704 #define DRHASH(dr) XIDHASH((dr)->dr_xid) 705 #define REQTOXID(req) ((req)->rq_xprt->xp_xid) 706 707 static int cotsndupreqs = 0; 708 static int cotsmaxdupreqs = MAXDUPREQS; 709 static kmutex_t cotsdupreq_lock; 710 static struct dupreq *cotsdrhashtbl[DRHASHSZ]; 711 static int cotsdrhashstat[DRHASHSZ]; 712 713 static void unhash(struct dupreq *); 714 715 /* 716 * cotsdrmru points to the head of a circular linked list in lru order. 717 * cotsdrmru->dr_next == drlru 718 */ 719 struct dupreq *cotsdrmru; 720 721 /* 722 * PSARC 2003/523 Contract Private Interface 723 * svc_cots_kdup 724 * Changes must be reviewed by Solaris File Sharing 725 * Changes must be communicated to contract-2003-523@sun.com 726 * 727 * svc_cots_kdup searches the request cache and returns 0 if the 728 * request is not found in the cache. If it is found, then it 729 * returns the state of the request (in progress or done) and 730 * the status or attributes that were part of the original reply. 731 * 732 * If DUP_DONE (there is a duplicate) svc_cots_kdup copies over the 733 * value of the response. In that case, also return in *dupcachedp 734 * whether the response free routine is cached in the dupreq - in which case 735 * the caller should not be freeing it, because it will be done later 736 * in the svc_cots_kdup code when the dupreq is reused. 737 */ 738 static int 739 svc_cots_kdup(struct svc_req *req, caddr_t res, int size, struct dupreq **drpp, 740 bool_t *dupcachedp) 741 { 742 struct rpc_cots_server *stats = CLONE2STATS(req->rq_xprt); 743 struct dupreq *dr; 744 uint32_t xid; 745 uint32_t drhash; 746 int status; 747 748 xid = REQTOXID(req); 749 mutex_enter(&cotsdupreq_lock); 750 RSSTAT_INCR(stats, rsdupchecks); 751 /* 752 * Check to see whether an entry already exists in the cache. 753 */ 754 dr = cotsdrhashtbl[XIDHASH(xid)]; 755 while (dr != NULL) { 756 if (dr->dr_xid == xid && 757 dr->dr_proc == req->rq_proc && 758 dr->dr_prog == req->rq_prog && 759 dr->dr_vers == req->rq_vers && 760 dr->dr_addr.len == req->rq_xprt->xp_rtaddr.len && 761 bcmp((caddr_t)dr->dr_addr.buf, 762 (caddr_t)req->rq_xprt->xp_rtaddr.buf, 763 dr->dr_addr.len) == 0) { 764 status = dr->dr_status; 765 if (status == DUP_DONE) { 766 bcopy(dr->dr_resp.buf, res, size); 767 if (dupcachedp != NULL) 768 *dupcachedp = (dr->dr_resfree != NULL); 769 TRACE_0(TR_FAC_KRPC, TR_SVC_COTS_KDUP_DONE, 770 "svc_cots_kdup: DUP_DONE"); 771 } else { 772 dr->dr_status = DUP_INPROGRESS; 773 *drpp = dr; 774 TRACE_0(TR_FAC_KRPC, 775 TR_SVC_COTS_KDUP_INPROGRESS, 776 "svc_cots_kdup: DUP_INPROGRESS"); 777 } 778 RSSTAT_INCR(stats, rsdupreqs); 779 mutex_exit(&cotsdupreq_lock); 780 return (status); 781 } 782 dr = dr->dr_chain; 783 } 784 785 /* 786 * There wasn't an entry, either allocate a new one or recycle 787 * an old one. 788 */ 789 if (cotsndupreqs < cotsmaxdupreqs) { 790 dr = kmem_alloc(sizeof (*dr), KM_NOSLEEP); 791 if (dr == NULL) { 792 mutex_exit(&cotsdupreq_lock); 793 return (DUP_ERROR); 794 } 795 dr->dr_resp.buf = NULL; 796 dr->dr_resp.maxlen = 0; 797 dr->dr_addr.buf = NULL; 798 dr->dr_addr.maxlen = 0; 799 if (cotsdrmru) { 800 dr->dr_next = cotsdrmru->dr_next; 801 cotsdrmru->dr_next = dr; 802 } else { 803 dr->dr_next = dr; 804 } 805 cotsndupreqs++; 806 } else { 807 dr = cotsdrmru->dr_next; 808 while (dr->dr_status == DUP_INPROGRESS) { 809 dr = dr->dr_next; 810 if (dr == cotsdrmru->dr_next) { 811 cmn_err(CE_WARN, "svc_cots_kdup no slots free"); 812 mutex_exit(&cotsdupreq_lock); 813 return (DUP_ERROR); 814 } 815 } 816 unhash(dr); 817 if (dr->dr_resfree) { 818 (*dr->dr_resfree)(dr->dr_resp.buf); 819 } 820 } 821 dr->dr_resfree = NULL; 822 cotsdrmru = dr; 823 824 dr->dr_xid = REQTOXID(req); 825 dr->dr_prog = req->rq_prog; 826 dr->dr_vers = req->rq_vers; 827 dr->dr_proc = req->rq_proc; 828 if (dr->dr_addr.maxlen < req->rq_xprt->xp_rtaddr.len) { 829 if (dr->dr_addr.buf != NULL) 830 kmem_free(dr->dr_addr.buf, dr->dr_addr.maxlen); 831 dr->dr_addr.maxlen = req->rq_xprt->xp_rtaddr.len; 832 dr->dr_addr.buf = kmem_alloc(dr->dr_addr.maxlen, KM_NOSLEEP); 833 if (dr->dr_addr.buf == NULL) { 834 dr->dr_addr.maxlen = 0; 835 dr->dr_status = DUP_DROP; 836 mutex_exit(&cotsdupreq_lock); 837 return (DUP_ERROR); 838 } 839 } 840 dr->dr_addr.len = req->rq_xprt->xp_rtaddr.len; 841 bcopy(req->rq_xprt->xp_rtaddr.buf, dr->dr_addr.buf, dr->dr_addr.len); 842 if (dr->dr_resp.maxlen < size) { 843 if (dr->dr_resp.buf != NULL) 844 kmem_free(dr->dr_resp.buf, dr->dr_resp.maxlen); 845 dr->dr_resp.maxlen = (unsigned int)size; 846 dr->dr_resp.buf = kmem_alloc(size, KM_NOSLEEP); 847 if (dr->dr_resp.buf == NULL) { 848 dr->dr_resp.maxlen = 0; 849 dr->dr_status = DUP_DROP; 850 mutex_exit(&cotsdupreq_lock); 851 return (DUP_ERROR); 852 } 853 } 854 dr->dr_status = DUP_INPROGRESS; 855 856 drhash = (uint32_t)DRHASH(dr); 857 dr->dr_chain = cotsdrhashtbl[drhash]; 858 cotsdrhashtbl[drhash] = dr; 859 cotsdrhashstat[drhash]++; 860 mutex_exit(&cotsdupreq_lock); 861 *drpp = dr; 862 return (DUP_NEW); 863 } 864 865 /* 866 * PSARC 2003/523 Contract Private Interface 867 * svc_cots_kdupdone 868 * Changes must be reviewed by Solaris File Sharing 869 * Changes must be communicated to contract-2003-523@sun.com 870 * 871 * svc_cots_kdupdone marks the request done (DUP_DONE or DUP_DROP) 872 * and stores the response. 873 */ 874 static void 875 svc_cots_kdupdone(struct dupreq *dr, caddr_t res, void (*dis_resfree)(), 876 int size, int status) 877 { 878 ASSERT(dr->dr_resfree == NULL); 879 if (status == DUP_DONE) { 880 bcopy(res, dr->dr_resp.buf, size); 881 dr->dr_resfree = dis_resfree; 882 } 883 dr->dr_status = status; 884 } 885 886 /* 887 * This routine expects that the mutex, cotsdupreq_lock, is already held. 888 */ 889 static void 890 unhash(struct dupreq *dr) 891 { 892 struct dupreq *drt; 893 struct dupreq *drtprev = NULL; 894 uint32_t drhash; 895 896 ASSERT(MUTEX_HELD(&cotsdupreq_lock)); 897 898 drhash = (uint32_t)DRHASH(dr); 899 drt = cotsdrhashtbl[drhash]; 900 while (drt != NULL) { 901 if (drt == dr) { 902 cotsdrhashstat[drhash]--; 903 if (drtprev == NULL) { 904 cotsdrhashtbl[drhash] = drt->dr_chain; 905 } else { 906 drtprev->dr_chain = drt->dr_chain; 907 } 908 return; 909 } 910 drtprev = drt; 911 drt = drt->dr_chain; 912 } 913 } 914 915 void 916 svc_cots_stats_init(zoneid_t zoneid, struct rpc_cots_server **statsp) 917 { 918 *statsp = (struct rpc_cots_server *)rpcstat_zone_init_common(zoneid, 919 "unix", "rpc_cots_server", (const kstat_named_t *)&cots_rsstat_tmpl, 920 sizeof (cots_rsstat_tmpl)); 921 } 922 923 void 924 svc_cots_stats_fini(zoneid_t zoneid, struct rpc_cots_server **statsp) 925 { 926 rpcstat_zone_fini_common(zoneid, "unix", "rpc_cots_server"); 927 kmem_free(*statsp, sizeof (cots_rsstat_tmpl)); 928 } 929 930 void 931 svc_cots_init(void) 932 { 933 /* 934 * Check to make sure that the cots private data will fit into 935 * the stack buffer allocated by svc_run. The ASSERT is a safety 936 * net if the cots_data_t structure ever changes. 937 */ 938 /*CONSTANTCONDITION*/ 939 ASSERT(sizeof (cots_data_t) <= SVC_P2LEN); 940 941 mutex_init(&cots_kcreate_lock, NULL, MUTEX_DEFAULT, NULL); 942 mutex_init(&cotsdupreq_lock, NULL, MUTEX_DEFAULT, NULL); 943 } 944