1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2015 Nexenta Systems, Inc. All rights reserved. 24 * Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved. 25 * Copyright (c) 2012 by Delphix. All rights reserved. 26 */ 27 28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 29 /* All Rights Reserved */ 30 31 /* 32 * Portions of this source code were derived from Berkeley 4.3 BSD 33 * under license from the Regents of the University of California. 34 */ 35 36 /* 37 * svc_cots.c 38 * Server side for connection-oriented RPC in the kernel. 39 * 40 */ 41 42 #include <sys/param.h> 43 #include <sys/types.h> 44 #include <sys/sysmacros.h> 45 #include <sys/file.h> 46 #include <sys/stream.h> 47 #include <sys/strsubr.h> 48 #include <sys/strsun.h> 49 #include <sys/stropts.h> 50 #include <sys/tiuser.h> 51 #include <sys/timod.h> 52 #include <sys/tihdr.h> 53 #include <sys/fcntl.h> 54 #include <sys/errno.h> 55 #include <sys/kmem.h> 56 #include <sys/systm.h> 57 #include <sys/debug.h> 58 #include <sys/cmn_err.h> 59 #include <sys/kstat.h> 60 #include <sys/vtrace.h> 61 62 #include <rpc/types.h> 63 #include <rpc/xdr.h> 64 #include <rpc/auth.h> 65 #include <rpc/rpc_msg.h> 66 #include <rpc/svc.h> 67 #include <inet/ip.h> 68 69 #define COTS_MAX_ALLOCSIZE 2048 70 #define MSG_OFFSET 128 /* offset of call into the mblk */ 71 #define RM_HDR_SIZE 4 /* record mark header size */ 72 73 /* 74 * Routines exported through ops vector. 75 */ 76 static bool_t svc_cots_krecv(SVCXPRT *, mblk_t *, struct rpc_msg *); 77 static bool_t svc_cots_ksend(SVCXPRT *, struct rpc_msg *); 78 static bool_t svc_cots_kgetargs(SVCXPRT *, xdrproc_t, caddr_t); 79 static bool_t svc_cots_kfreeargs(SVCXPRT *, xdrproc_t, caddr_t); 80 static void svc_cots_kdestroy(SVCMASTERXPRT *); 81 static int svc_cots_kdup(struct svc_req *, caddr_t, int, 82 struct dupreq **, bool_t *); 83 static void svc_cots_kdupdone(struct dupreq *, caddr_t, 84 void (*)(), int, int); 85 static int32_t *svc_cots_kgetres(SVCXPRT *, int); 86 static void svc_cots_kfreeres(SVCXPRT *); 87 static void svc_cots_kclone_destroy(SVCXPRT *); 88 static void svc_cots_kstart(SVCMASTERXPRT *); 89 static void svc_cots_ktattrs(SVCXPRT *, int, void **); 90 91 /* 92 * Server transport operations vector. 93 */ 94 struct svc_ops svc_cots_op = { 95 svc_cots_krecv, /* Get requests */ 96 svc_cots_kgetargs, /* Deserialize arguments */ 97 svc_cots_ksend, /* Send reply */ 98 svc_cots_kfreeargs, /* Free argument data space */ 99 svc_cots_kdestroy, /* Destroy transport handle */ 100 svc_cots_kdup, /* Check entry in dup req cache */ 101 svc_cots_kdupdone, /* Mark entry in dup req cache as done */ 102 svc_cots_kgetres, /* Get pointer to response buffer */ 103 svc_cots_kfreeres, /* Destroy pre-serialized response header */ 104 svc_cots_kclone_destroy, /* Destroy a clone xprt */ 105 svc_cots_kstart, /* Tell `ready-to-receive' to rpcmod */ 106 NULL, /* Transport specific clone xprt */ 107 svc_cots_ktattrs /* Transport Attributes */ 108 }; 109 110 /* 111 * Master transport private data. 112 * Kept in xprt->xp_p2. 113 */ 114 struct cots_master_data { 115 char *cmd_src_addr; /* client's address */ 116 int cmd_xprt_started; /* flag for clone routine to call */ 117 /* rpcmod's start routine. */ 118 struct rpc_cots_server *cmd_stats; /* stats for zone */ 119 }; 120 121 /* 122 * Transport private data. 123 * Kept in clone_xprt->xp_p2buf. 124 */ 125 typedef struct cots_data { 126 mblk_t *cd_mp; /* pre-allocated reply message */ 127 mblk_t *cd_req_mp; /* request message */ 128 } cots_data_t; 129 130 /* 131 * Server statistics 132 * NOTE: This structure type is duplicated in the NFS fast path. 133 */ 134 static const struct rpc_cots_server { 135 kstat_named_t rscalls; 136 kstat_named_t rsbadcalls; 137 kstat_named_t rsnullrecv; 138 kstat_named_t rsbadlen; 139 kstat_named_t rsxdrcall; 140 kstat_named_t rsdupchecks; 141 kstat_named_t rsdupreqs; 142 } cots_rsstat_tmpl = { 143 { "calls", KSTAT_DATA_UINT64 }, 144 { "badcalls", KSTAT_DATA_UINT64 }, 145 { "nullrecv", KSTAT_DATA_UINT64 }, 146 { "badlen", KSTAT_DATA_UINT64 }, 147 { "xdrcall", KSTAT_DATA_UINT64 }, 148 { "dupchecks", KSTAT_DATA_UINT64 }, 149 { "dupreqs", KSTAT_DATA_UINT64 } 150 }; 151 152 #define CLONE2STATS(clone_xprt) \ 153 ((struct cots_master_data *)(clone_xprt)->xp_master->xp_p2)->cmd_stats 154 #define RSSTAT_INCR(s, x) \ 155 atomic_inc_64(&(s)->x.value.ui64) 156 157 /* 158 * Pointer to a transport specific `ready to receive' function in rpcmod 159 * (set from rpcmod). 160 */ 161 void (*mir_start)(queue_t *); 162 uint_t *svc_max_msg_sizep; 163 164 /* 165 * the address size of the underlying transport can sometimes be 166 * unknown (tinfo->ADDR_size == -1). For this case, it is 167 * necessary to figure out what the size is so the correct amount 168 * of data is allocated. This is an itterative process: 169 * 1. take a good guess (use T_MINADDRSIZE) 170 * 2. try it. 171 * 3. if it works then everything is ok 172 * 4. if the error is ENAMETOLONG, double the guess 173 * 5. go back to step 2. 174 */ 175 #define T_UNKNOWNADDRSIZE (-1) 176 #define T_MINADDRSIZE 32 177 178 /* 179 * Create a transport record. 180 * The transport record, output buffer, and private data structure 181 * are allocated. The output buffer is serialized into using xdrmem. 182 * There is one transport record per user process which implements a 183 * set of services. 184 */ 185 static kmutex_t cots_kcreate_lock; 186 187 int 188 svc_cots_kcreate(file_t *fp, uint_t max_msgsize, struct T_info_ack *tinfo, 189 SVCMASTERXPRT **nxprt) 190 { 191 struct cots_master_data *cmd; 192 int err, retval; 193 SVCMASTERXPRT *xprt; 194 struct rpcstat *rpcstat; 195 struct T_addr_ack *ack_p; 196 struct strioctl getaddr; 197 198 if (nxprt == NULL) 199 return (EINVAL); 200 201 rpcstat = zone_getspecific(rpcstat_zone_key, curproc->p_zone); 202 ASSERT(rpcstat != NULL); 203 204 xprt = kmem_zalloc(sizeof (SVCMASTERXPRT), KM_SLEEP); 205 206 cmd = kmem_zalloc(sizeof (*cmd) + sizeof (*ack_p) 207 + (2 * sizeof (sin6_t)), KM_SLEEP); 208 209 ack_p = (struct T_addr_ack *)&cmd[1]; 210 211 if ((tinfo->TIDU_size > COTS_MAX_ALLOCSIZE) || 212 (tinfo->TIDU_size <= 0)) 213 xprt->xp_msg_size = COTS_MAX_ALLOCSIZE; 214 else { 215 xprt->xp_msg_size = tinfo->TIDU_size - 216 (tinfo->TIDU_size % BYTES_PER_XDR_UNIT); 217 } 218 219 xprt->xp_ops = &svc_cots_op; 220 xprt->xp_p2 = (caddr_t)cmd; 221 cmd->cmd_xprt_started = 0; 222 cmd->cmd_stats = rpcstat->rpc_cots_server; 223 224 getaddr.ic_cmd = TI_GETINFO; 225 getaddr.ic_timout = -1; 226 getaddr.ic_len = sizeof (*ack_p) + (2 * sizeof (sin6_t)); 227 getaddr.ic_dp = (char *)ack_p; 228 ack_p->PRIM_type = T_ADDR_REQ; 229 230 err = strioctl(fp->f_vnode, I_STR, (intptr_t)&getaddr, 231 0, K_TO_K, CRED(), &retval); 232 if (err) { 233 kmem_free(cmd, sizeof (*cmd) + sizeof (*ack_p) + 234 (2 * sizeof (sin6_t))); 235 kmem_free(xprt, sizeof (SVCMASTERXPRT)); 236 return (err); 237 } 238 239 xprt->xp_rtaddr.maxlen = ack_p->REMADDR_length; 240 xprt->xp_rtaddr.len = ack_p->REMADDR_length; 241 cmd->cmd_src_addr = xprt->xp_rtaddr.buf = 242 (char *)ack_p + ack_p->REMADDR_offset; 243 244 xprt->xp_lcladdr.maxlen = ack_p->LOCADDR_length; 245 xprt->xp_lcladdr.len = ack_p->LOCADDR_length; 246 xprt->xp_lcladdr.buf = (char *)ack_p + ack_p->LOCADDR_offset; 247 248 /* 249 * If the current sanity check size in rpcmod is smaller 250 * than the size needed for this xprt, then increase 251 * the sanity check. 252 */ 253 if (max_msgsize != 0 && svc_max_msg_sizep && 254 max_msgsize > *svc_max_msg_sizep) { 255 256 /* This check needs a lock */ 257 mutex_enter(&cots_kcreate_lock); 258 if (svc_max_msg_sizep && max_msgsize > *svc_max_msg_sizep) 259 *svc_max_msg_sizep = max_msgsize; 260 mutex_exit(&cots_kcreate_lock); 261 } 262 263 *nxprt = xprt; 264 265 return (0); 266 } 267 268 /* 269 * Destroy a master transport record. 270 * Frees the space allocated for a transport record. 271 */ 272 static void 273 svc_cots_kdestroy(SVCMASTERXPRT *xprt) 274 { 275 struct cots_master_data *cmd = (struct cots_master_data *)xprt->xp_p2; 276 277 ASSERT(cmd); 278 279 if (xprt->xp_netid) 280 kmem_free(xprt->xp_netid, strlen(xprt->xp_netid) + 1); 281 if (xprt->xp_addrmask.maxlen) 282 kmem_free(xprt->xp_addrmask.buf, xprt->xp_addrmask.maxlen); 283 284 mutex_destroy(&xprt->xp_req_lock); 285 mutex_destroy(&xprt->xp_thread_lock); 286 287 kmem_free(cmd, sizeof (*cmd) + sizeof (struct T_addr_ack) + 288 (2 * sizeof (sin6_t))); 289 290 kmem_free(xprt, sizeof (SVCMASTERXPRT)); 291 } 292 293 /* 294 * svc_tli_kcreate() calls this function at the end to tell 295 * rpcmod that the transport is ready to receive requests. 296 */ 297 static void 298 svc_cots_kstart(SVCMASTERXPRT *xprt) 299 { 300 struct cots_master_data *cmd = (struct cots_master_data *)xprt->xp_p2; 301 302 if (cmd->cmd_xprt_started == 0) { 303 /* 304 * Acquire the xp_req_lock in order to use xp_wq 305 * safely (we don't want to qenable a queue that has 306 * already been closed). 307 */ 308 mutex_enter(&xprt->xp_req_lock); 309 if (cmd->cmd_xprt_started == 0 && 310 xprt->xp_wq != NULL) { 311 (*mir_start)(xprt->xp_wq); 312 cmd->cmd_xprt_started = 1; 313 } 314 mutex_exit(&xprt->xp_req_lock); 315 } 316 } 317 318 /* 319 * Transport-type specific part of svc_xprt_cleanup(). 320 */ 321 static void 322 svc_cots_kclone_destroy(SVCXPRT *clone_xprt) 323 { 324 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 325 326 if (cd->cd_req_mp) { 327 freemsg(cd->cd_req_mp); 328 cd->cd_req_mp = (mblk_t *)0; 329 } 330 ASSERT(cd->cd_mp == NULL); 331 } 332 333 /* 334 * Transport Attributes. 335 */ 336 static void 337 svc_cots_ktattrs(SVCXPRT *clone_xprt, int attrflag, void **tattr) 338 { 339 *tattr = NULL; 340 341 switch (attrflag) { 342 case SVC_TATTR_ADDRMASK: 343 *tattr = (void *)&clone_xprt->xp_master->xp_addrmask; 344 } 345 } 346 347 /* 348 * Receive rpc requests. 349 * Checks if the message is intact, and deserializes the call packet. 350 */ 351 static bool_t 352 svc_cots_krecv(SVCXPRT *clone_xprt, mblk_t *mp, struct rpc_msg *msg) 353 { 354 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 355 XDR *xdrs = &clone_xprt->xp_xdrin; 356 struct rpc_cots_server *stats = CLONE2STATS(clone_xprt); 357 358 TRACE_0(TR_FAC_KRPC, TR_SVC_COTS_KRECV_START, 359 "svc_cots_krecv_start:"); 360 RPCLOG(4, "svc_cots_krecv_start clone_xprt = %p:\n", 361 (void *)clone_xprt); 362 363 RSSTAT_INCR(stats, rscalls); 364 365 if (mp->b_datap->db_type != M_DATA) { 366 RPCLOG(16, "svc_cots_krecv bad db_type %d\n", 367 mp->b_datap->db_type); 368 goto bad; 369 } 370 371 xdrmblk_init(xdrs, mp, XDR_DECODE, 0); 372 373 TRACE_0(TR_FAC_KRPC, TR_XDR_CALLMSG_START, 374 "xdr_callmsg_start:"); 375 RPCLOG0(4, "xdr_callmsg_start:\n"); 376 if (!xdr_callmsg(xdrs, msg)) { 377 XDR_DESTROY(xdrs); 378 TRACE_1(TR_FAC_KRPC, TR_XDR_CALLMSG_END, 379 "xdr_callmsg_end:(%S)", "bad"); 380 RPCLOG0(1, "svc_cots_krecv xdr_callmsg failure\n"); 381 RSSTAT_INCR(stats, rsxdrcall); 382 goto bad; 383 } 384 TRACE_1(TR_FAC_KRPC, TR_XDR_CALLMSG_END, 385 "xdr_callmsg_end:(%S)", "good"); 386 387 clone_xprt->xp_xid = msg->rm_xid; 388 cd->cd_req_mp = mp; 389 390 TRACE_1(TR_FAC_KRPC, TR_SVC_COTS_KRECV_END, 391 "svc_cots_krecv_end:(%S)", "good"); 392 RPCLOG0(4, "svc_cots_krecv_end:good\n"); 393 return (TRUE); 394 395 bad: 396 if (mp) 397 freemsg(mp); 398 399 RSSTAT_INCR(stats, rsbadcalls); 400 TRACE_1(TR_FAC_KRPC, TR_SVC_COTS_KRECV_END, 401 "svc_cots_krecv_end:(%S)", "bad"); 402 return (FALSE); 403 } 404 405 /* 406 * Send rpc reply. 407 */ 408 static bool_t 409 svc_cots_ksend(SVCXPRT *clone_xprt, struct rpc_msg *msg) 410 { 411 /* LINTED pointer alignment */ 412 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 413 XDR *xdrs = &(clone_xprt->xp_xdrout); 414 int retval = FALSE; 415 mblk_t *mp; 416 xdrproc_t xdr_results; 417 caddr_t xdr_location; 418 bool_t has_args; 419 420 TRACE_0(TR_FAC_KRPC, TR_SVC_COTS_KSEND_START, 421 "svc_cots_ksend_start:"); 422 423 /* 424 * If there is a result procedure specified in the reply message, 425 * it will be processed in the xdr_replymsg and SVCAUTH_WRAP. 426 * We need to make sure it won't be processed twice, so we null 427 * it for xdr_replymsg here. 428 */ 429 has_args = FALSE; 430 if (msg->rm_reply.rp_stat == MSG_ACCEPTED && 431 msg->rm_reply.rp_acpt.ar_stat == SUCCESS) { 432 if ((xdr_results = msg->acpted_rply.ar_results.proc) != NULL) { 433 has_args = TRUE; 434 xdr_location = msg->acpted_rply.ar_results.where; 435 msg->acpted_rply.ar_results.proc = xdr_void; 436 msg->acpted_rply.ar_results.where = NULL; 437 } 438 } 439 440 mp = cd->cd_mp; 441 if (mp) { 442 /* 443 * The program above pre-allocated an mblk and put 444 * the data in place. 445 */ 446 cd->cd_mp = (mblk_t *)NULL; 447 if (!(xdr_replymsg_body(xdrs, msg) && 448 (!has_args || SVCAUTH_WRAP(&clone_xprt->xp_auth, xdrs, 449 xdr_results, xdr_location)))) { 450 XDR_DESTROY(xdrs); 451 RPCLOG0(1, "svc_cots_ksend: " 452 "xdr_replymsg_body/SVCAUTH_WRAP failed\n"); 453 freemsg(mp); 454 goto out; 455 } 456 } else { 457 int len; 458 int mpsize; 459 460 /* 461 * Leave space for protocol headers. 462 */ 463 len = MSG_OFFSET + clone_xprt->xp_msg_size; 464 465 /* 466 * Allocate an initial mblk for the response data. 467 */ 468 while (!(mp = allocb(len, BPRI_LO))) { 469 RPCLOG0(16, "svc_cots_ksend: allocb failed failed\n"); 470 if (strwaitbuf(len, BPRI_LO)) { 471 TRACE_1(TR_FAC_KRPC, TR_SVC_COTS_KSEND_END, 472 "svc_cots_ksend_end:(%S)", "strwaitbuf"); 473 RPCLOG0(1, 474 "svc_cots_ksend: strwaitbuf failed\n"); 475 goto out; 476 } 477 } 478 479 /* 480 * Initialize the XDR encode stream. Additional mblks 481 * will be allocated if necessary. They will be TIDU 482 * sized. 483 */ 484 xdrmblk_init(xdrs, mp, XDR_ENCODE, clone_xprt->xp_msg_size); 485 mpsize = MBLKSIZE(mp); 486 ASSERT(mpsize >= len); 487 ASSERT(mp->b_rptr == mp->b_datap->db_base); 488 489 /* 490 * If the size of mblk is not appreciably larger than what we 491 * asked, then resize the mblk to exactly len bytes. Reason for 492 * this: suppose len is 1600 bytes, the tidu is 1460 bytes 493 * (from TCP over ethernet), and the arguments to RPC require 494 * 2800 bytes. Ideally we want the protocol to render two 495 * ~1400 byte segments over the wire. If allocb() gives us a 2k 496 * mblk, and we allocate a second mblk for the rest, the 497 * protocol module may generate 3 segments over the wire: 498 * 1460 bytes for the first, 448 (2048 - 1600) for the 2nd, and 499 * 892 for the 3rd. If we "waste" 448 bytes in the first mblk, 500 * the XDR encoding will generate two ~1400 byte mblks, and the 501 * protocol module is more likely to produce properly sized 502 * segments. 503 */ 504 if ((mpsize >> 1) <= len) { 505 mp->b_rptr += (mpsize - len); 506 } 507 508 /* 509 * Adjust b_rptr to reserve space for the non-data protocol 510 * headers that any downstream modules might like to add, and 511 * for the record marking header. 512 */ 513 mp->b_rptr += (MSG_OFFSET + RM_HDR_SIZE); 514 515 XDR_SETPOS(xdrs, (uint_t)(mp->b_rptr - mp->b_datap->db_base)); 516 ASSERT(mp->b_wptr == mp->b_rptr); 517 518 msg->rm_xid = clone_xprt->xp_xid; 519 520 TRACE_0(TR_FAC_KRPC, TR_XDR_REPLYMSG_START, 521 "xdr_replymsg_start:"); 522 if (!(xdr_replymsg(xdrs, msg) && 523 (!has_args || SVCAUTH_WRAP(&clone_xprt->xp_auth, xdrs, 524 xdr_results, xdr_location)))) { 525 XDR_DESTROY(xdrs); 526 TRACE_1(TR_FAC_KRPC, TR_XDR_REPLYMSG_END, 527 "xdr_replymsg_end:(%S)", "bad"); 528 freemsg(mp); 529 RPCLOG0(1, "svc_cots_ksend: xdr_replymsg/SVCAUTH_WRAP " 530 "failed\n"); 531 goto out; 532 } 533 TRACE_1(TR_FAC_KRPC, TR_XDR_REPLYMSG_END, 534 "xdr_replymsg_end:(%S)", "good"); 535 } 536 537 XDR_DESTROY(xdrs); 538 539 put(clone_xprt->xp_wq, mp); 540 retval = TRUE; 541 542 out: 543 /* 544 * This is completely disgusting. If public is set it is 545 * a pointer to a structure whose first field is the address 546 * of the function to free that structure and any related 547 * stuff. (see rrokfree in nfs_xdr.c). 548 */ 549 if (xdrs->x_public) { 550 /* LINTED pointer alignment */ 551 (**((int (**)())xdrs->x_public))(xdrs->x_public); 552 } 553 554 TRACE_1(TR_FAC_KRPC, TR_SVC_COTS_KSEND_END, 555 "svc_cots_ksend_end:(%S)", "done"); 556 return (retval); 557 } 558 559 /* 560 * Deserialize arguments. 561 */ 562 static bool_t 563 svc_cots_kgetargs(SVCXPRT *clone_xprt, xdrproc_t xdr_args, 564 caddr_t args_ptr) 565 { 566 return (SVCAUTH_UNWRAP(&clone_xprt->xp_auth, &clone_xprt->xp_xdrin, 567 xdr_args, args_ptr)); 568 } 569 570 static bool_t 571 svc_cots_kfreeargs(SVCXPRT *clone_xprt, xdrproc_t xdr_args, 572 caddr_t args_ptr) 573 { 574 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 575 /* LINTED pointer alignment */ 576 XDR *xdrs = &clone_xprt->xp_xdrin; 577 mblk_t *mp; 578 bool_t retval; 579 580 /* 581 * It is important to call the XDR routine before 582 * freeing the request mblk. Structures in the 583 * XDR data may point into the mblk and require that 584 * the memory be intact during the free routine. 585 */ 586 if (args_ptr) { 587 xdrs->x_op = XDR_FREE; 588 retval = (*xdr_args)(xdrs, args_ptr); 589 } else 590 retval = TRUE; 591 592 XDR_DESTROY(xdrs); 593 594 if ((mp = cd->cd_req_mp) != NULL) { 595 cd->cd_req_mp = (mblk_t *)0; 596 freemsg(mp); 597 } 598 599 return (retval); 600 } 601 602 static int32_t * 603 svc_cots_kgetres(SVCXPRT *clone_xprt, int size) 604 { 605 /* LINTED pointer alignment */ 606 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 607 XDR *xdrs = &clone_xprt->xp_xdrout; 608 mblk_t *mp; 609 int32_t *buf; 610 struct rpc_msg rply; 611 int len; 612 int mpsize; 613 614 /* 615 * Leave space for protocol headers. 616 */ 617 len = MSG_OFFSET + clone_xprt->xp_msg_size; 618 619 /* 620 * Allocate an initial mblk for the response data. 621 */ 622 while ((mp = allocb(len, BPRI_LO)) == NULL) { 623 if (strwaitbuf(len, BPRI_LO)) 624 return (NULL); 625 } 626 627 /* 628 * Initialize the XDR encode stream. Additional mblks 629 * will be allocated if necessary. They will be TIDU 630 * sized. 631 */ 632 xdrmblk_init(xdrs, mp, XDR_ENCODE, clone_xprt->xp_msg_size); 633 mpsize = MBLKSIZE(mp); 634 ASSERT(mpsize >= len); 635 ASSERT(mp->b_rptr == mp->b_datap->db_base); 636 637 /* 638 * If the size of mblk is not appreciably larger than what we 639 * asked, then resize the mblk to exactly len bytes. Reason for 640 * this: suppose len is 1600 bytes, the tidu is 1460 bytes 641 * (from TCP over ethernet), and the arguments to RPC require 642 * 2800 bytes. Ideally we want the protocol to render two 643 * ~1400 byte segments over the wire. If allocb() gives us a 2k 644 * mblk, and we allocate a second mblk for the rest, the 645 * protocol module may generate 3 segments over the wire: 646 * 1460 bytes for the first, 448 (2048 - 1600) for the 2nd, and 647 * 892 for the 3rd. If we "waste" 448 bytes in the first mblk, 648 * the XDR encoding will generate two ~1400 byte mblks, and the 649 * protocol module is more likely to produce properly sized 650 * segments. 651 */ 652 if ((mpsize >> 1) <= len) { 653 mp->b_rptr += (mpsize - len); 654 } 655 656 /* 657 * Adjust b_rptr to reserve space for the non-data protocol 658 * headers that any downstream modules might like to add, and 659 * for the record marking header. 660 */ 661 mp->b_rptr += (MSG_OFFSET + RM_HDR_SIZE); 662 663 XDR_SETPOS(xdrs, (uint_t)(mp->b_rptr - mp->b_datap->db_base)); 664 ASSERT(mp->b_wptr == mp->b_rptr); 665 666 /* 667 * Assume a successful RPC since most of them are. 668 */ 669 rply.rm_xid = clone_xprt->xp_xid; 670 rply.rm_direction = REPLY; 671 rply.rm_reply.rp_stat = MSG_ACCEPTED; 672 rply.acpted_rply.ar_verf = clone_xprt->xp_verf; 673 rply.acpted_rply.ar_stat = SUCCESS; 674 675 if (!xdr_replymsg_hdr(xdrs, &rply)) { 676 XDR_DESTROY(xdrs); 677 freeb(mp); 678 return (NULL); 679 } 680 681 buf = XDR_INLINE(xdrs, size); 682 if (buf == NULL) { 683 XDR_DESTROY(xdrs); 684 ASSERT(cd->cd_mp == NULL); 685 freemsg(mp); 686 } else { 687 cd->cd_mp = mp; 688 } 689 return (buf); 690 } 691 692 static void 693 svc_cots_kfreeres(SVCXPRT *clone_xprt) 694 { 695 cots_data_t *cd; 696 mblk_t *mp; 697 698 cd = (cots_data_t *)clone_xprt->xp_p2buf; 699 if ((mp = cd->cd_mp) != NULL) { 700 XDR_DESTROY(&clone_xprt->xp_xdrout); 701 cd->cd_mp = (mblk_t *)NULL; 702 freemsg(mp); 703 } 704 } 705 706 /* 707 * the dup cacheing routines below provide a cache of non-failure 708 * transaction id's. rpc service routines can use this to detect 709 * retransmissions and re-send a non-failure response. 710 */ 711 712 /* 713 * MAXDUPREQS is the number of cached items. It should be adjusted 714 * to the service load so that there is likely to be a response entry 715 * when the first retransmission comes in. 716 */ 717 #define MAXDUPREQS 8192 718 719 /* 720 * This should be appropriately scaled to MAXDUPREQS. To produce as less as 721 * possible collisions it is suggested to set this to a prime. 722 */ 723 #define DRHASHSZ 2053 724 725 #define XIDHASH(xid) ((xid) % DRHASHSZ) 726 #define DRHASH(dr) XIDHASH((dr)->dr_xid) 727 #define REQTOXID(req) ((req)->rq_xprt->xp_xid) 728 729 static int cotsndupreqs = 0; 730 int cotsmaxdupreqs = MAXDUPREQS; 731 static kmutex_t cotsdupreq_lock; 732 static struct dupreq *cotsdrhashtbl[DRHASHSZ]; 733 static int cotsdrhashstat[DRHASHSZ]; 734 735 static void unhash(struct dupreq *); 736 737 /* 738 * cotsdrmru points to the head of a circular linked list in lru order. 739 * cotsdrmru->dr_next == drlru 740 */ 741 struct dupreq *cotsdrmru; 742 743 /* 744 * PSARC 2003/523 Contract Private Interface 745 * svc_cots_kdup 746 * Changes must be reviewed by Solaris File Sharing 747 * Changes must be communicated to contract-2003-523@sun.com 748 * 749 * svc_cots_kdup searches the request cache and returns 0 if the 750 * request is not found in the cache. If it is found, then it 751 * returns the state of the request (in progress or done) and 752 * the status or attributes that were part of the original reply. 753 * 754 * If DUP_DONE (there is a duplicate) svc_cots_kdup copies over the 755 * value of the response. In that case, also return in *dupcachedp 756 * whether the response free routine is cached in the dupreq - in which case 757 * the caller should not be freeing it, because it will be done later 758 * in the svc_cots_kdup code when the dupreq is reused. 759 */ 760 static int 761 svc_cots_kdup(struct svc_req *req, caddr_t res, int size, struct dupreq **drpp, 762 bool_t *dupcachedp) 763 { 764 struct rpc_cots_server *stats = CLONE2STATS(req->rq_xprt); 765 struct dupreq *dr; 766 uint32_t xid; 767 uint32_t drhash; 768 int status; 769 770 xid = REQTOXID(req); 771 mutex_enter(&cotsdupreq_lock); 772 RSSTAT_INCR(stats, rsdupchecks); 773 /* 774 * Check to see whether an entry already exists in the cache. 775 */ 776 dr = cotsdrhashtbl[XIDHASH(xid)]; 777 while (dr != NULL) { 778 if (dr->dr_xid == xid && 779 dr->dr_proc == req->rq_proc && 780 dr->dr_prog == req->rq_prog && 781 dr->dr_vers == req->rq_vers && 782 dr->dr_addr.len == req->rq_xprt->xp_rtaddr.len && 783 bcmp((caddr_t)dr->dr_addr.buf, 784 (caddr_t)req->rq_xprt->xp_rtaddr.buf, 785 dr->dr_addr.len) == 0) { 786 status = dr->dr_status; 787 if (status == DUP_DONE) { 788 bcopy(dr->dr_resp.buf, res, size); 789 if (dupcachedp != NULL) 790 *dupcachedp = (dr->dr_resfree != NULL); 791 TRACE_0(TR_FAC_KRPC, TR_SVC_COTS_KDUP_DONE, 792 "svc_cots_kdup: DUP_DONE"); 793 } else { 794 dr->dr_status = DUP_INPROGRESS; 795 *drpp = dr; 796 TRACE_0(TR_FAC_KRPC, 797 TR_SVC_COTS_KDUP_INPROGRESS, 798 "svc_cots_kdup: DUP_INPROGRESS"); 799 } 800 RSSTAT_INCR(stats, rsdupreqs); 801 mutex_exit(&cotsdupreq_lock); 802 return (status); 803 } 804 dr = dr->dr_chain; 805 } 806 807 /* 808 * There wasn't an entry, either allocate a new one or recycle 809 * an old one. 810 */ 811 if (cotsndupreqs < cotsmaxdupreqs) { 812 dr = kmem_alloc(sizeof (*dr), KM_NOSLEEP); 813 if (dr == NULL) { 814 mutex_exit(&cotsdupreq_lock); 815 return (DUP_ERROR); 816 } 817 dr->dr_resp.buf = NULL; 818 dr->dr_resp.maxlen = 0; 819 dr->dr_addr.buf = NULL; 820 dr->dr_addr.maxlen = 0; 821 if (cotsdrmru) { 822 dr->dr_next = cotsdrmru->dr_next; 823 cotsdrmru->dr_next = dr; 824 } else { 825 dr->dr_next = dr; 826 } 827 cotsndupreqs++; 828 } else { 829 dr = cotsdrmru->dr_next; 830 while (dr->dr_status == DUP_INPROGRESS) { 831 dr = dr->dr_next; 832 if (dr == cotsdrmru->dr_next) { 833 cmn_err(CE_WARN, "svc_cots_kdup no slots free"); 834 mutex_exit(&cotsdupreq_lock); 835 return (DUP_ERROR); 836 } 837 } 838 unhash(dr); 839 if (dr->dr_resfree) { 840 (*dr->dr_resfree)(dr->dr_resp.buf); 841 } 842 } 843 dr->dr_resfree = NULL; 844 cotsdrmru = dr; 845 846 dr->dr_xid = REQTOXID(req); 847 dr->dr_prog = req->rq_prog; 848 dr->dr_vers = req->rq_vers; 849 dr->dr_proc = req->rq_proc; 850 if (dr->dr_addr.maxlen < req->rq_xprt->xp_rtaddr.len) { 851 if (dr->dr_addr.buf != NULL) 852 kmem_free(dr->dr_addr.buf, dr->dr_addr.maxlen); 853 dr->dr_addr.maxlen = req->rq_xprt->xp_rtaddr.len; 854 dr->dr_addr.buf = kmem_alloc(dr->dr_addr.maxlen, KM_NOSLEEP); 855 if (dr->dr_addr.buf == NULL) { 856 dr->dr_addr.maxlen = 0; 857 dr->dr_status = DUP_DROP; 858 mutex_exit(&cotsdupreq_lock); 859 return (DUP_ERROR); 860 } 861 } 862 dr->dr_addr.len = req->rq_xprt->xp_rtaddr.len; 863 bcopy(req->rq_xprt->xp_rtaddr.buf, dr->dr_addr.buf, dr->dr_addr.len); 864 if (dr->dr_resp.maxlen < size) { 865 if (dr->dr_resp.buf != NULL) 866 kmem_free(dr->dr_resp.buf, dr->dr_resp.maxlen); 867 dr->dr_resp.maxlen = (unsigned int)size; 868 dr->dr_resp.buf = kmem_alloc(size, KM_NOSLEEP); 869 if (dr->dr_resp.buf == NULL) { 870 dr->dr_resp.maxlen = 0; 871 dr->dr_status = DUP_DROP; 872 mutex_exit(&cotsdupreq_lock); 873 return (DUP_ERROR); 874 } 875 } 876 dr->dr_status = DUP_INPROGRESS; 877 878 drhash = (uint32_t)DRHASH(dr); 879 dr->dr_chain = cotsdrhashtbl[drhash]; 880 cotsdrhashtbl[drhash] = dr; 881 cotsdrhashstat[drhash]++; 882 mutex_exit(&cotsdupreq_lock); 883 *drpp = dr; 884 return (DUP_NEW); 885 } 886 887 /* 888 * PSARC 2003/523 Contract Private Interface 889 * svc_cots_kdupdone 890 * Changes must be reviewed by Solaris File Sharing 891 * Changes must be communicated to contract-2003-523@sun.com 892 * 893 * svc_cots_kdupdone marks the request done (DUP_DONE or DUP_DROP) 894 * and stores the response. 895 */ 896 static void 897 svc_cots_kdupdone(struct dupreq *dr, caddr_t res, void (*dis_resfree)(), 898 int size, int status) 899 { 900 ASSERT(dr->dr_resfree == NULL); 901 if (status == DUP_DONE) { 902 bcopy(res, dr->dr_resp.buf, size); 903 dr->dr_resfree = dis_resfree; 904 } 905 dr->dr_status = status; 906 } 907 908 /* 909 * This routine expects that the mutex, cotsdupreq_lock, is already held. 910 */ 911 static void 912 unhash(struct dupreq *dr) 913 { 914 struct dupreq *drt; 915 struct dupreq *drtprev = NULL; 916 uint32_t drhash; 917 918 ASSERT(MUTEX_HELD(&cotsdupreq_lock)); 919 920 drhash = (uint32_t)DRHASH(dr); 921 drt = cotsdrhashtbl[drhash]; 922 while (drt != NULL) { 923 if (drt == dr) { 924 cotsdrhashstat[drhash]--; 925 if (drtprev == NULL) { 926 cotsdrhashtbl[drhash] = drt->dr_chain; 927 } else { 928 drtprev->dr_chain = drt->dr_chain; 929 } 930 return; 931 } 932 drtprev = drt; 933 drt = drt->dr_chain; 934 } 935 } 936 937 void 938 svc_cots_stats_init(zoneid_t zoneid, struct rpc_cots_server **statsp) 939 { 940 *statsp = (struct rpc_cots_server *)rpcstat_zone_init_common(zoneid, 941 "unix", "rpc_cots_server", (const kstat_named_t *)&cots_rsstat_tmpl, 942 sizeof (cots_rsstat_tmpl)); 943 } 944 945 void 946 svc_cots_stats_fini(zoneid_t zoneid, struct rpc_cots_server **statsp) 947 { 948 rpcstat_zone_fini_common(zoneid, "unix", "rpc_cots_server"); 949 kmem_free(*statsp, sizeof (cots_rsstat_tmpl)); 950 } 951 952 void 953 svc_cots_init(void) 954 { 955 /* 956 * Check to make sure that the cots private data will fit into 957 * the stack buffer allocated by svc_run. The ASSERT is a safety 958 * net if the cots_data_t structure ever changes. 959 */ 960 /*CONSTANTCONDITION*/ 961 ASSERT(sizeof (cots_data_t) <= SVC_P2LEN); 962 963 mutex_init(&cots_kcreate_lock, NULL, MUTEX_DEFAULT, NULL); 964 mutex_init(&cotsdupreq_lock, NULL, MUTEX_DEFAULT, NULL); 965 } 966