1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2015 Nexenta Systems, Inc. All rights reserved. 24 * Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved. 25 * Copyright (c) 2012 by Delphix. All rights reserved. 26 */ 27 28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 29 /* All Rights Reserved */ 30 31 /* 32 * Portions of this source code were derived from Berkeley 4.3 BSD 33 * under license from the Regents of the University of California. 34 */ 35 36 /* 37 * svc_cots.c 38 * Server side for connection-oriented RPC in the kernel. 39 * 40 */ 41 42 #include <sys/param.h> 43 #include <sys/types.h> 44 #include <sys/sysmacros.h> 45 #include <sys/file.h> 46 #include <sys/stream.h> 47 #include <sys/strsubr.h> 48 #include <sys/strsun.h> 49 #include <sys/stropts.h> 50 #include <sys/tiuser.h> 51 #include <sys/timod.h> 52 #include <sys/tihdr.h> 53 #include <sys/fcntl.h> 54 #include <sys/errno.h> 55 #include <sys/kmem.h> 56 #include <sys/systm.h> 57 #include <sys/debug.h> 58 #include <sys/cmn_err.h> 59 #include <sys/kstat.h> 60 #include <sys/vtrace.h> 61 62 #include <rpc/types.h> 63 #include <rpc/xdr.h> 64 #include <rpc/auth.h> 65 #include <rpc/rpc_msg.h> 66 #include <rpc/svc.h> 67 #include <inet/ip.h> 68 69 #define COTS_MAX_ALLOCSIZE 2048 70 #define MSG_OFFSET 128 /* offset of call into the mblk */ 71 #define RM_HDR_SIZE 4 /* record mark header size */ 72 73 /* 74 * Routines exported through ops vector. 75 */ 76 static bool_t svc_cots_krecv(SVCXPRT *, mblk_t *, struct rpc_msg *); 77 static bool_t svc_cots_ksend(SVCXPRT *, struct rpc_msg *); 78 static bool_t svc_cots_kgetargs(SVCXPRT *, xdrproc_t, caddr_t); 79 static bool_t svc_cots_kfreeargs(SVCXPRT *, xdrproc_t, caddr_t); 80 static void svc_cots_kdestroy(SVCMASTERXPRT *); 81 static int svc_cots_kdup(struct svc_req *, caddr_t, int, 82 struct dupreq **, bool_t *); 83 static void svc_cots_kdupdone(struct dupreq *, caddr_t, 84 void (*)(), int, int); 85 static int32_t *svc_cots_kgetres(SVCXPRT *, int); 86 static void svc_cots_kfreeres(SVCXPRT *); 87 static void svc_cots_kclone_destroy(SVCXPRT *); 88 static void svc_cots_kstart(SVCMASTERXPRT *); 89 static void svc_cots_ktattrs(SVCXPRT *, int, void **); 90 91 /* 92 * Server transport operations vector. 93 */ 94 struct svc_ops svc_cots_op = { 95 svc_cots_krecv, /* Get requests */ 96 svc_cots_kgetargs, /* Deserialize arguments */ 97 svc_cots_ksend, /* Send reply */ 98 svc_cots_kfreeargs, /* Free argument data space */ 99 svc_cots_kdestroy, /* Destroy transport handle */ 100 svc_cots_kdup, /* Check entry in dup req cache */ 101 svc_cots_kdupdone, /* Mark entry in dup req cache as done */ 102 svc_cots_kgetres, /* Get pointer to response buffer */ 103 svc_cots_kfreeres, /* Destroy pre-serialized response header */ 104 svc_cots_kclone_destroy, /* Destroy a clone xprt */ 105 svc_cots_kstart, /* Tell `ready-to-receive' to rpcmod */ 106 NULL, /* Transport specific clone xprt */ 107 svc_cots_ktattrs /* Transport Attributes */ 108 }; 109 110 /* 111 * Master transport private data. 112 * Kept in xprt->xp_p2. 113 */ 114 struct cots_master_data { 115 char *cmd_src_addr; /* client's address */ 116 int cmd_xprt_started; /* flag for clone routine to call */ 117 /* rpcmod's start routine. */ 118 struct rpc_cots_server *cmd_stats; /* stats for zone */ 119 }; 120 121 /* 122 * Transport private data. 123 * Kept in clone_xprt->xp_p2buf. 124 */ 125 typedef struct cots_data { 126 mblk_t *cd_mp; /* pre-allocated reply message */ 127 mblk_t *cd_req_mp; /* request message */ 128 } cots_data_t; 129 130 /* 131 * Server statistics 132 * NOTE: This structure type is duplicated in the NFS fast path. 133 */ 134 static const struct rpc_cots_server { 135 kstat_named_t rscalls; 136 kstat_named_t rsbadcalls; 137 kstat_named_t rsnullrecv; 138 kstat_named_t rsbadlen; 139 kstat_named_t rsxdrcall; 140 kstat_named_t rsdupchecks; 141 kstat_named_t rsdupreqs; 142 } cots_rsstat_tmpl = { 143 { "calls", KSTAT_DATA_UINT64 }, 144 { "badcalls", KSTAT_DATA_UINT64 }, 145 { "nullrecv", KSTAT_DATA_UINT64 }, 146 { "badlen", KSTAT_DATA_UINT64 }, 147 { "xdrcall", KSTAT_DATA_UINT64 }, 148 { "dupchecks", KSTAT_DATA_UINT64 }, 149 { "dupreqs", KSTAT_DATA_UINT64 } 150 }; 151 152 #define CLONE2STATS(clone_xprt) \ 153 ((struct cots_master_data *)(clone_xprt)->xp_master->xp_p2)->cmd_stats 154 #define RSSTAT_INCR(s, x) \ 155 atomic_inc_64(&(s)->x.value.ui64) 156 157 /* 158 * Pointer to a transport specific `ready to receive' function in rpcmod 159 * (set from rpcmod). 160 */ 161 void (*mir_start)(queue_t *); 162 uint_t *svc_max_msg_sizep; 163 164 /* 165 * the address size of the underlying transport can sometimes be 166 * unknown (tinfo->ADDR_size == -1). For this case, it is 167 * necessary to figure out what the size is so the correct amount 168 * of data is allocated. This is an itterative process: 169 * 1. take a good guess (use T_MINADDRSIZE) 170 * 2. try it. 171 * 3. if it works then everything is ok 172 * 4. if the error is ENAMETOLONG, double the guess 173 * 5. go back to step 2. 174 */ 175 #define T_UNKNOWNADDRSIZE (-1) 176 #define T_MINADDRSIZE 32 177 178 /* 179 * Create a transport record. 180 * The transport record, output buffer, and private data structure 181 * are allocated. The output buffer is serialized into using xdrmem. 182 * There is one transport record per user process which implements a 183 * set of services. 184 */ 185 static kmutex_t cots_kcreate_lock; 186 187 int 188 svc_cots_kcreate(file_t *fp, uint_t max_msgsize, struct T_info_ack *tinfo, 189 SVCMASTERXPRT **nxprt) 190 { 191 struct cots_master_data *cmd; 192 int err, retval; 193 SVCMASTERXPRT *xprt; 194 struct rpcstat *rpcstat; 195 struct T_addr_ack *ack_p; 196 struct strioctl getaddr; 197 198 if (nxprt == NULL) 199 return (EINVAL); 200 201 rpcstat = zone_getspecific(rpcstat_zone_key, curproc->p_zone); 202 ASSERT(rpcstat != NULL); 203 204 xprt = kmem_zalloc(sizeof (SVCMASTERXPRT), KM_SLEEP); 205 206 cmd = kmem_zalloc(sizeof (*cmd) + sizeof (*ack_p) 207 + (2 * sizeof (sin6_t)), KM_SLEEP); 208 209 ack_p = (struct T_addr_ack *)&cmd[1]; 210 211 if ((tinfo->TIDU_size > COTS_MAX_ALLOCSIZE) || 212 (tinfo->TIDU_size <= 0)) 213 xprt->xp_msg_size = COTS_MAX_ALLOCSIZE; 214 else { 215 xprt->xp_msg_size = tinfo->TIDU_size - 216 (tinfo->TIDU_size % BYTES_PER_XDR_UNIT); 217 } 218 219 xprt->xp_ops = &svc_cots_op; 220 xprt->xp_p2 = (caddr_t)cmd; 221 cmd->cmd_xprt_started = 0; 222 cmd->cmd_stats = rpcstat->rpc_cots_server; 223 224 getaddr.ic_cmd = TI_GETINFO; 225 getaddr.ic_timout = -1; 226 getaddr.ic_len = sizeof (*ack_p) + (2 * sizeof (sin6_t)); 227 getaddr.ic_dp = (char *)ack_p; 228 ack_p->PRIM_type = T_ADDR_REQ; 229 230 err = strioctl(fp->f_vnode, I_STR, (intptr_t)&getaddr, 231 0, K_TO_K, CRED(), &retval); 232 if (err) { 233 kmem_free(cmd, sizeof (*cmd) + sizeof (*ack_p) + 234 (2 * sizeof (sin6_t))); 235 kmem_free(xprt, sizeof (SVCMASTERXPRT)); 236 return (err); 237 } 238 239 xprt->xp_rtaddr.maxlen = ack_p->REMADDR_length; 240 xprt->xp_rtaddr.len = ack_p->REMADDR_length; 241 cmd->cmd_src_addr = xprt->xp_rtaddr.buf = 242 (char *)ack_p + ack_p->REMADDR_offset; 243 244 xprt->xp_lcladdr.maxlen = ack_p->LOCADDR_length; 245 xprt->xp_lcladdr.len = ack_p->LOCADDR_length; 246 xprt->xp_lcladdr.buf = (char *)ack_p + ack_p->LOCADDR_offset; 247 248 /* 249 * If the current sanity check size in rpcmod is smaller 250 * than the size needed for this xprt, then increase 251 * the sanity check. 252 */ 253 if (max_msgsize != 0 && svc_max_msg_sizep && 254 max_msgsize > *svc_max_msg_sizep) { 255 256 /* This check needs a lock */ 257 mutex_enter(&cots_kcreate_lock); 258 if (svc_max_msg_sizep && max_msgsize > *svc_max_msg_sizep) 259 *svc_max_msg_sizep = max_msgsize; 260 mutex_exit(&cots_kcreate_lock); 261 } 262 263 *nxprt = xprt; 264 265 return (0); 266 } 267 268 /* 269 * Destroy a master transport record. 270 * Frees the space allocated for a transport record. 271 */ 272 static void 273 svc_cots_kdestroy(SVCMASTERXPRT *xprt) 274 { 275 struct cots_master_data *cmd = (struct cots_master_data *)xprt->xp_p2; 276 277 ASSERT(cmd); 278 279 if (xprt->xp_netid) 280 kmem_free(xprt->xp_netid, strlen(xprt->xp_netid) + 1); 281 if (xprt->xp_addrmask.maxlen) 282 kmem_free(xprt->xp_addrmask.buf, xprt->xp_addrmask.maxlen); 283 284 mutex_destroy(&xprt->xp_req_lock); 285 mutex_destroy(&xprt->xp_thread_lock); 286 287 kmem_free(cmd, sizeof (*cmd) + sizeof (struct T_addr_ack) + 288 (2 * sizeof (sin6_t))); 289 290 kmem_free(xprt, sizeof (SVCMASTERXPRT)); 291 } 292 293 /* 294 * svc_tli_kcreate() calls this function at the end to tell 295 * rpcmod that the transport is ready to receive requests. 296 */ 297 static void 298 svc_cots_kstart(SVCMASTERXPRT *xprt) 299 { 300 struct cots_master_data *cmd = (struct cots_master_data *)xprt->xp_p2; 301 302 if (cmd->cmd_xprt_started == 0) { 303 /* 304 * Acquire the xp_req_lock in order to use xp_wq 305 * safely (we don't want to qenable a queue that has 306 * already been closed). 307 */ 308 mutex_enter(&xprt->xp_req_lock); 309 if (cmd->cmd_xprt_started == 0 && 310 xprt->xp_wq != NULL) { 311 (*mir_start)(xprt->xp_wq); 312 cmd->cmd_xprt_started = 1; 313 } 314 mutex_exit(&xprt->xp_req_lock); 315 } 316 } 317 318 /* 319 * Transport-type specific part of svc_xprt_cleanup(). 320 */ 321 static void 322 svc_cots_kclone_destroy(SVCXPRT *clone_xprt) 323 { 324 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 325 326 if (cd->cd_req_mp) { 327 freemsg(cd->cd_req_mp); 328 cd->cd_req_mp = (mblk_t *)0; 329 } 330 ASSERT(cd->cd_mp == NULL); 331 } 332 333 /* 334 * Transport Attributes. 335 */ 336 static void 337 svc_cots_ktattrs(SVCXPRT *clone_xprt, int attrflag, void **tattr) 338 { 339 *tattr = NULL; 340 341 switch (attrflag) { 342 case SVC_TATTR_ADDRMASK: 343 *tattr = (void *)&clone_xprt->xp_master->xp_addrmask; 344 } 345 } 346 347 /* 348 * Receive rpc requests. 349 * Checks if the message is intact, and deserializes the call packet. 350 */ 351 static bool_t 352 svc_cots_krecv(SVCXPRT *clone_xprt, mblk_t *mp, struct rpc_msg *msg) 353 { 354 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 355 XDR *xdrs = &clone_xprt->xp_xdrin; 356 struct rpc_cots_server *stats = CLONE2STATS(clone_xprt); 357 358 TRACE_0(TR_FAC_KRPC, TR_SVC_COTS_KRECV_START, 359 "svc_cots_krecv_start:"); 360 RPCLOG(4, "svc_cots_krecv_start clone_xprt = %p:\n", 361 (void *)clone_xprt); 362 363 RSSTAT_INCR(stats, rscalls); 364 365 if (mp->b_datap->db_type != M_DATA) { 366 RPCLOG(16, "svc_cots_krecv bad db_type %d\n", 367 mp->b_datap->db_type); 368 goto bad; 369 } 370 371 xdrmblk_init(xdrs, mp, XDR_DECODE, 0); 372 373 TRACE_0(TR_FAC_KRPC, TR_XDR_CALLMSG_START, 374 "xdr_callmsg_start:"); 375 RPCLOG0(4, "xdr_callmsg_start:\n"); 376 if (!xdr_callmsg(xdrs, msg)) { 377 XDR_DESTROY(xdrs); 378 TRACE_1(TR_FAC_KRPC, TR_XDR_CALLMSG_END, 379 "xdr_callmsg_end:(%S)", "bad"); 380 RPCLOG0(1, "svc_cots_krecv xdr_callmsg failure\n"); 381 RSSTAT_INCR(stats, rsxdrcall); 382 goto bad; 383 } 384 TRACE_1(TR_FAC_KRPC, TR_XDR_CALLMSG_END, 385 "xdr_callmsg_end:(%S)", "good"); 386 387 clone_xprt->xp_xid = msg->rm_xid; 388 cd->cd_req_mp = mp; 389 390 TRACE_1(TR_FAC_KRPC, TR_SVC_COTS_KRECV_END, 391 "svc_cots_krecv_end:(%S)", "good"); 392 RPCLOG0(4, "svc_cots_krecv_end:good\n"); 393 return (TRUE); 394 395 bad: 396 if (mp) 397 freemsg(mp); 398 399 RSSTAT_INCR(stats, rsbadcalls); 400 TRACE_1(TR_FAC_KRPC, TR_SVC_COTS_KRECV_END, 401 "svc_cots_krecv_end:(%S)", "bad"); 402 return (FALSE); 403 } 404 405 /* 406 * Send rpc reply. 407 */ 408 static bool_t 409 svc_cots_ksend(SVCXPRT *clone_xprt, struct rpc_msg *msg) 410 { 411 /* LINTED pointer alignment */ 412 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 413 XDR *xdrs = &(clone_xprt->xp_xdrout); 414 int retval = FALSE; 415 mblk_t *mp; 416 xdrproc_t xdr_results; 417 caddr_t xdr_location; 418 bool_t has_args; 419 420 TRACE_0(TR_FAC_KRPC, TR_SVC_COTS_KSEND_START, 421 "svc_cots_ksend_start:"); 422 423 /* 424 * If there is a result procedure specified in the reply message, 425 * it will be processed in the xdr_replymsg and SVCAUTH_WRAP. 426 * We need to make sure it won't be processed twice, so we null 427 * it for xdr_replymsg here. 428 */ 429 has_args = FALSE; 430 if (msg->rm_reply.rp_stat == MSG_ACCEPTED && 431 msg->rm_reply.rp_acpt.ar_stat == SUCCESS) { 432 if ((xdr_results = msg->acpted_rply.ar_results.proc) != NULL) { 433 has_args = TRUE; 434 xdr_location = msg->acpted_rply.ar_results.where; 435 msg->acpted_rply.ar_results.proc = xdr_void; 436 msg->acpted_rply.ar_results.where = NULL; 437 } 438 } 439 440 mp = cd->cd_mp; 441 if (mp) { 442 /* 443 * The program above pre-allocated an mblk and put 444 * the data in place. 445 */ 446 cd->cd_mp = (mblk_t *)NULL; 447 if (!(xdr_replymsg_body(xdrs, msg) && 448 (!has_args || SVCAUTH_WRAP(&clone_xprt->xp_auth, xdrs, 449 xdr_results, xdr_location)))) { 450 XDR_DESTROY(xdrs); 451 RPCLOG0(1, "svc_cots_ksend: " 452 "xdr_replymsg_body/SVCAUTH_WRAP failed\n"); 453 freemsg(mp); 454 goto out; 455 } 456 } else { 457 int len; 458 int mpsize; 459 460 /* 461 * Leave space for protocol headers. 462 */ 463 len = MSG_OFFSET + clone_xprt->xp_msg_size; 464 465 /* 466 * Allocate an initial mblk for the response data. 467 */ 468 while (!(mp = allocb(len, BPRI_LO))) { 469 RPCLOG0(16, "svc_cots_ksend: allocb failed failed\n"); 470 if (strwaitbuf(len, BPRI_LO)) { 471 TRACE_1(TR_FAC_KRPC, TR_SVC_COTS_KSEND_END, 472 "svc_cots_ksend_end:(%S)", "strwaitbuf"); 473 RPCLOG0(1, 474 "svc_cots_ksend: strwaitbuf failed\n"); 475 goto out; 476 } 477 } 478 479 /* 480 * Initialize the XDR encode stream. Additional mblks 481 * will be allocated if necessary. They will be TIDU 482 * sized. 483 */ 484 xdrmblk_init(xdrs, mp, XDR_ENCODE, clone_xprt->xp_msg_size); 485 mpsize = MBLKSIZE(mp); 486 ASSERT(mpsize >= len); 487 ASSERT(mp->b_rptr == mp->b_datap->db_base); 488 489 /* 490 * If the size of mblk is not appreciably larger than what we 491 * asked, then resize the mblk to exactly len bytes. Reason for 492 * this: suppose len is 1600 bytes, the tidu is 1460 bytes 493 * (from TCP over ethernet), and the arguments to RPC require 494 * 2800 bytes. Ideally we want the protocol to render two 495 * ~1400 byte segments over the wire. If allocb() gives us a 2k 496 * mblk, and we allocate a second mblk for the rest, the 497 * protocol module may generate 3 segments over the wire: 498 * 1460 bytes for the first, 448 (2048 - 1600) for the 2nd, and 499 * 892 for the 3rd. If we "waste" 448 bytes in the first mblk, 500 * the XDR encoding will generate two ~1400 byte mblks, and the 501 * protocol module is more likely to produce properly sized 502 * segments. 503 */ 504 if ((mpsize >> 1) <= len) { 505 mp->b_rptr += (mpsize - len); 506 } 507 508 /* 509 * Adjust b_rptr to reserve space for the non-data protocol 510 * headers that any downstream modules might like to add, and 511 * for the record marking header. 512 */ 513 mp->b_rptr += (MSG_OFFSET + RM_HDR_SIZE); 514 515 XDR_SETPOS(xdrs, (uint_t)(mp->b_rptr - mp->b_datap->db_base)); 516 ASSERT(mp->b_wptr == mp->b_rptr); 517 518 msg->rm_xid = clone_xprt->xp_xid; 519 520 TRACE_0(TR_FAC_KRPC, TR_XDR_REPLYMSG_START, 521 "xdr_replymsg_start:"); 522 if (!(xdr_replymsg(xdrs, msg) && 523 (!has_args || SVCAUTH_WRAP(&clone_xprt->xp_auth, xdrs, 524 xdr_results, xdr_location)))) { 525 XDR_DESTROY(xdrs); 526 TRACE_1(TR_FAC_KRPC, TR_XDR_REPLYMSG_END, 527 "xdr_replymsg_end:(%S)", "bad"); 528 freemsg(mp); 529 RPCLOG0(1, "svc_cots_ksend: xdr_replymsg/SVCAUTH_WRAP " 530 "failed\n"); 531 goto out; 532 } 533 TRACE_1(TR_FAC_KRPC, TR_XDR_REPLYMSG_END, 534 "xdr_replymsg_end:(%S)", "good"); 535 } 536 537 XDR_DESTROY(xdrs); 538 539 put(clone_xprt->xp_wq, mp); 540 retval = TRUE; 541 542 out: 543 /* 544 * This is completely disgusting. If public is set it is 545 * a pointer to a structure whose first field is the address 546 * of the function to free that structure and any related 547 * stuff. (see rrokfree in nfs_xdr.c). 548 */ 549 if (xdrs->x_public) { 550 /* LINTED pointer alignment */ 551 (**((int (**)())xdrs->x_public))(xdrs->x_public); 552 } 553 554 TRACE_1(TR_FAC_KRPC, TR_SVC_COTS_KSEND_END, 555 "svc_cots_ksend_end:(%S)", "done"); 556 return (retval); 557 } 558 559 /* 560 * Deserialize arguments. 561 */ 562 static bool_t 563 svc_cots_kgetargs(SVCXPRT *clone_xprt, xdrproc_t xdr_args, 564 caddr_t args_ptr) 565 { 566 return (SVCAUTH_UNWRAP(&clone_xprt->xp_auth, &clone_xprt->xp_xdrin, 567 xdr_args, args_ptr)); 568 } 569 570 static bool_t 571 svc_cots_kfreeargs(SVCXPRT *clone_xprt, xdrproc_t xdr_args, 572 caddr_t args_ptr) 573 { 574 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 575 /* LINTED pointer alignment */ 576 XDR *xdrs = &clone_xprt->xp_xdrin; 577 mblk_t *mp; 578 bool_t retval; 579 580 /* 581 * It is important to call the XDR routine before 582 * freeing the request mblk. Structures in the 583 * XDR data may point into the mblk and require that 584 * the memory be intact during the free routine. 585 */ 586 if (args_ptr) { 587 xdrs->x_op = XDR_FREE; 588 retval = (*xdr_args)(xdrs, args_ptr); 589 } else 590 retval = TRUE; 591 592 XDR_DESTROY(xdrs); 593 594 if ((mp = cd->cd_req_mp) != NULL) { 595 cd->cd_req_mp = (mblk_t *)0; 596 freemsg(mp); 597 } 598 599 return (retval); 600 } 601 602 static int32_t * 603 svc_cots_kgetres(SVCXPRT *clone_xprt, int size) 604 { 605 /* LINTED pointer alignment */ 606 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 607 XDR *xdrs = &clone_xprt->xp_xdrout; 608 mblk_t *mp; 609 int32_t *buf; 610 struct rpc_msg rply; 611 int len; 612 int mpsize; 613 614 /* 615 * Leave space for protocol headers. 616 */ 617 len = MSG_OFFSET + clone_xprt->xp_msg_size; 618 619 /* 620 * Allocate an initial mblk for the response data. 621 */ 622 while ((mp = allocb(len, BPRI_LO)) == NULL) { 623 if (strwaitbuf(len, BPRI_LO)) 624 return (NULL); 625 } 626 627 /* 628 * Initialize the XDR encode stream. Additional mblks 629 * will be allocated if necessary. They will be TIDU 630 * sized. 631 */ 632 xdrmblk_init(xdrs, mp, XDR_ENCODE, clone_xprt->xp_msg_size); 633 mpsize = MBLKSIZE(mp); 634 ASSERT(mpsize >= len); 635 ASSERT(mp->b_rptr == mp->b_datap->db_base); 636 637 /* 638 * If the size of mblk is not appreciably larger than what we 639 * asked, then resize the mblk to exactly len bytes. Reason for 640 * this: suppose len is 1600 bytes, the tidu is 1460 bytes 641 * (from TCP over ethernet), and the arguments to RPC require 642 * 2800 bytes. Ideally we want the protocol to render two 643 * ~1400 byte segments over the wire. If allocb() gives us a 2k 644 * mblk, and we allocate a second mblk for the rest, the 645 * protocol module may generate 3 segments over the wire: 646 * 1460 bytes for the first, 448 (2048 - 1600) for the 2nd, and 647 * 892 for the 3rd. If we "waste" 448 bytes in the first mblk, 648 * the XDR encoding will generate two ~1400 byte mblks, and the 649 * protocol module is more likely to produce properly sized 650 * segments. 651 */ 652 if ((mpsize >> 1) <= len) { 653 mp->b_rptr += (mpsize - len); 654 } 655 656 /* 657 * Adjust b_rptr to reserve space for the non-data protocol 658 * headers that any downstream modules might like to add, and 659 * for the record marking header. 660 */ 661 mp->b_rptr += (MSG_OFFSET + RM_HDR_SIZE); 662 663 XDR_SETPOS(xdrs, (uint_t)(mp->b_rptr - mp->b_datap->db_base)); 664 ASSERT(mp->b_wptr == mp->b_rptr); 665 666 /* 667 * Assume a successful RPC since most of them are. 668 */ 669 rply.rm_xid = clone_xprt->xp_xid; 670 rply.rm_direction = REPLY; 671 rply.rm_reply.rp_stat = MSG_ACCEPTED; 672 rply.acpted_rply.ar_verf = clone_xprt->xp_verf; 673 rply.acpted_rply.ar_stat = SUCCESS; 674 675 if (!xdr_replymsg_hdr(xdrs, &rply)) { 676 XDR_DESTROY(xdrs); 677 freeb(mp); 678 return (NULL); 679 } 680 681 buf = XDR_INLINE(xdrs, size); 682 if (buf == NULL) { 683 XDR_DESTROY(xdrs); 684 ASSERT(cd->cd_mp == NULL); 685 freemsg(mp); 686 } else { 687 cd->cd_mp = mp; 688 } 689 return (buf); 690 } 691 692 static void 693 svc_cots_kfreeres(SVCXPRT *clone_xprt) 694 { 695 cots_data_t *cd; 696 mblk_t *mp; 697 698 cd = (cots_data_t *)clone_xprt->xp_p2buf; 699 if ((mp = cd->cd_mp) != NULL) { 700 XDR_DESTROY(&clone_xprt->xp_xdrout); 701 cd->cd_mp = (mblk_t *)NULL; 702 freemsg(mp); 703 } 704 } 705 706 /* 707 * the dup cacheing routines below provide a cache of non-failure 708 * transaction id's. rpc service routines can use this to detect 709 * retransmissions and re-send a non-failure response. 710 */ 711 712 /* 713 * MAXDUPREQS is the number of cached items. It should be adjusted 714 * to the service load so that there is likely to be a response entry 715 * when the first retransmission comes in. 716 */ 717 #define MAXDUPREQS 8192 718 719 /* 720 * This should be appropriately scaled to MAXDUPREQS. 721 */ 722 #define DRHASHSZ 2053 723 724 #if ((DRHASHSZ & (DRHASHSZ - 1)) == 0) 725 #define XIDHASH(xid) ((xid) & (DRHASHSZ - 1)) 726 #else 727 #define XIDHASH(xid) ((xid) % DRHASHSZ) 728 #endif 729 #define DRHASH(dr) XIDHASH((dr)->dr_xid) 730 #define REQTOXID(req) ((req)->rq_xprt->xp_xid) 731 732 static int cotsndupreqs = 0; 733 int cotsmaxdupreqs = MAXDUPREQS; 734 static kmutex_t cotsdupreq_lock; 735 static struct dupreq *cotsdrhashtbl[DRHASHSZ]; 736 static int cotsdrhashstat[DRHASHSZ]; 737 738 static void unhash(struct dupreq *); 739 740 /* 741 * cotsdrmru points to the head of a circular linked list in lru order. 742 * cotsdrmru->dr_next == drlru 743 */ 744 struct dupreq *cotsdrmru; 745 746 /* 747 * PSARC 2003/523 Contract Private Interface 748 * svc_cots_kdup 749 * Changes must be reviewed by Solaris File Sharing 750 * Changes must be communicated to contract-2003-523@sun.com 751 * 752 * svc_cots_kdup searches the request cache and returns 0 if the 753 * request is not found in the cache. If it is found, then it 754 * returns the state of the request (in progress or done) and 755 * the status or attributes that were part of the original reply. 756 * 757 * If DUP_DONE (there is a duplicate) svc_cots_kdup copies over the 758 * value of the response. In that case, also return in *dupcachedp 759 * whether the response free routine is cached in the dupreq - in which case 760 * the caller should not be freeing it, because it will be done later 761 * in the svc_cots_kdup code when the dupreq is reused. 762 */ 763 static int 764 svc_cots_kdup(struct svc_req *req, caddr_t res, int size, struct dupreq **drpp, 765 bool_t *dupcachedp) 766 { 767 struct rpc_cots_server *stats = CLONE2STATS(req->rq_xprt); 768 struct dupreq *dr; 769 uint32_t xid; 770 uint32_t drhash; 771 int status; 772 773 xid = REQTOXID(req); 774 mutex_enter(&cotsdupreq_lock); 775 RSSTAT_INCR(stats, rsdupchecks); 776 /* 777 * Check to see whether an entry already exists in the cache. 778 */ 779 dr = cotsdrhashtbl[XIDHASH(xid)]; 780 while (dr != NULL) { 781 if (dr->dr_xid == xid && 782 dr->dr_proc == req->rq_proc && 783 dr->dr_prog == req->rq_prog && 784 dr->dr_vers == req->rq_vers && 785 dr->dr_addr.len == req->rq_xprt->xp_rtaddr.len && 786 bcmp((caddr_t)dr->dr_addr.buf, 787 (caddr_t)req->rq_xprt->xp_rtaddr.buf, 788 dr->dr_addr.len) == 0) { 789 status = dr->dr_status; 790 if (status == DUP_DONE) { 791 bcopy(dr->dr_resp.buf, res, size); 792 if (dupcachedp != NULL) 793 *dupcachedp = (dr->dr_resfree != NULL); 794 TRACE_0(TR_FAC_KRPC, TR_SVC_COTS_KDUP_DONE, 795 "svc_cots_kdup: DUP_DONE"); 796 } else { 797 dr->dr_status = DUP_INPROGRESS; 798 *drpp = dr; 799 TRACE_0(TR_FAC_KRPC, 800 TR_SVC_COTS_KDUP_INPROGRESS, 801 "svc_cots_kdup: DUP_INPROGRESS"); 802 } 803 RSSTAT_INCR(stats, rsdupreqs); 804 mutex_exit(&cotsdupreq_lock); 805 return (status); 806 } 807 dr = dr->dr_chain; 808 } 809 810 /* 811 * There wasn't an entry, either allocate a new one or recycle 812 * an old one. 813 */ 814 if (cotsndupreqs < cotsmaxdupreqs) { 815 dr = kmem_alloc(sizeof (*dr), KM_NOSLEEP); 816 if (dr == NULL) { 817 mutex_exit(&cotsdupreq_lock); 818 return (DUP_ERROR); 819 } 820 dr->dr_resp.buf = NULL; 821 dr->dr_resp.maxlen = 0; 822 dr->dr_addr.buf = NULL; 823 dr->dr_addr.maxlen = 0; 824 if (cotsdrmru) { 825 dr->dr_next = cotsdrmru->dr_next; 826 cotsdrmru->dr_next = dr; 827 } else { 828 dr->dr_next = dr; 829 } 830 cotsndupreqs++; 831 } else { 832 dr = cotsdrmru->dr_next; 833 while (dr->dr_status == DUP_INPROGRESS) { 834 dr = dr->dr_next; 835 if (dr == cotsdrmru->dr_next) { 836 cmn_err(CE_WARN, "svc_cots_kdup no slots free"); 837 mutex_exit(&cotsdupreq_lock); 838 return (DUP_ERROR); 839 } 840 } 841 unhash(dr); 842 if (dr->dr_resfree) { 843 (*dr->dr_resfree)(dr->dr_resp.buf); 844 } 845 } 846 dr->dr_resfree = NULL; 847 cotsdrmru = dr; 848 849 dr->dr_xid = REQTOXID(req); 850 dr->dr_prog = req->rq_prog; 851 dr->dr_vers = req->rq_vers; 852 dr->dr_proc = req->rq_proc; 853 if (dr->dr_addr.maxlen < req->rq_xprt->xp_rtaddr.len) { 854 if (dr->dr_addr.buf != NULL) 855 kmem_free(dr->dr_addr.buf, dr->dr_addr.maxlen); 856 dr->dr_addr.maxlen = req->rq_xprt->xp_rtaddr.len; 857 dr->dr_addr.buf = kmem_alloc(dr->dr_addr.maxlen, KM_NOSLEEP); 858 if (dr->dr_addr.buf == NULL) { 859 dr->dr_addr.maxlen = 0; 860 dr->dr_status = DUP_DROP; 861 mutex_exit(&cotsdupreq_lock); 862 return (DUP_ERROR); 863 } 864 } 865 dr->dr_addr.len = req->rq_xprt->xp_rtaddr.len; 866 bcopy(req->rq_xprt->xp_rtaddr.buf, dr->dr_addr.buf, dr->dr_addr.len); 867 if (dr->dr_resp.maxlen < size) { 868 if (dr->dr_resp.buf != NULL) 869 kmem_free(dr->dr_resp.buf, dr->dr_resp.maxlen); 870 dr->dr_resp.maxlen = (unsigned int)size; 871 dr->dr_resp.buf = kmem_alloc(size, KM_NOSLEEP); 872 if (dr->dr_resp.buf == NULL) { 873 dr->dr_resp.maxlen = 0; 874 dr->dr_status = DUP_DROP; 875 mutex_exit(&cotsdupreq_lock); 876 return (DUP_ERROR); 877 } 878 } 879 dr->dr_status = DUP_INPROGRESS; 880 881 drhash = (uint32_t)DRHASH(dr); 882 dr->dr_chain = cotsdrhashtbl[drhash]; 883 cotsdrhashtbl[drhash] = dr; 884 cotsdrhashstat[drhash]++; 885 mutex_exit(&cotsdupreq_lock); 886 *drpp = dr; 887 return (DUP_NEW); 888 } 889 890 /* 891 * PSARC 2003/523 Contract Private Interface 892 * svc_cots_kdupdone 893 * Changes must be reviewed by Solaris File Sharing 894 * Changes must be communicated to contract-2003-523@sun.com 895 * 896 * svc_cots_kdupdone marks the request done (DUP_DONE or DUP_DROP) 897 * and stores the response. 898 */ 899 static void 900 svc_cots_kdupdone(struct dupreq *dr, caddr_t res, void (*dis_resfree)(), 901 int size, int status) 902 { 903 ASSERT(dr->dr_resfree == NULL); 904 if (status == DUP_DONE) { 905 bcopy(res, dr->dr_resp.buf, size); 906 dr->dr_resfree = dis_resfree; 907 } 908 dr->dr_status = status; 909 } 910 911 /* 912 * This routine expects that the mutex, cotsdupreq_lock, is already held. 913 */ 914 static void 915 unhash(struct dupreq *dr) 916 { 917 struct dupreq *drt; 918 struct dupreq *drtprev = NULL; 919 uint32_t drhash; 920 921 ASSERT(MUTEX_HELD(&cotsdupreq_lock)); 922 923 drhash = (uint32_t)DRHASH(dr); 924 drt = cotsdrhashtbl[drhash]; 925 while (drt != NULL) { 926 if (drt == dr) { 927 cotsdrhashstat[drhash]--; 928 if (drtprev == NULL) { 929 cotsdrhashtbl[drhash] = drt->dr_chain; 930 } else { 931 drtprev->dr_chain = drt->dr_chain; 932 } 933 return; 934 } 935 drtprev = drt; 936 drt = drt->dr_chain; 937 } 938 } 939 940 void 941 svc_cots_stats_init(zoneid_t zoneid, struct rpc_cots_server **statsp) 942 { 943 *statsp = (struct rpc_cots_server *)rpcstat_zone_init_common(zoneid, 944 "unix", "rpc_cots_server", (const kstat_named_t *)&cots_rsstat_tmpl, 945 sizeof (cots_rsstat_tmpl)); 946 } 947 948 void 949 svc_cots_stats_fini(zoneid_t zoneid, struct rpc_cots_server **statsp) 950 { 951 rpcstat_zone_fini_common(zoneid, "unix", "rpc_cots_server"); 952 kmem_free(*statsp, sizeof (cots_rsstat_tmpl)); 953 } 954 955 void 956 svc_cots_init(void) 957 { 958 /* 959 * Check to make sure that the cots private data will fit into 960 * the stack buffer allocated by svc_run. The ASSERT is a safety 961 * net if the cots_data_t structure ever changes. 962 */ 963 /*CONSTANTCONDITION*/ 964 ASSERT(sizeof (cots_data_t) <= SVC_P2LEN); 965 966 mutex_init(&cots_kcreate_lock, NULL, MUTEX_DEFAULT, NULL); 967 mutex_init(&cotsdupreq_lock, NULL, MUTEX_DEFAULT, NULL); 968 } 969