1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2015 Nexenta Systems, Inc. All rights reserved. 24 * Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved. 25 * Copyright (c) 2012 by Delphix. All rights reserved. 26 * Copyright 2012 Marcel Telka <marcel@telka.sk> 27 * Copyright 2018 OmniOS Community Edition (OmniOSce) Association. 28 */ 29 30 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 31 /* All Rights Reserved */ 32 33 /* 34 * Portions of this source code were derived from Berkeley 4.3 BSD 35 * under license from the Regents of the University of California. 36 */ 37 38 /* 39 * svc_cots.c 40 * Server side for connection-oriented RPC in the kernel. 41 * 42 */ 43 44 #include <sys/param.h> 45 #include <sys/types.h> 46 #include <sys/sysmacros.h> 47 #include <sys/file.h> 48 #include <sys/stream.h> 49 #include <sys/strsubr.h> 50 #include <sys/strsun.h> 51 #include <sys/stropts.h> 52 #include <sys/tiuser.h> 53 #include <sys/timod.h> 54 #include <sys/tihdr.h> 55 #include <sys/fcntl.h> 56 #include <sys/errno.h> 57 #include <sys/kmem.h> 58 #include <sys/systm.h> 59 #include <sys/debug.h> 60 #include <sys/cmn_err.h> 61 #include <sys/kstat.h> 62 #include <sys/vtrace.h> 63 64 #include <rpc/types.h> 65 #include <rpc/xdr.h> 66 #include <rpc/auth.h> 67 #include <rpc/rpc_msg.h> 68 #include <rpc/svc.h> 69 #include <inet/ip.h> 70 71 #define COTS_MAX_ALLOCSIZE 2048 72 #define MSG_OFFSET 128 /* offset of call into the mblk */ 73 #define RM_HDR_SIZE 4 /* record mark header size */ 74 75 /* 76 * Routines exported through ops vector. 77 */ 78 static bool_t svc_cots_krecv(SVCXPRT *, mblk_t *, struct rpc_msg *); 79 static bool_t svc_cots_ksend(SVCXPRT *, struct rpc_msg *); 80 static bool_t svc_cots_kgetargs(SVCXPRT *, xdrproc_t, caddr_t); 81 static bool_t svc_cots_kfreeargs(SVCXPRT *, xdrproc_t, caddr_t); 82 static void svc_cots_kdestroy(SVCMASTERXPRT *); 83 static int svc_cots_kdup(struct svc_req *, caddr_t, int, 84 struct dupreq **, bool_t *); 85 static void svc_cots_kdupdone(struct dupreq *, caddr_t, 86 void (*)(), int, int); 87 static int32_t *svc_cots_kgetres(SVCXPRT *, int); 88 static void svc_cots_kfreeres(SVCXPRT *); 89 static void svc_cots_kclone_destroy(SVCXPRT *); 90 static void svc_cots_kstart(SVCMASTERXPRT *); 91 static void svc_cots_ktattrs(SVCXPRT *, int, void **); 92 93 /* 94 * Server transport operations vector. 95 */ 96 struct svc_ops svc_cots_op = { 97 svc_cots_krecv, /* Get requests */ 98 svc_cots_kgetargs, /* Deserialize arguments */ 99 svc_cots_ksend, /* Send reply */ 100 svc_cots_kfreeargs, /* Free argument data space */ 101 svc_cots_kdestroy, /* Destroy transport handle */ 102 svc_cots_kdup, /* Check entry in dup req cache */ 103 svc_cots_kdupdone, /* Mark entry in dup req cache as done */ 104 svc_cots_kgetres, /* Get pointer to response buffer */ 105 svc_cots_kfreeres, /* Destroy pre-serialized response header */ 106 svc_cots_kclone_destroy, /* Destroy a clone xprt */ 107 svc_cots_kstart, /* Tell `ready-to-receive' to rpcmod */ 108 NULL, /* Transport specific clone xprt */ 109 svc_cots_ktattrs, /* Transport Attributes */ 110 mir_svc_hold, /* Increment transport reference count */ 111 mir_svc_release /* Decrement transport reference count */ 112 }; 113 114 /* 115 * Master transport private data. 116 * Kept in xprt->xp_p2. 117 */ 118 struct cots_master_data { 119 char *cmd_src_addr; /* client's address */ 120 int cmd_xprt_started; /* flag for clone routine to call */ 121 /* rpcmod's start routine. */ 122 struct rpc_cots_server *cmd_stats; /* stats for zone */ 123 }; 124 125 /* 126 * Transport private data. 127 * Kept in clone_xprt->xp_p2buf. 128 */ 129 typedef struct cots_data { 130 mblk_t *cd_mp; /* pre-allocated reply message */ 131 mblk_t *cd_req_mp; /* request message */ 132 } cots_data_t; 133 134 /* 135 * Server statistics 136 * NOTE: This structure type is duplicated in the NFS fast path. 137 */ 138 static const struct rpc_cots_server { 139 kstat_named_t rscalls; 140 kstat_named_t rsbadcalls; 141 kstat_named_t rsnullrecv; 142 kstat_named_t rsbadlen; 143 kstat_named_t rsxdrcall; 144 kstat_named_t rsdupchecks; 145 kstat_named_t rsdupreqs; 146 } cots_rsstat_tmpl = { 147 { "calls", KSTAT_DATA_UINT64 }, 148 { "badcalls", KSTAT_DATA_UINT64 }, 149 { "nullrecv", KSTAT_DATA_UINT64 }, 150 { "badlen", KSTAT_DATA_UINT64 }, 151 { "xdrcall", KSTAT_DATA_UINT64 }, 152 { "dupchecks", KSTAT_DATA_UINT64 }, 153 { "dupreqs", KSTAT_DATA_UINT64 } 154 }; 155 156 #define CLONE2STATS(clone_xprt) \ 157 ((struct cots_master_data *)(clone_xprt)->xp_master->xp_p2)->cmd_stats 158 #define RSSTAT_INCR(s, x) \ 159 atomic_inc_64(&(s)->x.value.ui64) 160 161 /* 162 * Pointer to a transport specific `ready to receive' function in rpcmod 163 * (set from rpcmod). 164 */ 165 void (*mir_start)(queue_t *); 166 uint_t *svc_max_msg_sizep; 167 168 /* 169 * the address size of the underlying transport can sometimes be 170 * unknown (tinfo->ADDR_size == -1). For this case, it is 171 * necessary to figure out what the size is so the correct amount 172 * of data is allocated. This is an itterative process: 173 * 1. take a good guess (use T_MINADDRSIZE) 174 * 2. try it. 175 * 3. if it works then everything is ok 176 * 4. if the error is ENAMETOLONG, double the guess 177 * 5. go back to step 2. 178 */ 179 #define T_UNKNOWNADDRSIZE (-1) 180 #define T_MINADDRSIZE 32 181 182 /* 183 * Create a transport record. 184 * The transport record, output buffer, and private data structure 185 * are allocated. The output buffer is serialized into using xdrmem. 186 * There is one transport record per user process which implements a 187 * set of services. 188 */ 189 static kmutex_t cots_kcreate_lock; 190 191 int 192 svc_cots_kcreate(file_t *fp, uint_t max_msgsize, struct T_info_ack *tinfo, 193 SVCMASTERXPRT **nxprt) 194 { 195 struct cots_master_data *cmd; 196 int err, retval; 197 SVCMASTERXPRT *xprt; 198 struct rpcstat *rpcstat; 199 struct T_addr_ack *ack_p; 200 struct strioctl getaddr; 201 202 if (nxprt == NULL) 203 return (EINVAL); 204 205 rpcstat = zone_getspecific(rpcstat_zone_key, curproc->p_zone); 206 ASSERT(rpcstat != NULL); 207 208 xprt = kmem_zalloc(sizeof (SVCMASTERXPRT), KM_SLEEP); 209 210 cmd = kmem_zalloc(sizeof (*cmd) + sizeof (*ack_p) 211 + (2 * sizeof (sin6_t)), KM_SLEEP); 212 213 ack_p = (struct T_addr_ack *)&cmd[1]; 214 215 if ((tinfo->TIDU_size > COTS_MAX_ALLOCSIZE) || 216 (tinfo->TIDU_size <= 0)) 217 xprt->xp_msg_size = COTS_MAX_ALLOCSIZE; 218 else { 219 xprt->xp_msg_size = tinfo->TIDU_size - 220 (tinfo->TIDU_size % BYTES_PER_XDR_UNIT); 221 } 222 223 xprt->xp_ops = &svc_cots_op; 224 xprt->xp_p2 = (caddr_t)cmd; 225 cmd->cmd_xprt_started = 0; 226 cmd->cmd_stats = rpcstat->rpc_cots_server; 227 228 getaddr.ic_cmd = TI_GETINFO; 229 getaddr.ic_timout = -1; 230 getaddr.ic_len = sizeof (*ack_p) + (2 * sizeof (sin6_t)); 231 getaddr.ic_dp = (char *)ack_p; 232 ack_p->PRIM_type = T_ADDR_REQ; 233 234 err = strioctl(fp->f_vnode, I_STR, (intptr_t)&getaddr, 235 0, K_TO_K, CRED(), &retval); 236 if (err) { 237 kmem_free(cmd, sizeof (*cmd) + sizeof (*ack_p) + 238 (2 * sizeof (sin6_t))); 239 kmem_free(xprt, sizeof (SVCMASTERXPRT)); 240 return (err); 241 } 242 243 xprt->xp_rtaddr.maxlen = ack_p->REMADDR_length; 244 xprt->xp_rtaddr.len = ack_p->REMADDR_length; 245 cmd->cmd_src_addr = xprt->xp_rtaddr.buf = 246 (char *)ack_p + ack_p->REMADDR_offset; 247 248 xprt->xp_lcladdr.maxlen = ack_p->LOCADDR_length; 249 xprt->xp_lcladdr.len = ack_p->LOCADDR_length; 250 xprt->xp_lcladdr.buf = (char *)ack_p + ack_p->LOCADDR_offset; 251 252 /* 253 * If the current sanity check size in rpcmod is smaller 254 * than the size needed for this xprt, then increase 255 * the sanity check. 256 */ 257 if (max_msgsize != 0 && svc_max_msg_sizep && 258 max_msgsize > *svc_max_msg_sizep) { 259 260 /* This check needs a lock */ 261 mutex_enter(&cots_kcreate_lock); 262 if (svc_max_msg_sizep && max_msgsize > *svc_max_msg_sizep) 263 *svc_max_msg_sizep = max_msgsize; 264 mutex_exit(&cots_kcreate_lock); 265 } 266 267 *nxprt = xprt; 268 269 return (0); 270 } 271 272 /* 273 * Destroy a master transport record. 274 * Frees the space allocated for a transport record. 275 */ 276 static void 277 svc_cots_kdestroy(SVCMASTERXPRT *xprt) 278 { 279 struct cots_master_data *cmd = (struct cots_master_data *)xprt->xp_p2; 280 281 ASSERT(cmd); 282 283 if (xprt->xp_netid) 284 kmem_free(xprt->xp_netid, strlen(xprt->xp_netid) + 1); 285 if (xprt->xp_addrmask.maxlen) 286 kmem_free(xprt->xp_addrmask.buf, xprt->xp_addrmask.maxlen); 287 288 mutex_destroy(&xprt->xp_req_lock); 289 mutex_destroy(&xprt->xp_thread_lock); 290 291 kmem_free(cmd, sizeof (*cmd) + sizeof (struct T_addr_ack) + 292 (2 * sizeof (sin6_t))); 293 294 kmem_free(xprt, sizeof (SVCMASTERXPRT)); 295 } 296 297 /* 298 * svc_tli_kcreate() calls this function at the end to tell 299 * rpcmod that the transport is ready to receive requests. 300 */ 301 static void 302 svc_cots_kstart(SVCMASTERXPRT *xprt) 303 { 304 struct cots_master_data *cmd = (struct cots_master_data *)xprt->xp_p2; 305 306 if (cmd->cmd_xprt_started == 0) { 307 /* 308 * Acquire the xp_req_lock in order to use xp_wq 309 * safely (we don't want to qenable a queue that has 310 * already been closed). 311 */ 312 mutex_enter(&xprt->xp_req_lock); 313 if (cmd->cmd_xprt_started == 0 && 314 xprt->xp_wq != NULL) { 315 (*mir_start)(xprt->xp_wq); 316 cmd->cmd_xprt_started = 1; 317 } 318 mutex_exit(&xprt->xp_req_lock); 319 } 320 } 321 322 /* 323 * Transport-type specific part of svc_xprt_cleanup(). 324 */ 325 static void 326 svc_cots_kclone_destroy(SVCXPRT *clone_xprt) 327 { 328 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 329 330 if (cd->cd_req_mp) { 331 freemsg(cd->cd_req_mp); 332 cd->cd_req_mp = (mblk_t *)0; 333 } 334 ASSERT(cd->cd_mp == NULL); 335 } 336 337 /* 338 * Transport Attributes. 339 */ 340 static void 341 svc_cots_ktattrs(SVCXPRT *clone_xprt, int attrflag, void **tattr) 342 { 343 *tattr = NULL; 344 345 switch (attrflag) { 346 case SVC_TATTR_ADDRMASK: 347 *tattr = (void *)&clone_xprt->xp_master->xp_addrmask; 348 } 349 } 350 351 /* 352 * Receive rpc requests. 353 * Checks if the message is intact, and deserializes the call packet. 354 */ 355 static bool_t 356 svc_cots_krecv(SVCXPRT *clone_xprt, mblk_t *mp, struct rpc_msg *msg) 357 { 358 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 359 XDR *xdrs = &clone_xprt->xp_xdrin; 360 struct rpc_cots_server *stats = CLONE2STATS(clone_xprt); 361 362 TRACE_0(TR_FAC_KRPC, TR_SVC_COTS_KRECV_START, 363 "svc_cots_krecv_start:"); 364 RPCLOG(4, "svc_cots_krecv_start clone_xprt = %p:\n", 365 (void *)clone_xprt); 366 367 RSSTAT_INCR(stats, rscalls); 368 369 if (mp->b_datap->db_type != M_DATA) { 370 RPCLOG(16, "svc_cots_krecv bad db_type %d\n", 371 mp->b_datap->db_type); 372 goto bad; 373 } 374 375 xdrmblk_init(xdrs, mp, XDR_DECODE, 0); 376 377 TRACE_0(TR_FAC_KRPC, TR_XDR_CALLMSG_START, 378 "xdr_callmsg_start:"); 379 RPCLOG0(4, "xdr_callmsg_start:\n"); 380 if (!xdr_callmsg(xdrs, msg)) { 381 XDR_DESTROY(xdrs); 382 TRACE_1(TR_FAC_KRPC, TR_XDR_CALLMSG_END, 383 "xdr_callmsg_end:(%S)", "bad"); 384 RPCLOG0(1, "svc_cots_krecv xdr_callmsg failure\n"); 385 RSSTAT_INCR(stats, rsxdrcall); 386 goto bad; 387 } 388 TRACE_1(TR_FAC_KRPC, TR_XDR_CALLMSG_END, 389 "xdr_callmsg_end:(%S)", "good"); 390 391 clone_xprt->xp_xid = msg->rm_xid; 392 cd->cd_req_mp = mp; 393 394 TRACE_1(TR_FAC_KRPC, TR_SVC_COTS_KRECV_END, 395 "svc_cots_krecv_end:(%S)", "good"); 396 RPCLOG0(4, "svc_cots_krecv_end:good\n"); 397 return (TRUE); 398 399 bad: 400 if (mp) 401 freemsg(mp); 402 403 RSSTAT_INCR(stats, rsbadcalls); 404 TRACE_1(TR_FAC_KRPC, TR_SVC_COTS_KRECV_END, 405 "svc_cots_krecv_end:(%S)", "bad"); 406 return (FALSE); 407 } 408 409 /* 410 * Send rpc reply. 411 */ 412 static bool_t 413 svc_cots_ksend(SVCXPRT *clone_xprt, struct rpc_msg *msg) 414 { 415 /* LINTED pointer alignment */ 416 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 417 XDR *xdrs = &(clone_xprt->xp_xdrout); 418 int retval = FALSE; 419 mblk_t *mp; 420 xdrproc_t xdr_results; 421 caddr_t xdr_location; 422 bool_t has_args; 423 424 TRACE_0(TR_FAC_KRPC, TR_SVC_COTS_KSEND_START, 425 "svc_cots_ksend_start:"); 426 427 /* 428 * If there is a result procedure specified in the reply message, 429 * it will be processed in the xdr_replymsg and SVCAUTH_WRAP. 430 * We need to make sure it won't be processed twice, so we null 431 * it for xdr_replymsg here. 432 */ 433 has_args = FALSE; 434 if (msg->rm_reply.rp_stat == MSG_ACCEPTED && 435 msg->rm_reply.rp_acpt.ar_stat == SUCCESS) { 436 if ((xdr_results = msg->acpted_rply.ar_results.proc) != NULL) { 437 has_args = TRUE; 438 xdr_location = msg->acpted_rply.ar_results.where; 439 msg->acpted_rply.ar_results.proc = xdr_void; 440 msg->acpted_rply.ar_results.where = NULL; 441 } 442 } 443 444 mp = cd->cd_mp; 445 if (mp) { 446 /* 447 * The program above pre-allocated an mblk and put 448 * the data in place. 449 */ 450 cd->cd_mp = (mblk_t *)NULL; 451 if (!(xdr_replymsg_body(xdrs, msg) && 452 (!has_args || SVCAUTH_WRAP(&clone_xprt->xp_auth, xdrs, 453 xdr_results, xdr_location)))) { 454 XDR_DESTROY(xdrs); 455 RPCLOG0(1, "svc_cots_ksend: " 456 "xdr_replymsg_body/SVCAUTH_WRAP failed\n"); 457 freemsg(mp); 458 goto out; 459 } 460 } else { 461 int len; 462 int mpsize; 463 464 /* 465 * Leave space for protocol headers. 466 */ 467 len = MSG_OFFSET + clone_xprt->xp_msg_size; 468 469 /* 470 * Allocate an initial mblk for the response data. 471 */ 472 while (!(mp = allocb(len, BPRI_LO))) { 473 RPCLOG0(16, "svc_cots_ksend: allocb failed failed\n"); 474 if (strwaitbuf(len, BPRI_LO)) { 475 TRACE_1(TR_FAC_KRPC, TR_SVC_COTS_KSEND_END, 476 "svc_cots_ksend_end:(%S)", "strwaitbuf"); 477 RPCLOG0(1, 478 "svc_cots_ksend: strwaitbuf failed\n"); 479 goto out; 480 } 481 } 482 483 /* 484 * Initialize the XDR encode stream. Additional mblks 485 * will be allocated if necessary. They will be TIDU 486 * sized. 487 */ 488 xdrmblk_init(xdrs, mp, XDR_ENCODE, clone_xprt->xp_msg_size); 489 mpsize = MBLKSIZE(mp); 490 ASSERT(mpsize >= len); 491 ASSERT(mp->b_rptr == mp->b_datap->db_base); 492 493 /* 494 * If the size of mblk is not appreciably larger than what we 495 * asked, then resize the mblk to exactly len bytes. Reason for 496 * this: suppose len is 1600 bytes, the tidu is 1460 bytes 497 * (from TCP over ethernet), and the arguments to RPC require 498 * 2800 bytes. Ideally we want the protocol to render two 499 * ~1400 byte segments over the wire. If allocb() gives us a 2k 500 * mblk, and we allocate a second mblk for the rest, the 501 * protocol module may generate 3 segments over the wire: 502 * 1460 bytes for the first, 448 (2048 - 1600) for the 2nd, and 503 * 892 for the 3rd. If we "waste" 448 bytes in the first mblk, 504 * the XDR encoding will generate two ~1400 byte mblks, and the 505 * protocol module is more likely to produce properly sized 506 * segments. 507 */ 508 if ((mpsize >> 1) <= len) { 509 mp->b_rptr += (mpsize - len); 510 } 511 512 /* 513 * Adjust b_rptr to reserve space for the non-data protocol 514 * headers that any downstream modules might like to add, and 515 * for the record marking header. 516 */ 517 mp->b_rptr += (MSG_OFFSET + RM_HDR_SIZE); 518 519 XDR_SETPOS(xdrs, (uint_t)(mp->b_rptr - mp->b_datap->db_base)); 520 ASSERT(mp->b_wptr == mp->b_rptr); 521 522 msg->rm_xid = clone_xprt->xp_xid; 523 524 TRACE_0(TR_FAC_KRPC, TR_XDR_REPLYMSG_START, 525 "xdr_replymsg_start:"); 526 if (!(xdr_replymsg(xdrs, msg) && 527 (!has_args || SVCAUTH_WRAP(&clone_xprt->xp_auth, xdrs, 528 xdr_results, xdr_location)))) { 529 XDR_DESTROY(xdrs); 530 TRACE_1(TR_FAC_KRPC, TR_XDR_REPLYMSG_END, 531 "xdr_replymsg_end:(%S)", "bad"); 532 freemsg(mp); 533 RPCLOG0(1, "svc_cots_ksend: xdr_replymsg/SVCAUTH_WRAP " 534 "failed\n"); 535 goto out; 536 } 537 TRACE_1(TR_FAC_KRPC, TR_XDR_REPLYMSG_END, 538 "xdr_replymsg_end:(%S)", "good"); 539 } 540 541 XDR_DESTROY(xdrs); 542 543 put(clone_xprt->xp_wq, mp); 544 retval = TRUE; 545 546 out: 547 /* 548 * This is completely disgusting. If public is set it is 549 * a pointer to a structure whose first field is the address 550 * of the function to free that structure and any related 551 * stuff. (see rrokfree in nfs_xdr.c). 552 */ 553 if (xdrs->x_public) { 554 /* LINTED pointer alignment */ 555 (**((int (**)())xdrs->x_public))(xdrs->x_public); 556 } 557 558 TRACE_1(TR_FAC_KRPC, TR_SVC_COTS_KSEND_END, 559 "svc_cots_ksend_end:(%S)", "done"); 560 return (retval); 561 } 562 563 /* 564 * Deserialize arguments. 565 */ 566 static bool_t 567 svc_cots_kgetargs(SVCXPRT *clone_xprt, xdrproc_t xdr_args, 568 caddr_t args_ptr) 569 { 570 return (SVCAUTH_UNWRAP(&clone_xprt->xp_auth, &clone_xprt->xp_xdrin, 571 xdr_args, args_ptr)); 572 } 573 574 static bool_t 575 svc_cots_kfreeargs(SVCXPRT *clone_xprt, xdrproc_t xdr_args, 576 caddr_t args_ptr) 577 { 578 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 579 /* LINTED pointer alignment */ 580 XDR *xdrs = &clone_xprt->xp_xdrin; 581 mblk_t *mp; 582 bool_t retval; 583 584 /* 585 * It is important to call the XDR routine before 586 * freeing the request mblk. Structures in the 587 * XDR data may point into the mblk and require that 588 * the memory be intact during the free routine. 589 */ 590 if (args_ptr) { 591 xdrs->x_op = XDR_FREE; 592 retval = (*xdr_args)(xdrs, args_ptr); 593 } else 594 retval = TRUE; 595 596 XDR_DESTROY(xdrs); 597 598 if ((mp = cd->cd_req_mp) != NULL) { 599 cd->cd_req_mp = (mblk_t *)0; 600 freemsg(mp); 601 } 602 603 return (retval); 604 } 605 606 static int32_t * 607 svc_cots_kgetres(SVCXPRT *clone_xprt, int size) 608 { 609 /* LINTED pointer alignment */ 610 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf; 611 XDR *xdrs = &clone_xprt->xp_xdrout; 612 mblk_t *mp; 613 int32_t *buf; 614 struct rpc_msg rply; 615 int len; 616 int mpsize; 617 618 /* 619 * Leave space for protocol headers. 620 */ 621 len = MSG_OFFSET + clone_xprt->xp_msg_size; 622 623 /* 624 * Allocate an initial mblk for the response data. 625 */ 626 while ((mp = allocb(len, BPRI_LO)) == NULL) { 627 if (strwaitbuf(len, BPRI_LO)) 628 return (NULL); 629 } 630 631 /* 632 * Initialize the XDR encode stream. Additional mblks 633 * will be allocated if necessary. They will be TIDU 634 * sized. 635 */ 636 xdrmblk_init(xdrs, mp, XDR_ENCODE, clone_xprt->xp_msg_size); 637 mpsize = MBLKSIZE(mp); 638 ASSERT(mpsize >= len); 639 ASSERT(mp->b_rptr == mp->b_datap->db_base); 640 641 /* 642 * If the size of mblk is not appreciably larger than what we 643 * asked, then resize the mblk to exactly len bytes. Reason for 644 * this: suppose len is 1600 bytes, the tidu is 1460 bytes 645 * (from TCP over ethernet), and the arguments to RPC require 646 * 2800 bytes. Ideally we want the protocol to render two 647 * ~1400 byte segments over the wire. If allocb() gives us a 2k 648 * mblk, and we allocate a second mblk for the rest, the 649 * protocol module may generate 3 segments over the wire: 650 * 1460 bytes for the first, 448 (2048 - 1600) for the 2nd, and 651 * 892 for the 3rd. If we "waste" 448 bytes in the first mblk, 652 * the XDR encoding will generate two ~1400 byte mblks, and the 653 * protocol module is more likely to produce properly sized 654 * segments. 655 */ 656 if ((mpsize >> 1) <= len) { 657 mp->b_rptr += (mpsize - len); 658 } 659 660 /* 661 * Adjust b_rptr to reserve space for the non-data protocol 662 * headers that any downstream modules might like to add, and 663 * for the record marking header. 664 */ 665 mp->b_rptr += (MSG_OFFSET + RM_HDR_SIZE); 666 667 XDR_SETPOS(xdrs, (uint_t)(mp->b_rptr - mp->b_datap->db_base)); 668 ASSERT(mp->b_wptr == mp->b_rptr); 669 670 /* 671 * Assume a successful RPC since most of them are. 672 */ 673 rply.rm_xid = clone_xprt->xp_xid; 674 rply.rm_direction = REPLY; 675 rply.rm_reply.rp_stat = MSG_ACCEPTED; 676 rply.acpted_rply.ar_verf = clone_xprt->xp_verf; 677 rply.acpted_rply.ar_stat = SUCCESS; 678 679 if (!xdr_replymsg_hdr(xdrs, &rply)) { 680 XDR_DESTROY(xdrs); 681 freeb(mp); 682 return (NULL); 683 } 684 685 buf = XDR_INLINE(xdrs, size); 686 if (buf == NULL) { 687 XDR_DESTROY(xdrs); 688 ASSERT(cd->cd_mp == NULL); 689 freemsg(mp); 690 } else { 691 cd->cd_mp = mp; 692 } 693 return (buf); 694 } 695 696 static void 697 svc_cots_kfreeres(SVCXPRT *clone_xprt) 698 { 699 cots_data_t *cd; 700 mblk_t *mp; 701 702 cd = (cots_data_t *)clone_xprt->xp_p2buf; 703 if ((mp = cd->cd_mp) != NULL) { 704 XDR_DESTROY(&clone_xprt->xp_xdrout); 705 cd->cd_mp = (mblk_t *)NULL; 706 freemsg(mp); 707 } 708 } 709 710 /* 711 * the dup cacheing routines below provide a cache of non-failure 712 * transaction id's. rpc service routines can use this to detect 713 * retransmissions and re-send a non-failure response. 714 */ 715 716 /* 717 * MAXDUPREQS is the number of cached items. It should be adjusted 718 * to the service load so that there is likely to be a response entry 719 * when the first retransmission comes in. 720 */ 721 #define MAXDUPREQS 8192 722 723 /* 724 * This should be appropriately scaled to MAXDUPREQS. To produce as less as 725 * possible collisions it is suggested to set this to a prime. 726 */ 727 #define DRHASHSZ 2053 728 729 #define XIDHASH(xid) ((xid) % DRHASHSZ) 730 #define DRHASH(dr) XIDHASH((dr)->dr_xid) 731 #define REQTOXID(req) ((req)->rq_xprt->xp_xid) 732 733 static int cotsndupreqs = 0; 734 int cotsmaxdupreqs = MAXDUPREQS; 735 static kmutex_t cotsdupreq_lock; 736 static struct dupreq *cotsdrhashtbl[DRHASHSZ]; 737 static int cotsdrhashstat[DRHASHSZ]; 738 739 static void unhash(struct dupreq *); 740 741 /* 742 * cotsdrmru points to the head of a circular linked list in lru order. 743 * cotsdrmru->dr_next == drlru 744 */ 745 struct dupreq *cotsdrmru; 746 747 /* 748 * PSARC 2003/523 Contract Private Interface 749 * svc_cots_kdup 750 * Changes must be reviewed by Solaris File Sharing 751 * Changes must be communicated to contract-2003-523@sun.com 752 * 753 * svc_cots_kdup searches the request cache and returns 0 if the 754 * request is not found in the cache. If it is found, then it 755 * returns the state of the request (in progress or done) and 756 * the status or attributes that were part of the original reply. 757 * 758 * If DUP_DONE (there is a duplicate) svc_cots_kdup copies over the 759 * value of the response. In that case, also return in *dupcachedp 760 * whether the response free routine is cached in the dupreq - in which case 761 * the caller should not be freeing it, because it will be done later 762 * in the svc_cots_kdup code when the dupreq is reused. 763 */ 764 static int 765 svc_cots_kdup(struct svc_req *req, caddr_t res, int size, struct dupreq **drpp, 766 bool_t *dupcachedp) 767 { 768 struct rpc_cots_server *stats = CLONE2STATS(req->rq_xprt); 769 struct dupreq *dr; 770 uint32_t xid; 771 uint32_t drhash; 772 int status; 773 774 xid = REQTOXID(req); 775 mutex_enter(&cotsdupreq_lock); 776 RSSTAT_INCR(stats, rsdupchecks); 777 /* 778 * Check to see whether an entry already exists in the cache. 779 */ 780 dr = cotsdrhashtbl[XIDHASH(xid)]; 781 while (dr != NULL) { 782 if (dr->dr_xid == xid && 783 dr->dr_proc == req->rq_proc && 784 dr->dr_prog == req->rq_prog && 785 dr->dr_vers == req->rq_vers && 786 dr->dr_addr.len == req->rq_xprt->xp_rtaddr.len && 787 bcmp((caddr_t)dr->dr_addr.buf, 788 (caddr_t)req->rq_xprt->xp_rtaddr.buf, 789 dr->dr_addr.len) == 0) { 790 status = dr->dr_status; 791 if (status == DUP_DONE) { 792 bcopy(dr->dr_resp.buf, res, size); 793 if (dupcachedp != NULL) 794 *dupcachedp = (dr->dr_resfree != NULL); 795 TRACE_0(TR_FAC_KRPC, TR_SVC_COTS_KDUP_DONE, 796 "svc_cots_kdup: DUP_DONE"); 797 } else { 798 dr->dr_status = DUP_INPROGRESS; 799 *drpp = dr; 800 TRACE_0(TR_FAC_KRPC, 801 TR_SVC_COTS_KDUP_INPROGRESS, 802 "svc_cots_kdup: DUP_INPROGRESS"); 803 } 804 RSSTAT_INCR(stats, rsdupreqs); 805 mutex_exit(&cotsdupreq_lock); 806 return (status); 807 } 808 dr = dr->dr_chain; 809 } 810 811 /* 812 * There wasn't an entry, either allocate a new one or recycle 813 * an old one. 814 */ 815 if (cotsndupreqs < cotsmaxdupreqs) { 816 dr = kmem_alloc(sizeof (*dr), KM_NOSLEEP); 817 if (dr == NULL) { 818 mutex_exit(&cotsdupreq_lock); 819 return (DUP_ERROR); 820 } 821 dr->dr_resp.buf = NULL; 822 dr->dr_resp.maxlen = 0; 823 dr->dr_addr.buf = NULL; 824 dr->dr_addr.maxlen = 0; 825 if (cotsdrmru) { 826 dr->dr_next = cotsdrmru->dr_next; 827 cotsdrmru->dr_next = dr; 828 } else { 829 dr->dr_next = dr; 830 } 831 cotsndupreqs++; 832 } else { 833 dr = cotsdrmru->dr_next; 834 while (dr->dr_status == DUP_INPROGRESS) { 835 dr = dr->dr_next; 836 if (dr == cotsdrmru->dr_next) { 837 cmn_err(CE_WARN, "svc_cots_kdup no slots free"); 838 mutex_exit(&cotsdupreq_lock); 839 return (DUP_ERROR); 840 } 841 } 842 unhash(dr); 843 if (dr->dr_resfree) { 844 (*dr->dr_resfree)(dr->dr_resp.buf); 845 } 846 } 847 dr->dr_resfree = NULL; 848 cotsdrmru = dr; 849 850 dr->dr_xid = REQTOXID(req); 851 dr->dr_prog = req->rq_prog; 852 dr->dr_vers = req->rq_vers; 853 dr->dr_proc = req->rq_proc; 854 if (dr->dr_addr.maxlen < req->rq_xprt->xp_rtaddr.len) { 855 if (dr->dr_addr.buf != NULL) 856 kmem_free(dr->dr_addr.buf, dr->dr_addr.maxlen); 857 dr->dr_addr.maxlen = req->rq_xprt->xp_rtaddr.len; 858 dr->dr_addr.buf = kmem_alloc(dr->dr_addr.maxlen, KM_NOSLEEP); 859 if (dr->dr_addr.buf == NULL) { 860 dr->dr_addr.maxlen = 0; 861 dr->dr_status = DUP_DROP; 862 mutex_exit(&cotsdupreq_lock); 863 return (DUP_ERROR); 864 } 865 } 866 dr->dr_addr.len = req->rq_xprt->xp_rtaddr.len; 867 bcopy(req->rq_xprt->xp_rtaddr.buf, dr->dr_addr.buf, dr->dr_addr.len); 868 if (dr->dr_resp.maxlen < size) { 869 if (dr->dr_resp.buf != NULL) 870 kmem_free(dr->dr_resp.buf, dr->dr_resp.maxlen); 871 dr->dr_resp.maxlen = (unsigned int)size; 872 dr->dr_resp.buf = kmem_alloc(size, KM_NOSLEEP); 873 if (dr->dr_resp.buf == NULL) { 874 dr->dr_resp.maxlen = 0; 875 dr->dr_status = DUP_DROP; 876 mutex_exit(&cotsdupreq_lock); 877 return (DUP_ERROR); 878 } 879 } 880 dr->dr_status = DUP_INPROGRESS; 881 882 drhash = (uint32_t)DRHASH(dr); 883 dr->dr_chain = cotsdrhashtbl[drhash]; 884 cotsdrhashtbl[drhash] = dr; 885 cotsdrhashstat[drhash]++; 886 mutex_exit(&cotsdupreq_lock); 887 *drpp = dr; 888 return (DUP_NEW); 889 } 890 891 /* 892 * PSARC 2003/523 Contract Private Interface 893 * svc_cots_kdupdone 894 * Changes must be reviewed by Solaris File Sharing 895 * Changes must be communicated to contract-2003-523@sun.com 896 * 897 * svc_cots_kdupdone marks the request done (DUP_DONE or DUP_DROP) 898 * and stores the response. 899 */ 900 static void 901 svc_cots_kdupdone(struct dupreq *dr, caddr_t res, void (*dis_resfree)(), 902 int size, int status) 903 { 904 ASSERT(dr->dr_resfree == NULL); 905 if (status == DUP_DONE) { 906 bcopy(res, dr->dr_resp.buf, size); 907 dr->dr_resfree = dis_resfree; 908 } 909 dr->dr_status = status; 910 } 911 912 /* 913 * This routine expects that the mutex, cotsdupreq_lock, is already held. 914 */ 915 static void 916 unhash(struct dupreq *dr) 917 { 918 struct dupreq *drt; 919 struct dupreq *drtprev = NULL; 920 uint32_t drhash; 921 922 ASSERT(MUTEX_HELD(&cotsdupreq_lock)); 923 924 drhash = (uint32_t)DRHASH(dr); 925 drt = cotsdrhashtbl[drhash]; 926 while (drt != NULL) { 927 if (drt == dr) { 928 cotsdrhashstat[drhash]--; 929 if (drtprev == NULL) { 930 cotsdrhashtbl[drhash] = drt->dr_chain; 931 } else { 932 drtprev->dr_chain = drt->dr_chain; 933 } 934 return; 935 } 936 drtprev = drt; 937 drt = drt->dr_chain; 938 } 939 } 940 941 void 942 svc_cots_stats_init(zoneid_t zoneid, struct rpc_cots_server **statsp) 943 { 944 *statsp = (struct rpc_cots_server *)rpcstat_zone_init_common(zoneid, 945 "unix", "rpc_cots_server", (const kstat_named_t *)&cots_rsstat_tmpl, 946 sizeof (cots_rsstat_tmpl)); 947 } 948 949 void 950 svc_cots_stats_fini(zoneid_t zoneid, struct rpc_cots_server **statsp) 951 { 952 rpcstat_zone_fini_common(zoneid, "unix", "rpc_cots_server"); 953 kmem_free(*statsp, sizeof (cots_rsstat_tmpl)); 954 } 955 956 void 957 svc_cots_init(void) 958 { 959 /* 960 * Check to make sure that the cots private data will fit into 961 * the stack buffer allocated by svc_run. The ASSERT is a safety 962 * net if the cots_data_t structure ever changes. 963 */ 964 /*CONSTANTCONDITION*/ 965 ASSERT(sizeof (cots_data_t) <= SVC_P2LEN); 966 967 mutex_init(&cots_kcreate_lock, NULL, MUTEX_DEFAULT, NULL); 968 mutex_init(&cotsdupreq_lock, NULL, MUTEX_DEFAULT, NULL); 969 } 970