1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
24 * Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved.
25 * Copyright (c) 2012 by Delphix. All rights reserved.
26 * Copyright 2012 Marcel Telka <marcel@telka.sk>
27 * Copyright 2018 OmniOS Community Edition (OmniOSce) Association.
28 */
29
30 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
31 /* All Rights Reserved */
32
33 /*
34 * Portions of this source code were derived from Berkeley 4.3 BSD
35 * under license from the Regents of the University of California.
36 */
37
38 /*
39 * svc_cots.c
40 * Server side for connection-oriented RPC in the kernel.
41 *
42 */
43
44 #include <sys/param.h>
45 #include <sys/types.h>
46 #include <sys/sysmacros.h>
47 #include <sys/file.h>
48 #include <sys/stream.h>
49 #include <sys/strsubr.h>
50 #include <sys/strsun.h>
51 #include <sys/stropts.h>
52 #include <sys/tiuser.h>
53 #include <sys/timod.h>
54 #include <sys/tihdr.h>
55 #include <sys/fcntl.h>
56 #include <sys/errno.h>
57 #include <sys/kmem.h>
58 #include <sys/systm.h>
59 #include <sys/debug.h>
60 #include <sys/cmn_err.h>
61 #include <sys/kstat.h>
62 #include <sys/vtrace.h>
63
64 #include <rpc/types.h>
65 #include <rpc/xdr.h>
66 #include <rpc/auth.h>
67 #include <rpc/rpc_msg.h>
68 #include <rpc/svc.h>
69 #include <inet/ip.h>
70
/*
 * Sizing constants for reply buffers.
 * COTS_MAX_ALLOCSIZE is the xp_msg_size chosen by svc_cots_kcreate() when
 * the transport's TIDU_size is not positive or is larger than this value.
 * MSG_OFFSET bytes are reserved at the front of a reply mblk for headers
 * that downstream modules may add, followed by RM_HDR_SIZE bytes for the
 * record marking header.
 */
71 #define COTS_MAX_ALLOCSIZE 2048
72 #define MSG_OFFSET 128 /* offset of call into the mblk */
73 #define RM_HDR_SIZE 4 /* record mark header size */
74
75 /*
76 * Routines exported through ops vector.
 * They are declared here, ahead of their definitions, so that the
 * svc_cots_op initializer below can take their addresses. All of them
 * have internal linkage.
77 */
78 static bool_t svc_cots_krecv(SVCXPRT *, mblk_t *, struct rpc_msg *);
79 static bool_t svc_cots_ksend(SVCXPRT *, struct rpc_msg *);
80 static bool_t svc_cots_kgetargs(SVCXPRT *, xdrproc_t, caddr_t);
81 static bool_t svc_cots_kfreeargs(SVCXPRT *, xdrproc_t, caddr_t);
82 static void svc_cots_kdestroy(SVCMASTERXPRT *);
83 static int svc_cots_kdup(struct svc_req *, caddr_t, int,
84 struct dupreq **, bool_t *);
85 static void svc_cots_kdupdone(struct dupreq *, caddr_t,
86 void (*)(), int, int);
87 static int32_t *svc_cots_kgetres(SVCXPRT *, int);
88 static void svc_cots_kfreeres(SVCXPRT *);
89 static void svc_cots_kclone_destroy(SVCXPRT *);
90 static void svc_cots_kstart(SVCMASTERXPRT *);
91 static void svc_cots_ktattrs(SVCXPRT *, int, void **);
92
93 /*
94 * Server transport operations vector.
 * svc_cots_kcreate() installs this as xp_ops of every master transport
 * it creates. The initializer is positional, so the entries must stay in
 * the member order of struct svc_ops, which is declared outside this file.
 * mir_svc_hold and mir_svc_release are not defined in the code shown here.
95 */
96 struct svc_ops svc_cots_op = {
97 svc_cots_krecv, /* Get requests */
98 svc_cots_kgetargs, /* Deserialize arguments */
99 svc_cots_ksend, /* Send reply */
100 svc_cots_kfreeargs, /* Free argument data space */
101 svc_cots_kdestroy, /* Destroy transport handle */
102 svc_cots_kdup, /* Check entry in dup req cache */
103 svc_cots_kdupdone, /* Mark entry in dup req cache as done */
104 svc_cots_kgetres, /* Get pointer to response buffer */
105 svc_cots_kfreeres, /* Destroy pre-serialized response header */
106 svc_cots_kclone_destroy, /* Destroy a clone xprt */
107 svc_cots_kstart, /* Tell `ready-to-receive' to rpcmod */
108 NULL, /* Transport specific clone xprt (none for COTS) */
109 svc_cots_ktattrs, /* Transport Attributes */
110 mir_svc_hold, /* Increment transport reference count */
111 mir_svc_release /* Decrement transport reference count */
112 };
113
114 /*
115 * Master transport private data.
116 * Kept in xprt->xp_p2.
 * svc_cots_kcreate() allocates it as one chunk together with a
 * struct T_addr_ack and room for two sin6_t addresses placed right after
 * this structure; svc_cots_kdestroy() frees a chunk of that same size.
117 */
118 struct cots_master_data {
119 char *cmd_src_addr; /* client's address; same pointer as xp_rtaddr.buf */
120 int cmd_xprt_started; /* set to 1 once svc_cots_kstart() has called */
121 /* rpcmod's start routine. */
122 struct rpc_cots_server *cmd_stats; /* stats for zone */
123 };
124
125 /*
126 * Transport private data.
127 * Kept in clone_xprt->xp_p2buf.
 * svc_cots_init() asserts that this structure fits in SVC_P2LEN bytes.
 *
 * cd_mp is set by svc_cots_kgetres() and is either taken over by
 * svc_cots_ksend() or released by svc_cots_kfreeres().
 * cd_req_mp is set by svc_cots_krecv() and is released by
 * svc_cots_kfreeargs() or svc_cots_kclone_destroy().
128 */
129 typedef struct cots_data {
130 mblk_t *cd_mp; /* pre-allocated reply message */
131 mblk_t *cd_req_mp; /* request message */
132 } cots_data_t;
133
134 /*
135 * Server statistics
136 * NOTE: This structure type is duplicated in the NFS fast path.
 *
 * cots_rsstat_tmpl is the read-only template handed to
 * rpcstat_zone_init_common() by svc_cots_stats_init(); it only supplies the
 * kstat names and data types. Within the code shown in this file the
 * counters that are incremented are rscalls, rsbadcalls, rsxdrcall,
 * rsdupchecks and rsdupreqs.
137 */
138 static const struct rpc_cots_server {
139 kstat_named_t rscalls;
140 kstat_named_t rsbadcalls;
141 kstat_named_t rsnullrecv;
142 kstat_named_t rsbadlen;
143 kstat_named_t rsxdrcall;
144 kstat_named_t rsdupchecks;
145 kstat_named_t rsdupreqs;
146 } cots_rsstat_tmpl = {
147 { "calls", KSTAT_DATA_UINT64 },
148 { "badcalls", KSTAT_DATA_UINT64 },
149 { "nullrecv", KSTAT_DATA_UINT64 },
150 { "badlen", KSTAT_DATA_UINT64 },
151 { "xdrcall", KSTAT_DATA_UINT64 },
152 { "dupchecks", KSTAT_DATA_UINT64 },
153 { "dupreqs", KSTAT_DATA_UINT64 }
154 };
155
/*
 * CLONE2STATS(clone_xprt) yields the cmd_stats pointer stored in the
 * cots_master_data of the master transport behind a clone xprt.
 * RSSTAT_INCR(s, x) atomically increments the 64-bit value of counter x in
 * the statistics block s.
 */
156 #define CLONE2STATS(clone_xprt) \
157 ((struct cots_master_data *)(clone_xprt)->xp_master->xp_p2)->cmd_stats
158 #define RSSTAT_INCR(s, x) \
159 atomic_inc_64(&(s)->x.value.ui64)
160
161 /*
162 * Pointer to a transport specific `ready to receive' function in rpcmod
163 * (set from rpcmod).
 * svc_cots_kstart() calls through it, passing the transport's xp_wq, and
 * does not check it for NULL first.
164 */
165 void (*mir_start)(queue_t *);
/*
 * Location of the message size sanity limit that svc_cots_kcreate() raises,
 * under cots_kcreate_lock, when a new transport asks for a larger
 * max_msgsize. When this pointer is NULL the limit is left alone.
 */
166 uint_t *svc_max_msg_sizep;
167
168 /*
169 * the address size of the underlying transport can sometimes be
170 * unknown (tinfo->ADDR_size == -1). For this case, it is
171 * necessary to figure out what the size is so the correct amount
172 * of data is allocated. This is an iterative process:
173 * 1. take a good guess (use T_MINADDRSIZE)
174 * 2. try it.
175 * 3. if it works then everything is ok
176 * 4. if the error is ENAMETOOLONG, double the guess
177 * 5. go back to step 2.
 *
 * NOTE(review): neither macro below is referenced by the code shown in
 * this file, and svc_cots_kcreate() uses a fixed-size address buffer
 * instead of the loop described above.
178 */
179 #define T_UNKNOWNADDRSIZE (-1)
180 #define T_MINADDRSIZE 32
181
182 /*
183 * Create a transport record.
184 * The transport record, output buffer, and private data structure
185 * are allocated. The output buffer is serialized into using xdrmem.
186 * There is one transport record per user process which implements a
187 * set of services.
188 */
189 static kmutex_t cots_kcreate_lock;
190
191 int
svc_cots_kcreate(file_t * fp,uint_t max_msgsize,struct T_info_ack * tinfo,SVCMASTERXPRT ** nxprt)192 svc_cots_kcreate(file_t *fp, uint_t max_msgsize, struct T_info_ack *tinfo,
193 SVCMASTERXPRT **nxprt)
194 {
195 struct cots_master_data *cmd;
196 int err, retval;
197 SVCMASTERXPRT *xprt;
198 struct rpcstat *rpcstat;
199 struct T_addr_ack *ack_p;
200 struct strioctl getaddr;
201
202 if (nxprt == NULL)
203 return (EINVAL);
204
205 rpcstat = zone_getspecific(rpcstat_zone_key, curproc->p_zone);
206 ASSERT(rpcstat != NULL);
207
208 xprt = kmem_zalloc(sizeof (SVCMASTERXPRT), KM_SLEEP);
209
210 cmd = kmem_zalloc(sizeof (*cmd) + sizeof (*ack_p)
211 + (2 * sizeof (sin6_t)), KM_SLEEP);
212
213 ack_p = (struct T_addr_ack *)&cmd[1];
214
215 if ((tinfo->TIDU_size > COTS_MAX_ALLOCSIZE) ||
216 (tinfo->TIDU_size <= 0))
217 xprt->xp_msg_size = COTS_MAX_ALLOCSIZE;
218 else {
219 xprt->xp_msg_size = tinfo->TIDU_size -
220 (tinfo->TIDU_size % BYTES_PER_XDR_UNIT);
221 }
222
223 xprt->xp_ops = &svc_cots_op;
224 xprt->xp_p2 = (caddr_t)cmd;
225 cmd->cmd_xprt_started = 0;
226 cmd->cmd_stats = rpcstat->rpc_cots_server;
227
228 getaddr.ic_cmd = TI_GETINFO;
229 getaddr.ic_timout = -1;
230 getaddr.ic_len = sizeof (*ack_p) + (2 * sizeof (sin6_t));
231 getaddr.ic_dp = (char *)ack_p;
232 ack_p->PRIM_type = T_ADDR_REQ;
233
234 err = strioctl(fp->f_vnode, I_STR, (intptr_t)&getaddr,
235 0, K_TO_K, CRED(), &retval);
236 if (err) {
237 kmem_free(cmd, sizeof (*cmd) + sizeof (*ack_p) +
238 (2 * sizeof (sin6_t)));
239 kmem_free(xprt, sizeof (SVCMASTERXPRT));
240 return (err);
241 }
242
243 xprt->xp_rtaddr.maxlen = ack_p->REMADDR_length;
244 xprt->xp_rtaddr.len = ack_p->REMADDR_length;
245 cmd->cmd_src_addr = xprt->xp_rtaddr.buf =
246 (char *)ack_p + ack_p->REMADDR_offset;
247
248 xprt->xp_lcladdr.maxlen = ack_p->LOCADDR_length;
249 xprt->xp_lcladdr.len = ack_p->LOCADDR_length;
250 xprt->xp_lcladdr.buf = (char *)ack_p + ack_p->LOCADDR_offset;
251
252 /*
253 * If the current sanity check size in rpcmod is smaller
254 * than the size needed for this xprt, then increase
255 * the sanity check.
256 */
257 if (max_msgsize != 0 && svc_max_msg_sizep &&
258 max_msgsize > *svc_max_msg_sizep) {
259
260 /* This check needs a lock */
261 mutex_enter(&cots_kcreate_lock);
262 if (svc_max_msg_sizep && max_msgsize > *svc_max_msg_sizep)
263 *svc_max_msg_sizep = max_msgsize;
264 mutex_exit(&cots_kcreate_lock);
265 }
266
267 *nxprt = xprt;
268
269 return (0);
270 }
271
272 /*
273 * Destroy a master transport record.
274 * Frees the space allocated for a transport record.
275 */
276 static void
svc_cots_kdestroy(SVCMASTERXPRT * xprt)277 svc_cots_kdestroy(SVCMASTERXPRT *xprt)
278 {
279 struct cots_master_data *cmd = (struct cots_master_data *)xprt->xp_p2;
280
281 ASSERT(cmd);
282
283 if (xprt->xp_netid)
284 kmem_free(xprt->xp_netid, strlen(xprt->xp_netid) + 1);
285 if (xprt->xp_addrmask.maxlen)
286 kmem_free(xprt->xp_addrmask.buf, xprt->xp_addrmask.maxlen);
287
288 mutex_destroy(&xprt->xp_req_lock);
289 mutex_destroy(&xprt->xp_thread_lock);
290
291 kmem_free(cmd, sizeof (*cmd) + sizeof (struct T_addr_ack) +
292 (2 * sizeof (sin6_t)));
293
294 kmem_free(xprt, sizeof (SVCMASTERXPRT));
295 }
296
297 /*
298 * svc_tli_kcreate() calls this function at the end to tell
299 * rpcmod that the transport is ready to receive requests.
300 */
301 static void
svc_cots_kstart(SVCMASTERXPRT * xprt)302 svc_cots_kstart(SVCMASTERXPRT *xprt)
303 {
304 struct cots_master_data *cmd = (struct cots_master_data *)xprt->xp_p2;
305
306 if (cmd->cmd_xprt_started == 0) {
307 /*
308 * Acquire the xp_req_lock in order to use xp_wq
309 * safely (we don't want to qenable a queue that has
310 * already been closed).
311 */
312 mutex_enter(&xprt->xp_req_lock);
313 if (cmd->cmd_xprt_started == 0 &&
314 xprt->xp_wq != NULL) {
315 (*mir_start)(xprt->xp_wq);
316 cmd->cmd_xprt_started = 1;
317 }
318 mutex_exit(&xprt->xp_req_lock);
319 }
320 }
321
322 /*
323 * Transport-type specific part of svc_xprt_cleanup().
324 */
325 static void
svc_cots_kclone_destroy(SVCXPRT * clone_xprt)326 svc_cots_kclone_destroy(SVCXPRT *clone_xprt)
327 {
328 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf;
329
330 if (cd->cd_req_mp) {
331 freemsg(cd->cd_req_mp);
332 cd->cd_req_mp = (mblk_t *)0;
333 }
334 ASSERT(cd->cd_mp == NULL);
335 }
336
337 /*
338 * Transport Attributes.
339 */
340 static void
svc_cots_ktattrs(SVCXPRT * clone_xprt,int attrflag,void ** tattr)341 svc_cots_ktattrs(SVCXPRT *clone_xprt, int attrflag, void **tattr)
342 {
343 *tattr = NULL;
344
345 switch (attrflag) {
346 case SVC_TATTR_ADDRMASK:
347 *tattr = (void *)&clone_xprt->xp_master->xp_addrmask;
348 }
349 }
350
351 /*
352 * Receive rpc requests.
353 * Checks if the message is intact, and deserializes the call packet.
354 */
355 static bool_t
svc_cots_krecv(SVCXPRT * clone_xprt,mblk_t * mp,struct rpc_msg * msg)356 svc_cots_krecv(SVCXPRT *clone_xprt, mblk_t *mp, struct rpc_msg *msg)
357 {
358 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf;
359 XDR *xdrs = &clone_xprt->xp_xdrin;
360 struct rpc_cots_server *stats = CLONE2STATS(clone_xprt);
361
362 TRACE_0(TR_FAC_KRPC, TR_SVC_COTS_KRECV_START,
363 "svc_cots_krecv_start:");
364 RPCLOG(4, "svc_cots_krecv_start clone_xprt = %p:\n",
365 (void *)clone_xprt);
366
367 RSSTAT_INCR(stats, rscalls);
368
369 if (mp->b_datap->db_type != M_DATA) {
370 RPCLOG(16, "svc_cots_krecv bad db_type %d\n",
371 mp->b_datap->db_type);
372 goto bad;
373 }
374
375 xdrmblk_init(xdrs, mp, XDR_DECODE, 0);
376
377 TRACE_0(TR_FAC_KRPC, TR_XDR_CALLMSG_START,
378 "xdr_callmsg_start:");
379 RPCLOG0(4, "xdr_callmsg_start:\n");
380 if (!xdr_callmsg(xdrs, msg)) {
381 XDR_DESTROY(xdrs);
382 TRACE_1(TR_FAC_KRPC, TR_XDR_CALLMSG_END,
383 "xdr_callmsg_end:(%S)", "bad");
384 RPCLOG0(1, "svc_cots_krecv xdr_callmsg failure\n");
385 RSSTAT_INCR(stats, rsxdrcall);
386 goto bad;
387 }
388 TRACE_1(TR_FAC_KRPC, TR_XDR_CALLMSG_END,
389 "xdr_callmsg_end:(%S)", "good");
390
391 clone_xprt->xp_xid = msg->rm_xid;
392 cd->cd_req_mp = mp;
393
394 TRACE_1(TR_FAC_KRPC, TR_SVC_COTS_KRECV_END,
395 "svc_cots_krecv_end:(%S)", "good");
396 RPCLOG0(4, "svc_cots_krecv_end:good\n");
397 return (TRUE);
398
399 bad:
400 if (mp)
401 freemsg(mp);
402
403 RSSTAT_INCR(stats, rsbadcalls);
404 TRACE_1(TR_FAC_KRPC, TR_SVC_COTS_KRECV_END,
405 "svc_cots_krecv_end:(%S)", "bad");
406 return (FALSE);
407 }
408
409 /*
410 * Send rpc reply.
411 */
412 static bool_t
svc_cots_ksend(SVCXPRT * clone_xprt,struct rpc_msg * msg)413 svc_cots_ksend(SVCXPRT *clone_xprt, struct rpc_msg *msg)
414 {
415 /* LINTED pointer alignment */
416 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf;
417 XDR *xdrs = &(clone_xprt->xp_xdrout);
418 int retval = FALSE;
419 mblk_t *mp;
420 xdrproc_t xdr_results;
421 caddr_t xdr_location;
422 bool_t has_args;
423
424 TRACE_0(TR_FAC_KRPC, TR_SVC_COTS_KSEND_START,
425 "svc_cots_ksend_start:");
426
427 /*
428 * If there is a result procedure specified in the reply message,
429 * it will be processed in the xdr_replymsg and SVCAUTH_WRAP.
430 * We need to make sure it won't be processed twice, so we null
431 * it for xdr_replymsg here.
432 */
433 has_args = FALSE;
434 if (msg->rm_reply.rp_stat == MSG_ACCEPTED &&
435 msg->rm_reply.rp_acpt.ar_stat == SUCCESS) {
436 if ((xdr_results = msg->acpted_rply.ar_results.proc) != NULL) {
437 has_args = TRUE;
438 xdr_location = msg->acpted_rply.ar_results.where;
439 msg->acpted_rply.ar_results.proc = xdr_void;
440 msg->acpted_rply.ar_results.where = NULL;
441 }
442 }
443
444 mp = cd->cd_mp;
445 if (mp) {
446 /*
447 * The program above pre-allocated an mblk and put
448 * the data in place.
449 */
450 cd->cd_mp = (mblk_t *)NULL;
451 if (!(xdr_replymsg_body(xdrs, msg) &&
452 (!has_args || SVCAUTH_WRAP(&clone_xprt->xp_auth, xdrs,
453 xdr_results, xdr_location)))) {
454 XDR_DESTROY(xdrs);
455 RPCLOG0(1, "svc_cots_ksend: "
456 "xdr_replymsg_body/SVCAUTH_WRAP failed\n");
457 freemsg(mp);
458 goto out;
459 }
460 } else {
461 int len;
462 int mpsize;
463
464 /*
465 * Leave space for protocol headers.
466 */
467 len = MSG_OFFSET + clone_xprt->xp_msg_size;
468
469 /*
470 * Allocate an initial mblk for the response data.
471 */
472 while (!(mp = allocb(len, BPRI_LO))) {
473 RPCLOG0(16, "svc_cots_ksend: allocb failed failed\n");
474 if (strwaitbuf(len, BPRI_LO)) {
475 TRACE_1(TR_FAC_KRPC, TR_SVC_COTS_KSEND_END,
476 "svc_cots_ksend_end:(%S)", "strwaitbuf");
477 RPCLOG0(1,
478 "svc_cots_ksend: strwaitbuf failed\n");
479 goto out;
480 }
481 }
482
483 /*
484 * Initialize the XDR encode stream. Additional mblks
485 * will be allocated if necessary. They will be TIDU
486 * sized.
487 */
488 xdrmblk_init(xdrs, mp, XDR_ENCODE, clone_xprt->xp_msg_size);
489 mpsize = MBLKSIZE(mp);
490 ASSERT(mpsize >= len);
491 ASSERT(mp->b_rptr == mp->b_datap->db_base);
492
493 /*
494 * If the size of mblk is not appreciably larger than what we
495 * asked, then resize the mblk to exactly len bytes. Reason for
496 * this: suppose len is 1600 bytes, the tidu is 1460 bytes
497 * (from TCP over ethernet), and the arguments to RPC require
498 * 2800 bytes. Ideally we want the protocol to render two
499 * ~1400 byte segments over the wire. If allocb() gives us a 2k
500 * mblk, and we allocate a second mblk for the rest, the
501 * protocol module may generate 3 segments over the wire:
502 * 1460 bytes for the first, 448 (2048 - 1600) for the 2nd, and
503 * 892 for the 3rd. If we "waste" 448 bytes in the first mblk,
504 * the XDR encoding will generate two ~1400 byte mblks, and the
505 * protocol module is more likely to produce properly sized
506 * segments.
507 */
508 if ((mpsize >> 1) <= len) {
509 mp->b_rptr += (mpsize - len);
510 }
511
512 /*
513 * Adjust b_rptr to reserve space for the non-data protocol
514 * headers that any downstream modules might like to add, and
515 * for the record marking header.
516 */
517 mp->b_rptr += (MSG_OFFSET + RM_HDR_SIZE);
518
519 XDR_SETPOS(xdrs, (uint_t)(mp->b_rptr - mp->b_datap->db_base));
520 ASSERT(mp->b_wptr == mp->b_rptr);
521
522 msg->rm_xid = clone_xprt->xp_xid;
523
524 TRACE_0(TR_FAC_KRPC, TR_XDR_REPLYMSG_START,
525 "xdr_replymsg_start:");
526 if (!(xdr_replymsg(xdrs, msg) &&
527 (!has_args || SVCAUTH_WRAP(&clone_xprt->xp_auth, xdrs,
528 xdr_results, xdr_location)))) {
529 XDR_DESTROY(xdrs);
530 TRACE_1(TR_FAC_KRPC, TR_XDR_REPLYMSG_END,
531 "xdr_replymsg_end:(%S)", "bad");
532 freemsg(mp);
533 RPCLOG0(1, "svc_cots_ksend: xdr_replymsg/SVCAUTH_WRAP "
534 "failed\n");
535 goto out;
536 }
537 TRACE_1(TR_FAC_KRPC, TR_XDR_REPLYMSG_END,
538 "xdr_replymsg_end:(%S)", "good");
539 }
540
541 XDR_DESTROY(xdrs);
542
543 put(clone_xprt->xp_wq, mp);
544 retval = TRUE;
545
546 out:
547 /*
548 * This is completely disgusting. If public is set it is
549 * a pointer to a structure whose first field is the address
550 * of the function to free that structure and any related
551 * stuff. (see rrokfree in nfs_xdr.c).
552 */
553 if (xdrs->x_public) {
554 /* LINTED pointer alignment */
555 (**((int (**)())xdrs->x_public))(xdrs->x_public);
556 }
557
558 TRACE_1(TR_FAC_KRPC, TR_SVC_COTS_KSEND_END,
559 "svc_cots_ksend_end:(%S)", "done");
560 return (retval);
561 }
562
563 /*
564 * Deserialize arguments.
565 */
566 static bool_t
svc_cots_kgetargs(SVCXPRT * clone_xprt,xdrproc_t xdr_args,caddr_t args_ptr)567 svc_cots_kgetargs(SVCXPRT *clone_xprt, xdrproc_t xdr_args,
568 caddr_t args_ptr)
569 {
570 return (SVCAUTH_UNWRAP(&clone_xprt->xp_auth, &clone_xprt->xp_xdrin,
571 xdr_args, args_ptr));
572 }
573
574 static bool_t
svc_cots_kfreeargs(SVCXPRT * clone_xprt,xdrproc_t xdr_args,caddr_t args_ptr)575 svc_cots_kfreeargs(SVCXPRT *clone_xprt, xdrproc_t xdr_args,
576 caddr_t args_ptr)
577 {
578 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf;
579 /* LINTED pointer alignment */
580 XDR *xdrs = &clone_xprt->xp_xdrin;
581 mblk_t *mp;
582 bool_t retval;
583
584 /*
585 * It is important to call the XDR routine before
586 * freeing the request mblk. Structures in the
587 * XDR data may point into the mblk and require that
588 * the memory be intact during the free routine.
589 */
590 if (args_ptr) {
591 xdrs->x_op = XDR_FREE;
592 retval = (*xdr_args)(xdrs, args_ptr);
593 } else
594 retval = TRUE;
595
596 XDR_DESTROY(xdrs);
597
598 if ((mp = cd->cd_req_mp) != NULL) {
599 cd->cd_req_mp = (mblk_t *)0;
600 freemsg(mp);
601 }
602
603 return (retval);
604 }
605
606 static int32_t *
svc_cots_kgetres(SVCXPRT * clone_xprt,int size)607 svc_cots_kgetres(SVCXPRT *clone_xprt, int size)
608 {
609 /* LINTED pointer alignment */
610 cots_data_t *cd = (cots_data_t *)clone_xprt->xp_p2buf;
611 XDR *xdrs = &clone_xprt->xp_xdrout;
612 mblk_t *mp;
613 int32_t *buf;
614 struct rpc_msg rply;
615 int len;
616 int mpsize;
617
618 /*
619 * Leave space for protocol headers.
620 */
621 len = MSG_OFFSET + clone_xprt->xp_msg_size;
622
623 /*
624 * Allocate an initial mblk for the response data.
625 */
626 while ((mp = allocb(len, BPRI_LO)) == NULL) {
627 if (strwaitbuf(len, BPRI_LO))
628 return (NULL);
629 }
630
631 /*
632 * Initialize the XDR encode stream. Additional mblks
633 * will be allocated if necessary. They will be TIDU
634 * sized.
635 */
636 xdrmblk_init(xdrs, mp, XDR_ENCODE, clone_xprt->xp_msg_size);
637 mpsize = MBLKSIZE(mp);
638 ASSERT(mpsize >= len);
639 ASSERT(mp->b_rptr == mp->b_datap->db_base);
640
641 /*
642 * If the size of mblk is not appreciably larger than what we
643 * asked, then resize the mblk to exactly len bytes. Reason for
644 * this: suppose len is 1600 bytes, the tidu is 1460 bytes
645 * (from TCP over ethernet), and the arguments to RPC require
646 * 2800 bytes. Ideally we want the protocol to render two
647 * ~1400 byte segments over the wire. If allocb() gives us a 2k
648 * mblk, and we allocate a second mblk for the rest, the
649 * protocol module may generate 3 segments over the wire:
650 * 1460 bytes for the first, 448 (2048 - 1600) for the 2nd, and
651 * 892 for the 3rd. If we "waste" 448 bytes in the first mblk,
652 * the XDR encoding will generate two ~1400 byte mblks, and the
653 * protocol module is more likely to produce properly sized
654 * segments.
655 */
656 if ((mpsize >> 1) <= len) {
657 mp->b_rptr += (mpsize - len);
658 }
659
660 /*
661 * Adjust b_rptr to reserve space for the non-data protocol
662 * headers that any downstream modules might like to add, and
663 * for the record marking header.
664 */
665 mp->b_rptr += (MSG_OFFSET + RM_HDR_SIZE);
666
667 XDR_SETPOS(xdrs, (uint_t)(mp->b_rptr - mp->b_datap->db_base));
668 ASSERT(mp->b_wptr == mp->b_rptr);
669
670 /*
671 * Assume a successful RPC since most of them are.
672 */
673 rply.rm_xid = clone_xprt->xp_xid;
674 rply.rm_direction = REPLY;
675 rply.rm_reply.rp_stat = MSG_ACCEPTED;
676 rply.acpted_rply.ar_verf = clone_xprt->xp_verf;
677 rply.acpted_rply.ar_stat = SUCCESS;
678
679 if (!xdr_replymsg_hdr(xdrs, &rply)) {
680 XDR_DESTROY(xdrs);
681 freeb(mp);
682 return (NULL);
683 }
684
685 buf = XDR_INLINE(xdrs, size);
686 if (buf == NULL) {
687 XDR_DESTROY(xdrs);
688 ASSERT(cd->cd_mp == NULL);
689 freemsg(mp);
690 } else {
691 cd->cd_mp = mp;
692 }
693 return (buf);
694 }
695
696 static void
svc_cots_kfreeres(SVCXPRT * clone_xprt)697 svc_cots_kfreeres(SVCXPRT *clone_xprt)
698 {
699 cots_data_t *cd;
700 mblk_t *mp;
701
702 cd = (cots_data_t *)clone_xprt->xp_p2buf;
703 if ((mp = cd->cd_mp) != NULL) {
704 XDR_DESTROY(&clone_xprt->xp_xdrout);
705 cd->cd_mp = (mblk_t *)NULL;
706 freemsg(mp);
707 }
708 }
709
710 /*
711 * the dup caching routines below provide a cache of non-failure
712 * transaction id's. rpc service routines can use this to detect
713 * retransmissions and re-send a non-failure response.
714 */
715
716 /*
717 * MAXDUPREQS is the number of cached items. It should be adjusted
718 * to the service load so that there is likely to be a response entry
719 * when the first retransmission comes in.
720 */
721 #define MAXDUPREQS 8192
722
723 /*
724 * This should be appropriately scaled to MAXDUPREQS. To produce as few
725 * collisions as possible it is suggested to set this to a prime.
726 */
727 #define DRHASHSZ 2053
728
/* Hash bucket index for a transaction id, and for a dupreq entry. */
729 #define XIDHASH(xid) ((xid) % DRHASHSZ)
730 #define DRHASH(dr) XIDHASH((dr)->dr_xid)
/* Transaction id of a request, taken from its clone xprt. */
731 #define REQTOXID(req) ((req)->rq_xprt->xp_xid)
732
733 static int cotsndupreqs = 0; /* dupreq entries allocated so far */
734 int cotsmaxdupreqs = MAXDUPREQS; /* allocation limit; entries are recycled beyond it */
735 static kmutex_t cotsdupreq_lock; /* held by svc_cots_kdup() around cache accesses */
736 static struct dupreq *cotsdrhashtbl[DRHASHSZ]; /* bucket heads, chained via dr_chain */
737 static int cotsdrhashstat[DRHASHSZ]; /* number of entries hashed into each bucket */
738
739 static void unhash(struct dupreq *);
740
741 /*
742 * cotsdrmru points to the head of a circular linked list in lru order.
743 * cotsdrmru->dr_next == drlru
 * That is, the entry following the most recently used one is the least
 * recently used one, which is where svc_cots_kdup() starts looking for an
 * entry to recycle. NULL until the first entry has been allocated.
744 */
745 struct dupreq *cotsdrmru;
746
747 /*
748 * PSARC 2003/523 Contract Private Interface
749 * svc_cots_kdup
750 * Changes must be reviewed by Solaris File Sharing
751 * Changes must be communicated to contract-2003-523@sun.com
752 *
753 * svc_cots_kdup searches the request cache and returns 0 if the
754 * request is not found in the cache. If it is found, then it
755 * returns the state of the request (in progress or done) and
756 * the status or attributes that were part of the original reply.
757 *
758 * If DUP_DONE (there is a duplicate) svc_cots_kdup copies over the
759 * value of the response. In that case, also return in *dupcachedp
760 * whether the response free routine is cached in the dupreq - in which case
761 * the caller should not be freeing it, because it will be done later
762 * in the svc_cots_kdup code when the dupreq is reused.
763 */
764 static int
svc_cots_kdup(struct svc_req * req,caddr_t res,int size,struct dupreq ** drpp,bool_t * dupcachedp)765 svc_cots_kdup(struct svc_req *req, caddr_t res, int size, struct dupreq **drpp,
766 bool_t *dupcachedp)
767 {
768 struct rpc_cots_server *stats = CLONE2STATS(req->rq_xprt);
769 struct dupreq *dr;
770 uint32_t xid;
771 uint32_t drhash;
772 int status;
773
774 xid = REQTOXID(req);
775 mutex_enter(&cotsdupreq_lock);
776 RSSTAT_INCR(stats, rsdupchecks);
777 /*
778 * Check to see whether an entry already exists in the cache.
779 */
780 dr = cotsdrhashtbl[XIDHASH(xid)];
781 while (dr != NULL) {
782 if (dr->dr_xid == xid &&
783 dr->dr_proc == req->rq_proc &&
784 dr->dr_prog == req->rq_prog &&
785 dr->dr_vers == req->rq_vers &&
786 dr->dr_addr.len == req->rq_xprt->xp_rtaddr.len &&
787 bcmp((caddr_t)dr->dr_addr.buf,
788 (caddr_t)req->rq_xprt->xp_rtaddr.buf,
789 dr->dr_addr.len) == 0) {
790 status = dr->dr_status;
791 if (status == DUP_DONE) {
792 bcopy(dr->dr_resp.buf, res, size);
793 if (dupcachedp != NULL)
794 *dupcachedp = (dr->dr_resfree != NULL);
795 TRACE_0(TR_FAC_KRPC, TR_SVC_COTS_KDUP_DONE,
796 "svc_cots_kdup: DUP_DONE");
797 } else {
798 dr->dr_status = DUP_INPROGRESS;
799 *drpp = dr;
800 TRACE_0(TR_FAC_KRPC,
801 TR_SVC_COTS_KDUP_INPROGRESS,
802 "svc_cots_kdup: DUP_INPROGRESS");
803 }
804 RSSTAT_INCR(stats, rsdupreqs);
805 mutex_exit(&cotsdupreq_lock);
806 return (status);
807 }
808 dr = dr->dr_chain;
809 }
810
811 /*
812 * There wasn't an entry, either allocate a new one or recycle
813 * an old one.
814 */
815 if (cotsndupreqs < cotsmaxdupreqs) {
816 dr = kmem_alloc(sizeof (*dr), KM_NOSLEEP);
817 if (dr == NULL) {
818 mutex_exit(&cotsdupreq_lock);
819 return (DUP_ERROR);
820 }
821 dr->dr_resp.buf = NULL;
822 dr->dr_resp.maxlen = 0;
823 dr->dr_addr.buf = NULL;
824 dr->dr_addr.maxlen = 0;
825 if (cotsdrmru) {
826 dr->dr_next = cotsdrmru->dr_next;
827 cotsdrmru->dr_next = dr;
828 } else {
829 dr->dr_next = dr;
830 }
831 cotsndupreqs++;
832 } else {
833 dr = cotsdrmru->dr_next;
834 while (dr->dr_status == DUP_INPROGRESS) {
835 dr = dr->dr_next;
836 if (dr == cotsdrmru->dr_next) {
837 cmn_err(CE_WARN, "svc_cots_kdup no slots free");
838 mutex_exit(&cotsdupreq_lock);
839 return (DUP_ERROR);
840 }
841 }
842 unhash(dr);
843 if (dr->dr_resfree) {
844 (*dr->dr_resfree)(dr->dr_resp.buf);
845 }
846 }
847 dr->dr_resfree = NULL;
848 cotsdrmru = dr;
849
850 dr->dr_xid = REQTOXID(req);
851 dr->dr_prog = req->rq_prog;
852 dr->dr_vers = req->rq_vers;
853 dr->dr_proc = req->rq_proc;
854 if (dr->dr_addr.maxlen < req->rq_xprt->xp_rtaddr.len) {
855 if (dr->dr_addr.buf != NULL)
856 kmem_free(dr->dr_addr.buf, dr->dr_addr.maxlen);
857 dr->dr_addr.maxlen = req->rq_xprt->xp_rtaddr.len;
858 dr->dr_addr.buf = kmem_alloc(dr->dr_addr.maxlen, KM_NOSLEEP);
859 if (dr->dr_addr.buf == NULL) {
860 dr->dr_addr.maxlen = 0;
861 dr->dr_status = DUP_DROP;
862 mutex_exit(&cotsdupreq_lock);
863 return (DUP_ERROR);
864 }
865 }
866 dr->dr_addr.len = req->rq_xprt->xp_rtaddr.len;
867 bcopy(req->rq_xprt->xp_rtaddr.buf, dr->dr_addr.buf, dr->dr_addr.len);
868 if (dr->dr_resp.maxlen < size) {
869 if (dr->dr_resp.buf != NULL)
870 kmem_free(dr->dr_resp.buf, dr->dr_resp.maxlen);
871 dr->dr_resp.maxlen = (unsigned int)size;
872 dr->dr_resp.buf = kmem_alloc(size, KM_NOSLEEP);
873 if (dr->dr_resp.buf == NULL) {
874 dr->dr_resp.maxlen = 0;
875 dr->dr_status = DUP_DROP;
876 mutex_exit(&cotsdupreq_lock);
877 return (DUP_ERROR);
878 }
879 }
880 dr->dr_status = DUP_INPROGRESS;
881
882 drhash = (uint32_t)DRHASH(dr);
883 dr->dr_chain = cotsdrhashtbl[drhash];
884 cotsdrhashtbl[drhash] = dr;
885 cotsdrhashstat[drhash]++;
886 mutex_exit(&cotsdupreq_lock);
887 *drpp = dr;
888 return (DUP_NEW);
889 }
890
891 /*
892 * PSARC 2003/523 Contract Private Interface
893 * svc_cots_kdupdone
894 * Changes must be reviewed by Solaris File Sharing
895 * Changes must be communicated to contract-2003-523@sun.com
896 *
897 * svc_cots_kdupdone marks the request done (DUP_DONE or DUP_DROP)
898 * and stores the response.
899 */
900 static void
svc_cots_kdupdone(struct dupreq * dr,caddr_t res,void (* dis_resfree)(),int size,int status)901 svc_cots_kdupdone(struct dupreq *dr, caddr_t res, void (*dis_resfree)(),
902 int size, int status)
903 {
904 ASSERT(dr->dr_resfree == NULL);
905 if (status == DUP_DONE) {
906 bcopy(res, dr->dr_resp.buf, size);
907 dr->dr_resfree = dis_resfree;
908 }
909 dr->dr_status = status;
910 }
911
912 /*
913 * This routine expects that the mutex, cotsdupreq_lock, is already held.
914 */
915 static void
unhash(struct dupreq * dr)916 unhash(struct dupreq *dr)
917 {
918 struct dupreq *drt;
919 struct dupreq *drtprev = NULL;
920 uint32_t drhash;
921
922 ASSERT(MUTEX_HELD(&cotsdupreq_lock));
923
924 drhash = (uint32_t)DRHASH(dr);
925 drt = cotsdrhashtbl[drhash];
926 while (drt != NULL) {
927 if (drt == dr) {
928 cotsdrhashstat[drhash]--;
929 if (drtprev == NULL) {
930 cotsdrhashtbl[drhash] = drt->dr_chain;
931 } else {
932 drtprev->dr_chain = drt->dr_chain;
933 }
934 return;
935 }
936 drtprev = drt;
937 drt = drt->dr_chain;
938 }
939 }
940
941 void
svc_cots_stats_init(zoneid_t zoneid,struct rpc_cots_server ** statsp)942 svc_cots_stats_init(zoneid_t zoneid, struct rpc_cots_server **statsp)
943 {
944 *statsp = (struct rpc_cots_server *)rpcstat_zone_init_common(zoneid,
945 "unix", "rpc_cots_server", (const kstat_named_t *)&cots_rsstat_tmpl,
946 sizeof (cots_rsstat_tmpl));
947 }
948
949 void
svc_cots_stats_fini(zoneid_t zoneid,struct rpc_cots_server ** statsp)950 svc_cots_stats_fini(zoneid_t zoneid, struct rpc_cots_server **statsp)
951 {
952 rpcstat_zone_fini_common(zoneid, "unix", "rpc_cots_server");
953 kmem_free(*statsp, sizeof (cots_rsstat_tmpl));
954 }
955
956 void
svc_cots_init(void)957 svc_cots_init(void)
958 {
959 /*
960 * Check to make sure that the cots private data will fit into
961 * the stack buffer allocated by svc_run. The ASSERT is a safety
962 * net if the cots_data_t structure ever changes.
963 */
964 /*CONSTANTCONDITION*/
965 ASSERT(sizeof (cots_data_t) <= SVC_P2LEN);
966
967 mutex_init(&cots_kcreate_lock, NULL, MUTEX_DEFAULT, NULL);
968 mutex_init(&cotsdupreq_lock, NULL, MUTEX_DEFAULT, NULL);
969 }
970