xref: /titanic_50/usr/src/lib/libnsl/rpc/xdr_rec.c (revision 936b7af69172dce89b577831f79c0e18d15e854b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*
24  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
25  * Use is subject to license terms.
26  */
27 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
29 /*
30  * Portions of this source code were derived from Berkeley
31  * 4.3 BSD under license from the Regents of the University of
32  * California.
33  */
34 
35 #pragma ident	"%Z%%M%	%I%	%E% SMI"
36 
37 /*
38  * xdr_rec.c, Implements (TCP/IP based) XDR streams with a "record marking"
39  * layer above connection oriented transport layer (e.g. tcp) (for rpc's use).
40  *
41  *
42  * These routines interface XDRSTREAMS to a (tcp/ip) connection transport.
43  * There is a record marking layer between the xdr stream
44  * and the (tcp) cv transport level.  A record is composed of one or more
45  * record fragments.  A record fragment is a thirty-two bit header followed
46  * by n bytes of data, where n is contained in the header.  The header
47  * is represented as a htonl(uint32_t).  The high order bit encodes
48  * whether or not the fragment is the last fragment of the record
49  * (1 => fragment is last, 0 => more fragments to follow).
50  * The other 31 bits encode the byte length of the fragment.
51  */
52 
53 #include "mt.h"
54 #include "rpc_mt.h"
55 #include <stdio.h>
56 #include <rpc/types.h>
57 #include <rpc/rpc.h>
58 #include <sys/types.h>
59 #include <syslog.h>
60 #include <memory.h>
61 #include <stdlib.h>
62 #include <unistd.h>
63 #include <inttypes.h>
64 #include <string.h>
65 
66 /*
67  * A record is composed of one or more record fragments.
68  * A record fragment is a four-byte header followed by zero to
69  * 2**31-1 bytes.  The header is treated as a long unsigned and is
70  * encode/decoded to the network via htonl/ntohl.  The low order 31 bits
71  * are a byte count of the fragment.  The highest order bit is a boolean:
72  * 1 => this fragment is the last fragment of the record,
73  * 0 => this fragment is followed by more fragment(s).
74  *
75  * The fragment/record machinery is not general;  it is constructed to
76  * meet the needs of xdr and rpc based on tcp.
77  */
78 
/* Top bit of a fragment header: set when the fragment ends the record. */
#define	LAST_FRAG	((uint32_t)1 << 31)
80 
81 /*
82  * Minimum fragment size is size of rpc callmsg over TCP:
83  * xid direction vers prog vers proc
84  *   cred flavor, cred length, cred
85  *   verf flavor, verf length, verf
86  *   (with no cred or verf allocated)
87  */
88 #define	MIN_FRAG	(10 * BYTES_PER_XDR_UNIT)
89 
90 typedef struct rec_strm {
91 	caddr_t tcp_handle;
92 	/*
93 	 * out-going bits
94 	 */
95 	int (*writeit)();
96 	caddr_t out_base;	/* output buffer (points to frag header) */
97 	caddr_t out_finger;	/* next output position */
98 	caddr_t out_boundry;	/* data cannot up to this address */
99 	uint32_t *frag_header;	/* beginning of current fragment */
100 	bool_t frag_sent;	/* true if buffer sent in middle of record */
101 	/*
102 	 * in-coming bits
103 	 */
104 	int (*readit)();
105 	caddr_t in_base;	/* input buffer */
106 	caddr_t in_finger;	/* location of next byte to be had */
107 	caddr_t in_boundry;	/* can read up to this location */
108 	int fbtbc;		/* fragment bytes to be consumed */
109 	bool_t last_frag;
110 	uint_t sendsize;
111 	uint_t recvsize;
112 	/*
113 	 * Is this the first time that the
114 	 * getbytes routine has been called ?
115 	 */
116 	uint_t firsttime;
117 	/*
118 	 * Is this non-blocked?
119 	 */
120 	uint_t in_nonblock;	/* non-blocked input */
121 	uint_t in_needpoll;	/* need to poll to get more data ? */
122 	uint32_t in_maxrecsz;	/* maximum record size */
123 	caddr_t in_nextrec;	/* start of next record */
124 	uint32_t in_nextrecsz;	/* part of next record in buffer */
125 } RECSTREAM;
126 
127 static uint_t	fix_buf_size(uint_t);
128 static struct	xdr_ops *xdrrec_ops(void);
129 static bool_t	xdrrec_getbytes(XDR *, caddr_t, int);
130 static bool_t	flush_out(RECSTREAM *, bool_t);
131 static bool_t	get_input_bytes(RECSTREAM *, caddr_t, int, bool_t);
132 static bool_t	set_input_fragment(RECSTREAM *);
133 static bool_t	skip_input_bytes(RECSTREAM *, int32_t);
134 
135 bool_t		__xdrrec_getbytes_nonblock(XDR *, enum xprt_stat *);
136 
137 /*
138  * Create an xdr handle for xdrrec
139  * xdrrec_create fills in xdrs.  Sendsize and recvsize are
140  * send and recv buffer sizes (0 => use default).
141  * tcp_handle is an opaque handle that is passed as the first parameter to
142  * the procedures readit and writeit.  Readit and writeit are read and
143  * write respectively. They are like the system calls except that they
144  * take an opaque handle rather than an fd.
145  */
146 
147 static const char mem_err_msg_rec[] = "xdrrec_create: out of memory";
148 
149 void
150 xdrrec_create(XDR *xdrs, const uint_t sendsize, const uint_t recvsize,
151     const caddr_t tcp_handle, int (*readit)(), int (*writeit)())
152 {
153 	RECSTREAM *rstrm = malloc(sizeof (RECSTREAM));
154 
155 	/*
156 	 * XXX: Should still rework xdrrec_create to return a handle,
157 	 * and in any malloc-failure case return NULL.
158 	 */
159 	if (rstrm == NULL) {
160 		(void) syslog(LOG_ERR, mem_err_msg_rec);
161 		return;
162 	}
163 	/*
164 	 * Adjust sizes and allocate buffers; malloc(3C)
165 	 * provides a buffer suitably aligned for any use, so
166 	 * there's no need for us to mess around with alignment.
167 	 *
168 	 * Since non-blocking connections may need to reallocate the input
169 	 * buffer, we use separate malloc()s for input and output.
170 	 */
171 	rstrm->sendsize = fix_buf_size(sendsize);
172 	rstrm->recvsize = fix_buf_size(recvsize);
173 	rstrm->out_base = malloc(rstrm->sendsize);
174 	if (rstrm->out_base == NULL) {
175 		(void) syslog(LOG_ERR, mem_err_msg_rec);
176 		free(rstrm);
177 		return;
178 	}
179 	rstrm->in_base = malloc(rstrm->recvsize);
180 	if (rstrm->in_base == NULL) {
181 		(void) syslog(LOG_ERR, mem_err_msg_rec);
182 		free(rstrm->out_base);
183 		free(rstrm);
184 		return;
185 	}
186 
187 	/*
188 	 * now the rest ...
189 	 */
190 
191 	xdrs->x_ops = xdrrec_ops();
192 	xdrs->x_private = (caddr_t)rstrm;
193 	rstrm->tcp_handle = tcp_handle;
194 	rstrm->readit = readit;
195 	rstrm->writeit = writeit;
196 	rstrm->out_finger = rstrm->out_boundry = rstrm->out_base;
197 	/* LINTED pointer cast */
198 	rstrm->frag_header = (uint32_t *)rstrm->out_base;
199 	rstrm->out_finger += sizeof (uint_t);
200 	rstrm->out_boundry += rstrm->sendsize;
201 	rstrm->frag_sent = FALSE;
202 	rstrm->in_boundry = rstrm->in_base;
203 	rstrm->in_finger = (rstrm->in_boundry += rstrm->recvsize);
204 	rstrm->fbtbc = 0;
205 	rstrm->last_frag = TRUE;
206 	rstrm->firsttime = 0;
207 	rstrm->in_nonblock = 0;
208 	rstrm->in_needpoll = 1;
209 	rstrm->in_maxrecsz = 0;
210 	rstrm->in_nextrec = rstrm->in_base;
211 	rstrm->in_nextrecsz = 0;
212 }
213 
214 /*
215  * Align input stream.  If all applications behaved correctly, this
216  * defensive procedure will not be necessary, since received data will be
217  * aligned correctly.
218  */
219 static void
220 align_instream(RECSTREAM *rstrm)
221 {
222 	int current = rstrm->in_boundry - rstrm->in_finger;
223 
224 	(void) memcpy(rstrm->in_base, rstrm->in_finger, current);
225 	rstrm->in_finger = rstrm->in_base;
226 	rstrm->in_boundry = rstrm->in_finger + current;
227 }
228 
229 /*
230  * The routines defined below are the xdr ops which will go into the
231  * xdr handle filled in by xdrrec_create.
232  */
233 static bool_t
234 xdrrec_getint32(XDR *xdrs, int32_t *ip)
235 {
236 	/* LINTED pointer cast */
237 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
238 	/* LINTED pointer cast */
239 	int32_t *buflp = (int32_t *)(rstrm->in_finger);
240 	int32_t mylong;
241 
242 	/* first try the inline, fast case */
243 	if ((rstrm->fbtbc >= (int)sizeof (int32_t)) &&
244 		((uint_t)(rstrm->in_boundry - (caddr_t)buflp) >=
245 					(uint_t)sizeof (int32_t))) {
246 		/*
247 		 * Check if buflp is longword aligned.  If not, align it.
248 		 */
249 		if (((uintptr_t)buflp) & ((int)sizeof (int32_t) - 1)) {
250 			align_instream(rstrm);
251 			/* LINTED pointer cast */
252 			buflp = (int32_t *)(rstrm->in_finger);
253 		}
254 		*ip = (int32_t)ntohl((uint32_t)(*buflp));
255 		rstrm->fbtbc -= (int)sizeof (int32_t);
256 		rstrm->in_finger += sizeof (int32_t);
257 	} else {
258 		if (!xdrrec_getbytes(xdrs, (caddr_t)&mylong, sizeof (int32_t)))
259 			return (FALSE);
260 		*ip = (int32_t)ntohl((uint32_t)mylong);
261 	}
262 	return (TRUE);
263 }
264 
265 static bool_t
266 xdrrec_putint32(XDR *xdrs, int32_t *ip)
267 {
268 	/* LINTED pointer cast */
269 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
270 	/* LINTED pointer cast */
271 	int32_t *dest_lp = ((int32_t *)(rstrm->out_finger));
272 
273 	if ((rstrm->out_finger += sizeof (int32_t)) > rstrm->out_boundry) {
274 		/*
275 		 * this case should almost never happen so the code is
276 		 * inefficient
277 		 */
278 		rstrm->out_finger -= sizeof (int32_t);
279 		rstrm->frag_sent = TRUE;
280 		if (!flush_out(rstrm, FALSE))
281 			return (FALSE);
282 		/* LINTED pointer cast */
283 		dest_lp = ((int32_t *)(rstrm->out_finger));
284 		rstrm->out_finger += sizeof (int32_t);
285 	}
286 	*dest_lp = (int32_t)htonl((uint32_t)(*ip));
287 	return (TRUE);
288 }
289 
290 static bool_t
291 xdrrec_getlong(XDR *xdrs, long *lp)
292 {
293 	int32_t i;
294 
295 	if (!xdrrec_getint32(xdrs, &i))
296 		return (FALSE);
297 	*lp = (long)i;
298 	return (TRUE);
299 }
300 
301 static bool_t
302 xdrrec_putlong(XDR *xdrs, long *lp)
303 {
304 	int32_t i;
305 
306 #if defined(_LP64)
307 	if ((*lp > INT32_MAX) || (*lp < INT32_MIN))
308 		return (FALSE);
309 #endif
310 
311 	i = (int32_t)*lp;
312 
313 	return (xdrrec_putint32(xdrs, &i));
314 }
315 
316 static bool_t	/* must manage buffers, fragments, and records */
317 xdrrec_getbytes(XDR *xdrs, caddr_t addr, int len)
318 {
319 	/* LINTED pointer cast */
320 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
321 	int current;
322 
323 	while (len > 0) {
324 		current = rstrm->fbtbc;
325 		if (current == 0) {
326 			if (rstrm->last_frag)
327 				return (FALSE);
328 			if (!set_input_fragment(rstrm))
329 				return (FALSE);
330 			continue;
331 		}
332 		current = (len < current) ? len : current;
333 		if (!get_input_bytes(rstrm, addr, current, FALSE))
334 			return (FALSE);
335 		addr += current;
336 		rstrm->fbtbc -= current;
337 		len -= current;
338 	}
339 	return (TRUE);
340 }
341 
342 static bool_t
343 xdrrec_putbytes(XDR *xdrs, caddr_t addr, int len)
344 {
345 	/* LINTED pointer cast */
346 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
347 	int current;
348 
349 	while (len > 0) {
350 
351 		current = (uintptr_t)rstrm->out_boundry -
352 			(uintptr_t)rstrm->out_finger;
353 		current = (len < current) ? len : current;
354 		(void) memcpy(rstrm->out_finger, addr, current);
355 		rstrm->out_finger += current;
356 		addr += current;
357 		len -= current;
358 		if (rstrm->out_finger == rstrm->out_boundry) {
359 			rstrm->frag_sent = TRUE;
360 			if (!flush_out(rstrm, FALSE))
361 				return (FALSE);
362 		}
363 	}
364 	return (TRUE);
365 }
366 /*
367  * This is just like the ops vector x_getbytes(), except that
368  * instead of returning success or failure on getting a certain number
369  * of bytes, it behaves much more like the read() system call against a
370  * pipe -- it returns up to the number of bytes requested and a return of
371  * zero indicates end-of-record.  A -1 means something very bad happened.
372  */
373 uint_t /* must manage buffers, fragments, and records */
374 xdrrec_readbytes(XDR *xdrs, caddr_t addr, uint_t l)
375 {
376 	/* LINTED pointer cast */
377 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
378 	int current, len;
379 
380 	len = l;
381 	while (len > 0) {
382 		current = rstrm->fbtbc;
383 		if (current == 0) {
384 			if (rstrm->last_frag)
385 				return (l - len);
386 			if (!set_input_fragment(rstrm))
387 				return ((uint_t)-1);
388 			continue;
389 		}
390 		current = (len < current) ? len : current;
391 		if (!get_input_bytes(rstrm, addr, current, FALSE))
392 			return ((uint_t)-1);
393 		addr += current;
394 		rstrm->fbtbc -= current;
395 		len -= current;
396 	}
397 	return (l - len);
398 }
399 
400 static uint_t
401 xdrrec_getpos(XDR *xdrs)
402 {
403 	/* LINTED pointer cast */
404 	RECSTREAM *rstrm = (RECSTREAM *)xdrs->x_private;
405 	int32_t pos;
406 
407 	pos = lseek((intptr_t)rstrm->tcp_handle, 0, 1);
408 	if (pos != -1)
409 		switch (xdrs->x_op) {
410 
411 		case XDR_ENCODE:
412 			pos += rstrm->out_finger - rstrm->out_base;
413 			break;
414 
415 		case XDR_DECODE:
416 			pos -= rstrm->in_boundry - rstrm->in_finger;
417 			break;
418 
419 		default:
420 			pos = (uint_t)-1;
421 			break;
422 		}
423 	return ((uint_t)pos);
424 }
425 
426 static bool_t
427 xdrrec_setpos(XDR *xdrs, uint_t pos)
428 {
429 	/* LINTED pointer cast */
430 	RECSTREAM *rstrm = (RECSTREAM *)xdrs->x_private;
431 	uint_t currpos = xdrrec_getpos(xdrs);
432 	int delta = currpos - pos;
433 	caddr_t newpos;
434 
435 	if ((int)currpos != -1)
436 		switch (xdrs->x_op) {
437 
438 		case XDR_ENCODE:
439 			newpos = rstrm->out_finger - delta;
440 			if ((newpos > (caddr_t)(rstrm->frag_header)) &&
441 				(newpos < rstrm->out_boundry)) {
442 				rstrm->out_finger = newpos;
443 				return (TRUE);
444 			}
445 			break;
446 
447 		case XDR_DECODE:
448 			newpos = rstrm->in_finger - delta;
449 			if ((delta < (int)(rstrm->fbtbc)) &&
450 				(newpos <= rstrm->in_boundry) &&
451 				(newpos >= rstrm->in_base)) {
452 				rstrm->in_finger = newpos;
453 				rstrm->fbtbc -= delta;
454 				return (TRUE);
455 			}
456 			break;
457 		}
458 	return (FALSE);
459 }
460 
461 static rpc_inline_t *
462 xdrrec_inline(XDR *xdrs, int len)
463 {
464 	/* LINTED pointer cast */
465 	RECSTREAM *rstrm = (RECSTREAM *)xdrs->x_private;
466 	rpc_inline_t *buf = NULL;
467 
468 	switch (xdrs->x_op) {
469 
470 	case XDR_ENCODE:
471 		if ((rstrm->out_finger + len) <= rstrm->out_boundry) {
472 			/* LINTED pointer cast */
473 			buf = (rpc_inline_t *)rstrm->out_finger;
474 			rstrm->out_finger += len;
475 		}
476 		break;
477 
478 	case XDR_DECODE:
479 		if ((len <= rstrm->fbtbc) &&
480 			((rstrm->in_finger + len) <= rstrm->in_boundry)) {
481 			/*
482 			 * Check if rstrm->in_finger is longword aligned;
483 			 * if not, align it.
484 			 */
485 			if (((intptr_t)rstrm->in_finger) &
486 			    (sizeof (int32_t) - 1))
487 				align_instream(rstrm);
488 			/* LINTED pointer cast */
489 			buf = (rpc_inline_t *)rstrm->in_finger;
490 			rstrm->fbtbc -= len;
491 			rstrm->in_finger += len;
492 		}
493 		break;
494 	}
495 	return (buf);
496 }
497 
498 static void
499 xdrrec_destroy(XDR *xdrs)
500 {
501 	/* LINTED pointer cast */
502 	RECSTREAM *rstrm = (RECSTREAM *)xdrs->x_private;
503 
504 	free(rstrm->out_base);
505 	free(rstrm->in_base);
506 	free(rstrm);
507 }
508 
509 
510 /*
511  * Exported routines to manage xdr records
512  */
513 
514 /*
515  * Before reading (deserializing) from the stream, one should always call
516  * this procedure to guarantee proper record alignment.
517  */
518 bool_t
519 xdrrec_skiprecord(XDR *xdrs)
520 {
521 	/* LINTED pointer cast */
522 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
523 
524 	if (rstrm->in_nonblock) {
525 		enum xprt_stat pstat;
526 		/*
527 		 * Read and discard a record from the non-blocking
528 		 * buffer. Return succes only if a complete record can
529 		 * be retrieved without blocking, or if the buffer was
530 		 * empty and there was no data to fetch.
531 		 */
532 		if (__xdrrec_getbytes_nonblock(xdrs, &pstat) ||
533 			(pstat == XPRT_MOREREQS &&
534 				rstrm->in_finger == rstrm->in_boundry)) {
535 			rstrm->fbtbc = 0;
536 			return (TRUE);
537 		}
538 		return (FALSE);
539 	}
540 	while (rstrm->fbtbc > 0 || (!rstrm->last_frag)) {
541 		if (!skip_input_bytes(rstrm, rstrm->fbtbc))
542 			return (FALSE);
543 		rstrm->fbtbc = 0;
544 		if ((!rstrm->last_frag) && (!set_input_fragment(rstrm)))
545 			return (FALSE);
546 	}
547 	rstrm->last_frag = FALSE;
548 	return (TRUE);
549 }
550 
551 /*
552  * Look ahead function.
553  * Returns TRUE iff there is no more input in the buffer
554  * after consuming the rest of the current record.
555  */
556 bool_t
557 xdrrec_eof(XDR *xdrs)
558 {
559 	/* LINTED pointer cast */
560 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
561 
562 	if (rstrm->in_nonblock) {
563 		/*
564 		 * If in_needpoll is true, the non-blocking XDR stream
565 		 * does not have a complete record.
566 		 */
567 		return (rstrm->in_needpoll);
568 	}
569 	while (rstrm->fbtbc > 0 || (!rstrm->last_frag)) {
570 		if (!skip_input_bytes(rstrm, rstrm->fbtbc))
571 			return (TRUE);
572 		rstrm->fbtbc = 0;
573 		if ((!rstrm->last_frag) && (!set_input_fragment(rstrm)))
574 			return (TRUE);
575 	}
576 	if (rstrm->in_finger == rstrm->in_boundry)
577 		return (TRUE);
578 	return (FALSE);
579 }
580 
581 /*
582  * The client must tell the package when an end-of-record has occurred.
583  * The second parameter tells whether the record should be flushed to the
584  * (output) tcp stream.  (This lets the package support batched or
585  * pipelined procedure calls.)  TRUE => immediate flush to tcp connection.
586  */
587 bool_t
588 xdrrec_endofrecord(XDR *xdrs, bool_t sendnow)
589 {
590 	/* LINTED pointer cast */
591 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
592 	uint32_t len;	/* fragment length */
593 
594 	if (sendnow || rstrm->frag_sent ||
595 		((uintptr_t)rstrm->out_finger + sizeof (uint32_t) >=
596 		(uintptr_t)rstrm->out_boundry)) {
597 		rstrm->frag_sent = FALSE;
598 		return (flush_out(rstrm, TRUE));
599 	}
600 	len = (uintptr_t)(rstrm->out_finger) - (uintptr_t)(rstrm->frag_header) -
601 		sizeof (uint32_t);
602 	*(rstrm->frag_header) = htonl((uint32_t)len | LAST_FRAG);
603 	/* LINTED pointer cast */
604 	rstrm->frag_header = (uint32_t *)rstrm->out_finger;
605 	rstrm->out_finger += sizeof (uint32_t);
606 	return (TRUE);
607 }
608 
609 
610 /*
611  * Internal useful routines
612  */
613 static bool_t
614 flush_out(RECSTREAM *rstrm, bool_t eor)
615 {
616 	uint32_t eormask = (eor == TRUE) ? LAST_FRAG : 0;
617 	uint32_t len = (uintptr_t)(rstrm->out_finger) -
618 		(uintptr_t)(rstrm->frag_header) - sizeof (uint32_t);
619 	int written;
620 
621 	*(rstrm->frag_header) = htonl(len | eormask);
622 	len = (uintptr_t)(rstrm->out_finger) - (uintptr_t)(rstrm->out_base);
623 
624 	written = (*(rstrm->writeit))
625 	    (rstrm->tcp_handle, rstrm->out_base, (int)len);
626 	/*
627 	 * Handle the specific 'CANT_STORE' error. In this case, the
628 	 * fragment must be cleared.
629 	 */
630 	if ((written != (int)len) && (written != -2))
631 		return (FALSE);
632 	/* LINTED pointer cast */
633 	rstrm->frag_header = (uint32_t *)rstrm->out_base;
634 	rstrm->out_finger = (caddr_t)rstrm->out_base + sizeof (uint32_t);
635 
636 	return (TRUE);
637 }
638 
639 /* knows nothing about records!  Only about input buffers */
640 static bool_t
641 fill_input_buf(RECSTREAM *rstrm, bool_t do_align)
642 {
643 	caddr_t where;
644 	int len;
645 
646 	if (rstrm->in_nonblock) {
647 		/* Should never get here in the non-blocking case */
648 		return (FALSE);
649 	}
650 	where = rstrm->in_base;
651 	if (do_align) {
652 		len = rstrm->recvsize;
653 	} else {
654 		uint_t i = (uintptr_t)rstrm->in_boundry % BYTES_PER_XDR_UNIT;
655 
656 		where += i;
657 		len = rstrm->recvsize - i;
658 	}
659 	if ((len = (*(rstrm->readit))(rstrm->tcp_handle, where, len)) == -1)
660 		return (FALSE);
661 	rstrm->in_finger = where;
662 	where += len;
663 	rstrm->in_boundry = where;
664 	return (TRUE);
665 }
666 
667 /* knows nothing about records!  Only about input buffers */
668 static bool_t
669 get_input_bytes(RECSTREAM *rstrm, caddr_t addr,
670 		int len, bool_t do_align)
671 {
672 	int current;
673 
674 	if (rstrm->in_nonblock) {
675 		/*
676 		 * Data should already be in the rstrm buffer, so we just
677 		 * need to copy it to 'addr'.
678 		 */
679 		current = (int)(rstrm->in_boundry - rstrm->in_finger);
680 		if (len > current)
681 			return (FALSE);
682 		(void) memcpy(addr, rstrm->in_finger, len);
683 		rstrm->in_finger += len;
684 		addr += len;
685 		return (TRUE);
686 	}
687 
688 	while (len > 0) {
689 		current = (intptr_t)rstrm->in_boundry -
690 			(intptr_t)rstrm->in_finger;
691 		if (current == 0) {
692 			if (!fill_input_buf(rstrm, do_align))
693 				return (FALSE);
694 			continue;
695 		}
696 		current = (len < current) ? len : current;
697 		(void) memcpy(addr, rstrm->in_finger, current);
698 		rstrm->in_finger += current;
699 		addr += current;
700 		len -= current;
701 		do_align = FALSE;
702 	}
703 	return (TRUE);
704 }
705 
706 /* next four bytes of the input stream are treated as a header */
707 static bool_t
708 set_input_fragment(RECSTREAM *rstrm)
709 {
710 	uint32_t header;
711 
712 	if (rstrm->in_nonblock) {
713 		/*
714 		 * In the non-blocking case, the fragment headers should
715 		 * already have been consumed, so we should never get
716 		 * here. Might as well return failure right away.
717 		 */
718 		return (FALSE);
719 	}
720 	if (!get_input_bytes(rstrm, (caddr_t)&header, (int)sizeof (header),
721 							rstrm->last_frag))
722 		return (FALSE);
723 	header = (uint32_t)ntohl(header);
724 	rstrm->last_frag = ((header & LAST_FRAG) == 0) ? FALSE : TRUE;
725 	rstrm->fbtbc = header & (~LAST_FRAG);
726 	return (TRUE);
727 }
728 
729 /* consumes input bytes; knows nothing about records! */
730 static bool_t
731 skip_input_bytes(RECSTREAM *rstrm, int32_t cnt)
732 {
733 	int current;
734 
735 	while (cnt > 0) {
736 		current = (intptr_t)rstrm->in_boundry -
737 			(intptr_t)rstrm->in_finger;
738 		if (current == 0) {
739 			if (!fill_input_buf(rstrm, FALSE))
740 				return (FALSE);
741 			continue;
742 		}
743 		current = (cnt < current) ? cnt : current;
744 		rstrm->in_finger += current;
745 		cnt -= current;
746 	}
747 	return (TRUE);
748 }
749 
750 
751 static bool_t
752 __xdrrec_nonblock_realloc(RECSTREAM *rstrm, uint32_t newsize)
753 {
754 	caddr_t newbuf = rstrm->in_base;
755 	ptrdiff_t offset;
756 	bool_t ret = TRUE;
757 
758 	if (newsize > rstrm->recvsize) {
759 		newbuf = (caddr_t)realloc(newbuf, newsize);
760 		if (newbuf == 0) {
761 			ret = FALSE;
762 		} else {
763 			/* Make pointers valid for the new buffer */
764 			offset = newbuf - rstrm->in_base;
765 			rstrm->in_finger += offset;
766 			rstrm->in_boundry += offset;
767 			rstrm->in_nextrec += offset;
768 			rstrm->in_base = newbuf;
769 			rstrm->recvsize = newsize;
770 		}
771 	}
772 
773 	return (ret);
774 }
775 
776 /*
777  * Switch the input side of the stream to non-blocking mode, record the
777  * maximum record size and grow the input buffer if that size requires it
778  */
779 bool_t
780 __xdrrec_set_conn_nonblock(XDR *xdrs, uint32_t tcp_maxrecsz)
781 {
782 	/* LINTED pointer cast */
783 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
784 	size_t newsize;
785 
786 	rstrm->in_nonblock = TRUE;
787 	if (tcp_maxrecsz == 0) {
788 		/*
789 		 * If maxrecsz has not been set, use the default
790 		 * that was set from xdrrec_create() and
791 		 * fix_buf_size()
792 		 */
793 		rstrm->in_maxrecsz = rstrm->recvsize;
794 		return (TRUE);
795 	}
796 	rstrm->in_maxrecsz = tcp_maxrecsz;
797 	if (tcp_maxrecsz <= rstrm->recvsize)
798 		return (TRUE);
799 
800 	/*
801 	 * For nonblocked connection, the entire record is read into the
802 	 * buffer before any xdr processing. This implies that the record
803 	 * size must allow for the maximum expected message size of the
804 	 * service. However, it's inconvenient to allocate very large
805 	 * buffers up front, so we limit ourselves to a reasonable
806 	 * default size here, and reallocate (up to the maximum record
807 	 * size allowed for the connection) as necessary.
808 	 */
809 	if ((newsize = tcp_maxrecsz) > RPC_MAXDATASIZE) {
810 		newsize = RPC_MAXDATASIZE;
811 	}
812 	if (!__xdrrec_nonblock_realloc(rstrm, newsize)) {
813 		(void) syslog(LOG_ERR, mem_err_msg_rec);
814 		free(rstrm->out_base);
815 		free(rstrm->in_base);
816 		free(rstrm);
817 		return (FALSE);
818 	}
819 
820 	return (TRUE);
821 }
822 
823 /*
824  * Retrieve input data from the non-blocking connection, increase
825  * the size of the read buffer if necessary, and check that the
826  * record size stays below the allowed maximum for the connection.
827  */
828 bool_t
829 __xdrrec_getbytes_nonblock(XDR *xdrs, enum xprt_stat *pstat)
830 {
831 	/* LINTED pointer cast */
832 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
833 	uint32_t prevbytes_thisrec, minreqrecsize;
834 	uint32_t *header;
835 	int32_t len_received = 0;
836 	uint32_t unprocessed = 0;
837 
838 	/*
839 	 * For connection oriented protocols, there's no guarantee that
840 	 * we will receive the data nicely chopped into records, no
841 	 * matter how it was sent. We use the in_nextrec pointer to
842 	 * indicate where in the buffer the next record starts. If
843 	 * in_nextrec != in_base, there's data in the buffer from
844 	 * previous reads, and if in_nextrecsz > 0, we need to copy
845 	 * the portion of the next record already read to the start of
846 	 * the input buffer
847 	 */
848 	if (rstrm->in_nextrecsz > 0) {
849 		/* Starting on new record with data already in the buffer */
850 		(void) memmove(rstrm->in_base, rstrm->in_nextrec,
851 			rstrm->in_nextrecsz);
852 		rstrm->in_nextrec = rstrm->in_finger = rstrm->in_base;
853 		rstrm->in_boundry = rstrm->in_nextrec + rstrm->in_nextrecsz;
854 		unprocessed = rstrm->in_nextrecsz;
855 		rstrm->in_nextrecsz = 0;
856 	} else if (rstrm->in_nextrec == rstrm->in_base) {
857 		/* Starting on new record with empty buffer */
858 		rstrm->in_boundry = rstrm->in_finger = rstrm->in_base;
859 		rstrm->last_frag = FALSE;
860 		rstrm->in_needpoll = TRUE;
861 	}
862 
863 	prevbytes_thisrec = (uint32_t)(rstrm->in_boundry - rstrm->in_base);
864 
865 	/* Do we need to retrieve data ? */
866 	if (rstrm->in_needpoll) {
867 		int len_requested, len_total_received;
868 
869 		rstrm->in_needpoll = FALSE;
870 		len_total_received =
871 			(int)(rstrm->in_boundry - rstrm->in_base);
872 		len_requested = rstrm->recvsize - len_total_received;
873 		/*
874 		 * if len_requested is 0, this means that the input
875 		 * buffer is full and need to be increased.
876 		 * The minimum record size we will need is whatever's
877 		 * already in the buffer, plus what's yet to be
878 		 * consumed in the current fragment, plus space for at
879 		 * least one more fragment header, if this is not the
880 		 * last fragment. We use the RNDUP() macro to
881 		 * account for possible realignment of the next
882 		 * fragment header.
883 		 */
884 		if (len_requested == 0) {
885 			minreqrecsize = rstrm->recvsize +
886 			    rstrm->fbtbc +
887 			    (rstrm->last_frag ? 0 : sizeof (*header));
888 			minreqrecsize = RNDUP(minreqrecsize);
889 			if (minreqrecsize == rstrm->recvsize) {
890 				/*
891 				 * no more bytes to be consumed and
892 				 * last fragment. We should never end up
893 				 * here. Might as well return failure
894 				 * right away.
895 				 */
896 				*pstat = XPRT_DIED;
897 				return (FALSE);
898 			}
899 			if (minreqrecsize > rstrm->in_maxrecsz)
900 				goto recsz_invalid;
901 			else
902 				goto needpoll;
903 		}
904 		if ((len_received = (*(rstrm->readit))(rstrm->tcp_handle,
905 				rstrm->in_boundry, len_requested)) == -1) {
906 			*pstat = XPRT_DIED;
907 			return (FALSE);
908 		}
909 		rstrm->in_boundry += len_received;
910 		rstrm->in_nextrec = rstrm->in_boundry;
911 	}
912 
913 	/* Account for any left over data from previous processing */
914 	len_received += unprocessed;
915 
916 	/* Set a lower limit on the buffer space we'll need */
917 	minreqrecsize = prevbytes_thisrec + rstrm->fbtbc;
918 
919 	/*
920 	 * Consume bytes for this record until it's either complete,
921 	 * rejected, or we need to poll for more bytes.
922 	 *
923 	 * If fbtbc == 0, in_finger points to the start of the fragment
924 	 * header. Otherwise, it points to the start of the fragment data.
925 	 */
926 	while (len_received > 0) {
927 		if (rstrm->fbtbc == 0) {
928 			uint32_t hdrlen, minfraglen = 0;
929 			uint32_t len_recvd_thisfrag;
930 			bool_t last_frag;
931 
932 			len_recvd_thisfrag = (uint32_t)(rstrm->in_boundry -
933 						rstrm->in_finger);
934 			/* LINTED pointer cast */
935 			header = (uint32_t *)rstrm->in_finger;
936 			hdrlen = (len_recvd_thisfrag < sizeof (*header)) ?
937 				len_recvd_thisfrag : sizeof (*header);
938 			(void) memcpy(&minfraglen, header, hdrlen);
939 			last_frag = (ntohl(minfraglen) & LAST_FRAG) != 0;
940 			minfraglen = ntohl(minfraglen) & (~LAST_FRAG);
941 			/*
942 			 * The minimum record size we will need is whatever's
943 			 * already in the buffer, plus the size of this
944 			 * fragment, plus (if this isn't the last fragment)
945 			 * space for at least one more fragment header. We
946 			 * use the RNDUP() macro to account for possible
947 			 * realignment of the next fragment header.
948 			 */
949 			minreqrecsize += minfraglen +
950 					(last_frag?0:sizeof (*header));
951 			minreqrecsize = RNDUP(minreqrecsize);
952 
953 			if (hdrlen < sizeof (*header)) {
954 				/*
955 				 * We only have a partial fragment header,
956 				 * but we can still put a lower limit on the
957 				 * final fragment size, and check against the
958 				 * maximum allowed.
959 				 */
960 				if (len_recvd_thisfrag > 0 &&
961 					(minreqrecsize > rstrm->in_maxrecsz)) {
962 					goto recsz_invalid;
963 				}
964 				/* Need more bytes to obtain fbtbc value */
965 				goto needpoll;
966 			}
967 			/*
968 			 * We've got a complete fragment header, so
969 			 * 'minfraglen' is the actual fragment length, and
970 			 * 'minreqrecsize' the requested record size.
971 			 */
972 			rstrm->last_frag = last_frag;
973 			rstrm->fbtbc = minfraglen;
974 			/*
975 			 * Check that the sum of the total number of bytes read
976 			 * so far (for the record) and the size of the incoming
977 			 * fragment is less than the maximum allowed.
978 			 *
979 			 * If this is the last fragment, also check that the
980 			 * record (message) meets the minimum length
981 			 * requirement.
982 			 *
983 			 * If this isn't the last fragment, check for a zero
984 			 * fragment length. Accepting such fragments would
985 			 * leave us open to an attack where the sender keeps
986 			 * the connection open indefinitely, without any
987 			 * progress, by occasionally sending a zero length
988 			 * fragment.
989 			 */
990 			if ((minreqrecsize > rstrm->in_maxrecsz) ||
991 			(rstrm->last_frag && minreqrecsize < MIN_FRAG) ||
992 			(!rstrm->last_frag && minfraglen == 0)) {
993 recsz_invalid:
994 				rstrm->fbtbc = 0;
995 				rstrm->last_frag = 1;
996 				*pstat = XPRT_DIED;
997 				return (FALSE);
998 			}
999 			/*
1000 			 * Make this fragment abut the previous one. If it's
1001 			 * the first fragment, just advance in_finger past
1002 			 * the header. This avoids buffer copying for the
1003 			 * usual case where there's one fragment per record.
1004 			 */
1005 			if (rstrm->in_finger == rstrm->in_base) {
1006 				rstrm->in_finger += sizeof (*header);
1007 			} else {
1008 				rstrm->in_boundry -= sizeof (*header);
1009 				(void) memmove(rstrm->in_finger,
1010 					rstrm->in_finger + sizeof (*header),
1011 					rstrm->in_boundry - rstrm->in_finger);
1012 			}
1013 			/* Consume the fragment header */
1014 			if (len_received > sizeof (*header)) {
1015 				len_received -= sizeof (*header);
1016 			} else {
1017 				len_received = 0;
1018 			}
1019 		}
1020 		/*
1021 		 * Consume whatever fragment bytes we have.
1022 		 * If we've received all bytes for this fragment, advance
1023 		 * in_finger to point to the start of the next fragment
1024 		 * header. Otherwise, make fbtbc tell how much is left in
1025 		 * this fragment and advance finger to point to end of
1026 		 * fragment data.
1027 		 */
1028 		if (len_received >= rstrm->fbtbc) {
1029 			len_received -= rstrm->fbtbc;
1030 			rstrm->in_finger += rstrm->fbtbc;
1031 			rstrm->fbtbc = 0;
1032 		} else {
1033 			rstrm->fbtbc -= len_received;
1034 			rstrm->in_finger += len_received;
1035 			len_received = 0;
1036 		}
1037 		/*
1038 		 * If there's more data in the buffer, there are two
1039 		 * possibilities:
1040 		 *
1041 		 * (1)	This is the last fragment, so the extra data
1042 		 *	presumably belongs to the next record.
1043 		 *
1044 		 * (2)	Not the last fragment, so we'll start over
1045 		 *	from the top of the loop.
1046 		 */
1047 		if (len_received > 0 && rstrm->last_frag) {
1048 			rstrm->in_nextrec = rstrm->in_finger;
1049 			rstrm->in_nextrecsz = (uint32_t)(rstrm->in_boundry -
1050 							rstrm->in_nextrec);
1051 			len_received = 0;
1052 		}
1053 	}
1054 
1055 	/* Was this the last fragment, and have we read the entire record ? */
1056 	if (rstrm->last_frag && rstrm->fbtbc == 0) {
1057 		*pstat = XPRT_MOREREQS;
1058 		/*
1059 		 * We've been using both in_finger and fbtbc for our own
1060 		 * purposes. Now's the time to update them to be what
1061 		 * xdrrec_inline() expects. Set in_finger to point to the
1062 		 * start of data for this record, and fbtbc to the number
1063 		 * of bytes in the record.
1064 		 */
1065 		rstrm->fbtbc = (int)(rstrm->in_finger -
1066 				rstrm->in_base - sizeof (*header));
1067 		rstrm->in_finger = rstrm->in_base + sizeof (*header);
1068 		if (rstrm->in_nextrecsz == 0)
1069 			rstrm->in_nextrec = rstrm->in_base;
1070 		return (TRUE);
1071 	}
1072 needpoll:
1073 	/*
1074 	 * Need more bytes, so we set the needpoll flag, and go back to
1075 	 * the main RPC request loop. However, first we reallocate the
1076 	 * input buffer, if necessary.
1077 	 */
1078 	if (minreqrecsize > rstrm->recvsize) {
1079 		if (!__xdrrec_nonblock_realloc(rstrm, minreqrecsize)) {
1080 			rstrm->fbtbc = 0;
1081 			rstrm->last_frag = 1;
1082 			*pstat = XPRT_DIED;
1083 			return (FALSE);
1084 		}
1085 	}
1086 
1087 	rstrm->in_needpoll = TRUE;
1088 	*pstat = XPRT_MOREREQS;
1089 	return (FALSE);
1090 }
1091 
1092 int
1093 __is_xdrrec_first(XDR *xdrs)
1094 {
1095 	/* LINTED pointer cast */
1096 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
1097 	return ((rstrm->firsttime == TRUE) ? 1 : 0);
1098 }
1099 
1100 int
1101 __xdrrec_setfirst(XDR *xdrs)
1102 {
1103 	/* LINTED pointer cast */
1104 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
1105 
1106 	/*
1107 	 * Set rstrm->firsttime only if the input buffer is empty.
1108 	 * Otherwise, the first read from the network could skip
1109 	 * a poll.
1110 	 */
1111 	if (rstrm->in_finger == rstrm->in_boundry)
1112 		rstrm->firsttime = TRUE;
1113 	else
1114 		rstrm->firsttime = FALSE;
1115 	return (1);
1116 }
1117 
1118 int
1119 __xdrrec_resetfirst(XDR *xdrs)
1120 {
1121 	/* LINTED pointer cast */
1122 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
1123 
1124 	rstrm->firsttime = FALSE;
1125 	return (1);
1126 }
1127 
1128 
1129 static uint_t
1130 fix_buf_size(uint_t s)
1131 {
1132 	if (s < 100)
1133 		s = 4000;
1134 	return (RNDUP(s));
1135 }
1136 
1137 
1138 
1139 static bool_t
1140 xdrrec_control(XDR *xdrs, int request, void *info)
1141 {
1142 	/* LINTED pointer cast */
1143 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
1144 	xdr_bytesrec *xptr;
1145 
1146 	switch (request) {
1147 
1148 	case XDR_GET_BYTES_AVAIL:
1149 		/* Check if at end of fragment and not last fragment */
1150 		if ((rstrm->fbtbc == 0)	&& (!rstrm->last_frag))
1151 			if (!set_input_fragment(rstrm)) {
1152 				return (FALSE);
1153 			};
1154 
1155 		xptr = (xdr_bytesrec *)info;
1156 		xptr->xc_is_last_record = rstrm->last_frag;
1157 		xptr->xc_num_avail = rstrm->fbtbc;
1158 
1159 		return (TRUE);
1160 	default:
1161 		return (FALSE);
1162 
1163 	}
1164 
1165 }
1166 
1167 static struct xdr_ops *
1168 xdrrec_ops(void)
1169 {
1170 	static struct xdr_ops ops;
1171 	extern mutex_t	ops_lock;
1172 
1173 /* VARIABLES PROTECTED BY ops_lock: ops */
1174 
1175 	(void) mutex_lock(&ops_lock);
1176 	if (ops.x_getlong == NULL) {
1177 		ops.x_getlong = xdrrec_getlong;
1178 		ops.x_putlong = xdrrec_putlong;
1179 		ops.x_getbytes = xdrrec_getbytes;
1180 		ops.x_putbytes = xdrrec_putbytes;
1181 		ops.x_getpostn = xdrrec_getpos;
1182 		ops.x_setpostn = xdrrec_setpos;
1183 		ops.x_inline = xdrrec_inline;
1184 		ops.x_destroy = xdrrec_destroy;
1185 		ops.x_control = xdrrec_control;
1186 #if defined(_LP64)
1187 		ops.x_getint32 = xdrrec_getint32;
1188 		ops.x_putint32 = xdrrec_putint32;
1189 #endif
1190 	}
1191 	(void) mutex_unlock(&ops_lock);
1192 	return (&ops);
1193 }
1194