xref: /illumos-gate/usr/src/lib/libnsl/rpc/xdr_rec.c (revision 012e6ce759c490003aed29439cc47d3d73a99ad3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*
24  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
25  * Use is subject to license terms.
26  */
27 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
29 /*
30  * Portions of this source code were derived from Berkeley
31  * 4.3 BSD under license from the Regents of the University of
32  * California.
33  */
34 
35 /*
36  * xdr_rec.c, Implements (TCP/IP based) XDR streams with a "record marking"
37  * layer above connection oriented transport layer (e.g. tcp) (for rpc's use).
38  *
39  *
40  * These routines interface XDRSTREAMS to a (tcp/ip) connection transport.
41  * There is a record marking layer between the xdr stream
42  * and the (tcp) vc transport level.  A record is composed of one or more
43  * record fragments.  A record fragment is a thirty-two bit header followed
44  * by n bytes of data, where n is contained in the header.  The header
45  * is represented as a htonl(uint32_t).  The high order bit encodes
46  * whether or not the fragment is the last fragment of the record
47  * (1 => fragment is last, 0 => more fragments to follow).
48  * The other 31 bits encode the byte length of the fragment.
49  */
50 
51 #include "mt.h"
52 #include "rpc_mt.h"
53 #include <stdio.h>
54 #include <rpc/types.h>
55 #include <rpc/rpc.h>
56 #include <sys/types.h>
57 #include <syslog.h>
58 #include <memory.h>
59 #include <stdlib.h>
60 #include <unistd.h>
61 #include <inttypes.h>
62 #include <string.h>
63 
64 /*
65  * A record is composed of one or more record fragments.
66  * A record fragment is a four-byte header followed by zero to
67  * 2**31-1 bytes.  The header is treated as a 32-bit unsigned value and is
68  * encoded/decoded to the network via htonl/ntohl.  The low order 31 bits
69  * are a byte count of the fragment.  The highest order bit is a boolean:
70  * 1 => this fragment is the last fragment of the record,
71  * 0 => this fragment is followed by more fragment(s).
72  *
73  * The fragment/record machinery is not general;  it is constructed to
74  * meet the needs of xdr and rpc based on tcp.
75  */
76 
/* Mask for the "last fragment" flag: the top bit of a fragment header. */
77 #define	LAST_FRAG (((uint32_t)1 << 31))
78 
79 /*
80  * Minimum fragment size is size of rpc callmsg over TCP:
81  * xid direction vers prog vers proc
82  *   cred flavor, cred length, cred
83  *   verf flavor, verf length, verf
84  *   (with no cred or verf allocated)
85  *
 * That is ten XDR units.  The non-blocking receive path compares the
 * required record size against this value when it sees a last fragment.
 */
86 #define	MIN_FRAG	(10 * BYTES_PER_XDR_UNIT)
87 
/*
 * Per-stream private state for a record-marking XDR stream.  One of these
 * is allocated by xdrrec_create() and stored in the XDR handle's x_private.
 */
88 typedef struct rec_strm {
89 	caddr_t tcp_handle;	/* opaque handle passed to readit/writeit */
90 	/*
91 	 * out-going bits
92 	 */
93 	int (*writeit)();	/* called as writeit(tcp_handle, buf, len) */
94 	caddr_t out_base;	/* start of the output buffer */
95 	caddr_t out_finger;	/* next output position */
96 	caddr_t out_boundry;	/* one past the end of the output buffer */
97 	uint32_t *frag_header;	/* header word of the current fragment */
98 	bool_t frag_sent;	/* true if buffer sent in middle of record */
99 	/*
100 	 * in-coming bits
101 	 */
102 	int (*readit)();	/* called as readit(tcp_handle, buf, len) */
103 	caddr_t in_base;	/* input buffer */
104 	caddr_t in_finger;	/* location of next byte to be had */
105 	caddr_t in_boundry;	/* end of valid input data (exclusive) */
106 	int fbtbc;		/* fragment bytes to be consumed */
107 	bool_t last_frag;	/* LAST_FRAG bit of the latest header read */
108 	uint_t sendsize;	/* size of the out_base buffer */
109 	uint_t recvsize;	/* size of the in_base buffer */
110 	/*
111 	 * Is this the first time that the
112 	 * getbytes routine has been called ?
113 	 */
114 	uint_t firsttime;
115 	/*
116 	 * Is this non-blocked?
117 	 */
118 	uint_t in_nonblock;	/* non-blocked input */
119 	uint_t in_needpoll;	/* need to poll to get more data ? */
120 	uint32_t in_maxrecsz;	/* maximum record size */
121 	caddr_t in_nextrec;	/* start of next record */
122 	uint32_t in_nextrecsz;	/* part of next record in buffer */
123 } RECSTREAM;
124 
/*
 * Forward declarations for file-local helpers that are used before their
 * definitions appear.
 */
125 static uint_t	fix_buf_size(uint_t);
126 static struct	xdr_ops *xdrrec_ops(void);
127 static bool_t	xdrrec_getbytes(XDR *, caddr_t, int);
128 static bool_t	flush_out(RECSTREAM *, bool_t);
129 static bool_t	get_input_bytes(RECSTREAM *, caddr_t, int, bool_t);
130 static bool_t	set_input_fragment(RECSTREAM *);
131 static bool_t	skip_input_bytes(RECSTREAM *, int32_t);
132 
/* Non-static: xdrrec_skiprecord() calls this before it is defined. */
133 bool_t		__xdrrec_getbytes_nonblock(XDR *, enum xprt_stat *);
134 
135 /*
136  * Create an xdr handle for xdrrec
137  * xdrrec_create fills in xdrs.  Sendsize and recvsize are
138  * send and recv buffer sizes (0 => use default).
139  * tcp_handle is an opaque handle that is passed as the first parameter to
140  * the procedures readit and writeit.  Readit and writeit are read and
141  * write respectively. They are like the system calls except that they
142  * take an opaque handle rather than an fd.
143  */
144 
145 static const char mem_err_msg_rec[] = "xdrrec_create: out of memory";
146 
147 void
148 xdrrec_create(XDR *xdrs, const uint_t sendsize, const uint_t recvsize,
149     const caddr_t tcp_handle, int (*readit)(), int (*writeit)())
150 {
151 	RECSTREAM *rstrm = malloc(sizeof (RECSTREAM));
152 
153 	/*
154 	 * XXX: Should still rework xdrrec_create to return a handle,
155 	 * and in any malloc-failure case return NULL.
156 	 */
157 	if (rstrm == NULL) {
158 		(void) syslog(LOG_ERR, mem_err_msg_rec);
159 		return;
160 	}
161 	/*
162 	 * Adjust sizes and allocate buffers; malloc(3C)
163 	 * provides a buffer suitably aligned for any use, so
164 	 * there's no need for us to mess around with alignment.
165 	 *
166 	 * Since non-blocking connections may need to reallocate the input
167 	 * buffer, we use separate malloc()s for input and output.
168 	 */
169 	rstrm->sendsize = fix_buf_size(sendsize);
170 	rstrm->recvsize = fix_buf_size(recvsize);
171 	rstrm->out_base = malloc(rstrm->sendsize);
172 	if (rstrm->out_base == NULL) {
173 		(void) syslog(LOG_ERR, mem_err_msg_rec);
174 		free(rstrm);
175 		return;
176 	}
177 	rstrm->in_base = malloc(rstrm->recvsize);
178 	if (rstrm->in_base == NULL) {
179 		(void) syslog(LOG_ERR, mem_err_msg_rec);
180 		free(rstrm->out_base);
181 		free(rstrm);
182 		return;
183 	}
184 
185 	/*
186 	 * now the rest ...
187 	 */
188 
189 	xdrs->x_ops = xdrrec_ops();
190 	xdrs->x_private = (caddr_t)rstrm;
191 	rstrm->tcp_handle = tcp_handle;
192 	rstrm->readit = readit;
193 	rstrm->writeit = writeit;
194 	rstrm->out_finger = rstrm->out_boundry = rstrm->out_base;
195 	/* LINTED pointer cast */
196 	rstrm->frag_header = (uint32_t *)rstrm->out_base;
197 	rstrm->out_finger += sizeof (uint_t);
198 	rstrm->out_boundry += rstrm->sendsize;
199 	rstrm->frag_sent = FALSE;
200 	rstrm->in_boundry = rstrm->in_base;
201 	rstrm->in_finger = (rstrm->in_boundry += rstrm->recvsize);
202 	rstrm->fbtbc = 0;
203 	rstrm->last_frag = TRUE;
204 	rstrm->firsttime = 0;
205 	rstrm->in_nonblock = 0;
206 	rstrm->in_needpoll = 1;
207 	rstrm->in_maxrecsz = 0;
208 	rstrm->in_nextrec = rstrm->in_base;
209 	rstrm->in_nextrecsz = 0;
210 }
211 
212 /*
213  * Align input stream.  If all applications behaved correctly, this
214  * defensive procedure will not be necessary, since received data will be
215  * aligned correctly.
216  */
217 static void
218 align_instream(RECSTREAM *rstrm)
219 {
220 	int current = rstrm->in_boundry - rstrm->in_finger;
221 
222 	(void) memcpy(rstrm->in_base, rstrm->in_finger, current);
223 	rstrm->in_finger = rstrm->in_base;
224 	rstrm->in_boundry = rstrm->in_finger + current;
225 }
226 
227 /*
228  * The routines defined below are the xdr ops which will go into the
229  * xdr handle filled in by xdrrec_create.
230  */
231 static bool_t
232 xdrrec_getint32(XDR *xdrs, int32_t *ip)
233 {
234 	/* LINTED pointer cast */
235 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
236 	/* LINTED pointer cast */
237 	int32_t *buflp = (int32_t *)(rstrm->in_finger);
238 	int32_t mylong;
239 
240 	/* first try the inline, fast case */
241 	if ((rstrm->fbtbc >= (int)sizeof (int32_t)) &&
242 		((uint_t)(rstrm->in_boundry - (caddr_t)buflp) >=
243 					(uint_t)sizeof (int32_t))) {
244 		/*
245 		 * Check if buflp is longword aligned.  If not, align it.
246 		 */
247 		if (((uintptr_t)buflp) & ((int)sizeof (int32_t) - 1)) {
248 			align_instream(rstrm);
249 			/* LINTED pointer cast */
250 			buflp = (int32_t *)(rstrm->in_finger);
251 		}
252 		*ip = (int32_t)ntohl((uint32_t)(*buflp));
253 		rstrm->fbtbc -= (int)sizeof (int32_t);
254 		rstrm->in_finger += sizeof (int32_t);
255 	} else {
256 		if (!xdrrec_getbytes(xdrs, (caddr_t)&mylong, sizeof (int32_t)))
257 			return (FALSE);
258 		*ip = (int32_t)ntohl((uint32_t)mylong);
259 	}
260 	return (TRUE);
261 }
262 
263 static bool_t
264 xdrrec_putint32(XDR *xdrs, int32_t *ip)
265 {
266 	/* LINTED pointer cast */
267 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
268 	/* LINTED pointer cast */
269 	int32_t *dest_lp = ((int32_t *)(rstrm->out_finger));
270 
271 	if ((rstrm->out_finger += sizeof (int32_t)) > rstrm->out_boundry) {
272 		/*
273 		 * this case should almost never happen so the code is
274 		 * inefficient
275 		 */
276 		rstrm->out_finger -= sizeof (int32_t);
277 		rstrm->frag_sent = TRUE;
278 		if (!flush_out(rstrm, FALSE))
279 			return (FALSE);
280 		/* LINTED pointer cast */
281 		dest_lp = ((int32_t *)(rstrm->out_finger));
282 		rstrm->out_finger += sizeof (int32_t);
283 	}
284 	*dest_lp = (int32_t)htonl((uint32_t)(*ip));
285 	return (TRUE);
286 }
287 
288 static bool_t
289 xdrrec_getlong(XDR *xdrs, long *lp)
290 {
291 	int32_t i;
292 
293 	if (!xdrrec_getint32(xdrs, &i))
294 		return (FALSE);
295 	*lp = (long)i;
296 	return (TRUE);
297 }
298 
299 static bool_t
300 xdrrec_putlong(XDR *xdrs, long *lp)
301 {
302 	int32_t i;
303 
304 #if defined(_LP64)
305 	if ((*lp > INT32_MAX) || (*lp < INT32_MIN))
306 		return (FALSE);
307 #endif
308 
309 	i = (int32_t)*lp;
310 
311 	return (xdrrec_putint32(xdrs, &i));
312 }
313 
314 static bool_t	/* must manage buffers, fragments, and records */
315 xdrrec_getbytes(XDR *xdrs, caddr_t addr, int len)
316 {
317 	/* LINTED pointer cast */
318 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
319 	int current;
320 
321 	while (len > 0) {
322 		current = rstrm->fbtbc;
323 		if (current == 0) {
324 			if (rstrm->last_frag)
325 				return (FALSE);
326 			if (!set_input_fragment(rstrm))
327 				return (FALSE);
328 			continue;
329 		}
330 		current = (len < current) ? len : current;
331 		if (!get_input_bytes(rstrm, addr, current, FALSE))
332 			return (FALSE);
333 		addr += current;
334 		rstrm->fbtbc -= current;
335 		len -= current;
336 	}
337 	return (TRUE);
338 }
339 
340 static bool_t
341 xdrrec_putbytes(XDR *xdrs, caddr_t addr, int len)
342 {
343 	/* LINTED pointer cast */
344 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
345 	int current;
346 
347 	while (len > 0) {
348 
349 		current = (uintptr_t)rstrm->out_boundry -
350 			(uintptr_t)rstrm->out_finger;
351 		current = (len < current) ? len : current;
352 		(void) memcpy(rstrm->out_finger, addr, current);
353 		rstrm->out_finger += current;
354 		addr += current;
355 		len -= current;
356 		if (rstrm->out_finger == rstrm->out_boundry) {
357 			rstrm->frag_sent = TRUE;
358 			if (!flush_out(rstrm, FALSE))
359 				return (FALSE);
360 		}
361 	}
362 	return (TRUE);
363 }
364 /*
365  * This is just like the ops vector x_getbytes(), except that
366  * instead of returning success or failure on getting a certain number
367  * of bytes, it behaves much more like the read() system call against a
368  * pipe -- it returns up to the number of bytes requested and a return of
369  * zero indicates end-of-record.  A -1 means something very bad happened.
370  */
371 uint_t /* must manage buffers, fragments, and records */
372 xdrrec_readbytes(XDR *xdrs, caddr_t addr, uint_t l)
373 {
374 	/* LINTED pointer cast */
375 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
376 	int current, len;
377 
378 	len = l;
379 	while (len > 0) {
380 		current = rstrm->fbtbc;
381 		if (current == 0) {
382 			if (rstrm->last_frag)
383 				return (l - len);
384 			if (!set_input_fragment(rstrm))
385 				return ((uint_t)-1);
386 			continue;
387 		}
388 		current = (len < current) ? len : current;
389 		if (!get_input_bytes(rstrm, addr, current, FALSE))
390 			return ((uint_t)-1);
391 		addr += current;
392 		rstrm->fbtbc -= current;
393 		len -= current;
394 	}
395 	return (l - len);
396 }
397 
398 static uint_t
399 xdrrec_getpos(XDR *xdrs)
400 {
401 	/* LINTED pointer cast */
402 	RECSTREAM *rstrm = (RECSTREAM *)xdrs->x_private;
403 	int32_t pos;
404 
405 	pos = lseek((intptr_t)rstrm->tcp_handle, 0, 1);
406 	if (pos != -1)
407 		switch (xdrs->x_op) {
408 
409 		case XDR_ENCODE:
410 			pos += rstrm->out_finger - rstrm->out_base;
411 			break;
412 
413 		case XDR_DECODE:
414 			pos -= rstrm->in_boundry - rstrm->in_finger;
415 			break;
416 
417 		default:
418 			pos = (uint_t)-1;
419 			break;
420 		}
421 	return ((uint_t)pos);
422 }
423 
424 static bool_t
425 xdrrec_setpos(XDR *xdrs, uint_t pos)
426 {
427 	/* LINTED pointer cast */
428 	RECSTREAM *rstrm = (RECSTREAM *)xdrs->x_private;
429 	uint_t currpos = xdrrec_getpos(xdrs);
430 	int delta = currpos - pos;
431 	caddr_t newpos;
432 
433 	if ((int)currpos != -1)
434 		switch (xdrs->x_op) {
435 
436 		case XDR_ENCODE:
437 			newpos = rstrm->out_finger - delta;
438 			if ((newpos > (caddr_t)(rstrm->frag_header)) &&
439 				(newpos < rstrm->out_boundry)) {
440 				rstrm->out_finger = newpos;
441 				return (TRUE);
442 			}
443 			break;
444 
445 		case XDR_DECODE:
446 			newpos = rstrm->in_finger - delta;
447 			if ((delta < (int)(rstrm->fbtbc)) &&
448 				(newpos <= rstrm->in_boundry) &&
449 				(newpos >= rstrm->in_base)) {
450 				rstrm->in_finger = newpos;
451 				rstrm->fbtbc -= delta;
452 				return (TRUE);
453 			}
454 			break;
455 		}
456 	return (FALSE);
457 }
458 
459 static rpc_inline_t *
460 xdrrec_inline(XDR *xdrs, int len)
461 {
462 	/* LINTED pointer cast */
463 	RECSTREAM *rstrm = (RECSTREAM *)xdrs->x_private;
464 	rpc_inline_t *buf = NULL;
465 
466 	switch (xdrs->x_op) {
467 
468 	case XDR_ENCODE:
469 		if ((rstrm->out_finger + len) <= rstrm->out_boundry) {
470 			/* LINTED pointer cast */
471 			buf = (rpc_inline_t *)rstrm->out_finger;
472 			rstrm->out_finger += len;
473 		}
474 		break;
475 
476 	case XDR_DECODE:
477 		if ((len <= rstrm->fbtbc) &&
478 			((rstrm->in_finger + len) <= rstrm->in_boundry)) {
479 			/*
480 			 * Check if rstrm->in_finger is longword aligned;
481 			 * if not, align it.
482 			 */
483 			if (((intptr_t)rstrm->in_finger) &
484 			    (sizeof (int32_t) - 1))
485 				align_instream(rstrm);
486 			/* LINTED pointer cast */
487 			buf = (rpc_inline_t *)rstrm->in_finger;
488 			rstrm->fbtbc -= len;
489 			rstrm->in_finger += len;
490 		}
491 		break;
492 	}
493 	return (buf);
494 }
495 
496 static void
497 xdrrec_destroy(XDR *xdrs)
498 {
499 	/* LINTED pointer cast */
500 	RECSTREAM *rstrm = (RECSTREAM *)xdrs->x_private;
501 
502 	free(rstrm->out_base);
503 	free(rstrm->in_base);
504 	free(rstrm);
505 }
506 
507 
508 /*
509  * Exported routines to manage xdr records
510  */
511 
512 /*
513  * Before reading (deserializing) from the stream, one should always call
514  * this procedure to guarantee proper record alignment.
515  */
516 bool_t
517 xdrrec_skiprecord(XDR *xdrs)
518 {
519 	/* LINTED pointer cast */
520 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
521 
522 	if (rstrm->in_nonblock) {
523 		enum xprt_stat pstat;
524 		/*
525 		 * Read and discard a record from the non-blocking
526 		 * buffer. Return succes only if a complete record can
527 		 * be retrieved without blocking, or if the buffer was
528 		 * empty and there was no data to fetch.
529 		 */
530 		if (__xdrrec_getbytes_nonblock(xdrs, &pstat) ||
531 			(pstat == XPRT_MOREREQS &&
532 				rstrm->in_finger == rstrm->in_boundry)) {
533 			rstrm->fbtbc = 0;
534 			return (TRUE);
535 		}
536 		return (FALSE);
537 	}
538 	while (rstrm->fbtbc > 0 || (!rstrm->last_frag)) {
539 		if (!skip_input_bytes(rstrm, rstrm->fbtbc))
540 			return (FALSE);
541 		rstrm->fbtbc = 0;
542 		if ((!rstrm->last_frag) && (!set_input_fragment(rstrm)))
543 			return (FALSE);
544 	}
545 	rstrm->last_frag = FALSE;
546 	return (TRUE);
547 }
548 
549 /*
550  * Look ahead fuction.
551  * Returns TRUE iff there is no more input in the buffer
552  * after consuming the rest of the current record.
553  */
554 bool_t
555 xdrrec_eof(XDR *xdrs)
556 {
557 	/* LINTED pointer cast */
558 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
559 
560 	if (rstrm->in_nonblock) {
561 		/*
562 		 * If in_needpoll is true, the non-blocking XDR stream
563 		 * does not have a complete record.
564 		 */
565 		return (rstrm->in_needpoll);
566 	}
567 	while (rstrm->fbtbc > 0 || (!rstrm->last_frag)) {
568 		if (!skip_input_bytes(rstrm, rstrm->fbtbc))
569 			return (TRUE);
570 		rstrm->fbtbc = 0;
571 		if ((!rstrm->last_frag) && (!set_input_fragment(rstrm)))
572 			return (TRUE);
573 	}
574 	if (rstrm->in_finger == rstrm->in_boundry)
575 		return (TRUE);
576 	return (FALSE);
577 }
578 
579 /*
580  * The client must tell the package when an end-of-record has occurred.
581  * The second parameters tells whether the record should be flushed to the
582  * (output) tcp stream.  (This let's the package support batched or
583  * pipelined procedure calls.)  TRUE => immmediate flush to tcp connection.
584  */
585 bool_t
586 xdrrec_endofrecord(XDR *xdrs, bool_t sendnow)
587 {
588 	/* LINTED pointer cast */
589 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
590 	uint32_t len;	/* fragment length */
591 
592 	if (sendnow || rstrm->frag_sent ||
593 		((uintptr_t)rstrm->out_finger + sizeof (uint32_t) >=
594 		(uintptr_t)rstrm->out_boundry)) {
595 		rstrm->frag_sent = FALSE;
596 		return (flush_out(rstrm, TRUE));
597 	}
598 	len = (uintptr_t)(rstrm->out_finger) - (uintptr_t)(rstrm->frag_header) -
599 		sizeof (uint32_t);
600 	*(rstrm->frag_header) = htonl((uint32_t)len | LAST_FRAG);
601 	/* LINTED pointer cast */
602 	rstrm->frag_header = (uint32_t *)rstrm->out_finger;
603 	rstrm->out_finger += sizeof (uint32_t);
604 	return (TRUE);
605 }
606 
607 
608 /*
609  * Internal useful routines
610  */
611 static bool_t
612 flush_out(RECSTREAM *rstrm, bool_t eor)
613 {
614 	uint32_t eormask = (eor == TRUE) ? LAST_FRAG : 0;
615 	uint32_t len = (uintptr_t)(rstrm->out_finger) -
616 		(uintptr_t)(rstrm->frag_header) - sizeof (uint32_t);
617 	int written;
618 
619 	*(rstrm->frag_header) = htonl(len | eormask);
620 	len = (uintptr_t)(rstrm->out_finger) - (uintptr_t)(rstrm->out_base);
621 
622 	written = (*(rstrm->writeit))
623 	    (rstrm->tcp_handle, rstrm->out_base, (int)len);
624 	/*
625 	 * Handle the specific 'CANT_STORE' error. In this case, the
626 	 * fragment must be cleared.
627 	 */
628 	if ((written != (int)len) && (written != -2))
629 		return (FALSE);
630 	/* LINTED pointer cast */
631 	rstrm->frag_header = (uint32_t *)rstrm->out_base;
632 	rstrm->out_finger = (caddr_t)rstrm->out_base + sizeof (uint32_t);
633 
634 	return (TRUE);
635 }
636 
637 /* knows nothing about records!  Only about input buffers */
638 static bool_t
639 fill_input_buf(RECSTREAM *rstrm, bool_t do_align)
640 {
641 	caddr_t where;
642 	int len;
643 
644 	if (rstrm->in_nonblock) {
645 		/* Should never get here in the non-blocking case */
646 		return (FALSE);
647 	}
648 	where = rstrm->in_base;
649 	if (do_align) {
650 		len = rstrm->recvsize;
651 	} else {
652 		uint_t i = (uintptr_t)rstrm->in_boundry % BYTES_PER_XDR_UNIT;
653 
654 		where += i;
655 		len = rstrm->recvsize - i;
656 	}
657 	if ((len = (*(rstrm->readit))(rstrm->tcp_handle, where, len)) == -1)
658 		return (FALSE);
659 	rstrm->in_finger = where;
660 	where += len;
661 	rstrm->in_boundry = where;
662 	return (TRUE);
663 }
664 
665 /* knows nothing about records!  Only about input buffers */
666 static bool_t
667 get_input_bytes(RECSTREAM *rstrm, caddr_t addr,
668 		int len, bool_t do_align)
669 {
670 	int current;
671 
672 	if (rstrm->in_nonblock) {
673 		/*
674 		 * Data should already be in the rstrm buffer, so we just
675 		 * need to copy it to 'addr'.
676 		 */
677 		current = (int)(rstrm->in_boundry - rstrm->in_finger);
678 		if (len > current)
679 			return (FALSE);
680 		(void) memcpy(addr, rstrm->in_finger, len);
681 		rstrm->in_finger += len;
682 		addr += len;
683 		return (TRUE);
684 	}
685 
686 	while (len > 0) {
687 		current = (intptr_t)rstrm->in_boundry -
688 			(intptr_t)rstrm->in_finger;
689 		if (current == 0) {
690 			if (!fill_input_buf(rstrm, do_align))
691 				return (FALSE);
692 			continue;
693 		}
694 		current = (len < current) ? len : current;
695 		(void) memcpy(addr, rstrm->in_finger, current);
696 		rstrm->in_finger += current;
697 		addr += current;
698 		len -= current;
699 		do_align = FALSE;
700 	}
701 	return (TRUE);
702 }
703 
704 /* next four bytes of the input stream are treated as a header */
705 static bool_t
706 set_input_fragment(RECSTREAM *rstrm)
707 {
708 	uint32_t header;
709 
710 	if (rstrm->in_nonblock) {
711 		/*
712 		 * In the non-blocking case, the fragment headers should
713 		 * already have been consumed, so we should never get
714 		 * here. Might as well return failure right away.
715 		 */
716 		return (FALSE);
717 	}
718 	if (!get_input_bytes(rstrm, (caddr_t)&header, (int)sizeof (header),
719 							rstrm->last_frag))
720 		return (FALSE);
721 	header = (uint32_t)ntohl(header);
722 	rstrm->last_frag = ((header & LAST_FRAG) == 0) ? FALSE : TRUE;
723 	rstrm->fbtbc = header & (~LAST_FRAG);
724 	return (TRUE);
725 }
726 
727 /* consumes input bytes; knows nothing about records! */
728 static bool_t
729 skip_input_bytes(RECSTREAM *rstrm, int32_t cnt)
730 {
731 	int current;
732 
733 	while (cnt > 0) {
734 		current = (intptr_t)rstrm->in_boundry -
735 			(intptr_t)rstrm->in_finger;
736 		if (current == 0) {
737 			if (!fill_input_buf(rstrm, FALSE))
738 				return (FALSE);
739 			continue;
740 		}
741 		current = (cnt < current) ? cnt : current;
742 		rstrm->in_finger += current;
743 		cnt -= current;
744 	}
745 	return (TRUE);
746 }
747 
748 
749 static bool_t
750 __xdrrec_nonblock_realloc(RECSTREAM *rstrm, uint32_t newsize)
751 {
752 	caddr_t newbuf = rstrm->in_base;
753 	ptrdiff_t offset;
754 	bool_t ret = TRUE;
755 
756 	if (newsize > rstrm->recvsize) {
757 		newbuf = (caddr_t)realloc(newbuf, newsize);
758 		if (newbuf == 0) {
759 			ret = FALSE;
760 		} else {
761 			/* Make pointers valid for the new buffer */
762 			offset = newbuf - rstrm->in_base;
763 			rstrm->in_finger += offset;
764 			rstrm->in_boundry += offset;
765 			rstrm->in_nextrec += offset;
766 			rstrm->in_base = newbuf;
767 			rstrm->recvsize = newsize;
768 		}
769 	}
770 
771 	return (ret);
772 }
773 
774 /*
775  * adjust sizes and allocate buffer quad byte aligned
776  */
777 bool_t
778 __xdrrec_set_conn_nonblock(XDR *xdrs, uint32_t tcp_maxrecsz)
779 {
780 	/* LINTED pointer cast */
781 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
782 	size_t newsize;
783 
784 	rstrm->in_nonblock = TRUE;
785 	if (tcp_maxrecsz == 0) {
786 		/*
787 		 * If maxrecsz has not been set, use the default
788 		 * that was set from xdrrec_create() and
789 		 * fix_buf_size()
790 		 */
791 		rstrm->in_maxrecsz = rstrm->recvsize;
792 		return (TRUE);
793 	}
794 	rstrm->in_maxrecsz = tcp_maxrecsz;
795 	if (tcp_maxrecsz <= rstrm->recvsize)
796 		return (TRUE);
797 
798 	/*
799 	 * For nonblocked connection, the entire record is read into the
800 	 * buffer before any xdr processing. This implies that the record
801 	 * size must allow for the maximum expected message size of the
802 	 * service. However, it's inconvenient to allocate very large
803 	 * buffers up front, so we limit ourselves to a reasonable
804 	 * default size here, and reallocate (up to the maximum record
805 	 * size allowed for the connection) as necessary.
806 	 */
807 	if ((newsize = tcp_maxrecsz) > RPC_MAXDATASIZE) {
808 		newsize = RPC_MAXDATASIZE;
809 	}
810 	if (!__xdrrec_nonblock_realloc(rstrm, newsize)) {
811 		(void) syslog(LOG_ERR, mem_err_msg_rec);
812 		free(rstrm->out_base);
813 		free(rstrm->in_base);
814 		free(rstrm);
815 		return (FALSE);
816 	}
817 
818 	return (TRUE);
819 }
820 
821 /*
822  * Retrieve input data from the non-blocking connection, increase
823  * the size of the read buffer if necessary, and check that the
824  * record size stays below the allowed maximum for the connection.
825  */
826 bool_t
827 __xdrrec_getbytes_nonblock(XDR *xdrs, enum xprt_stat *pstat)
828 {
829 	/* LINTED pointer cast */
830 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
831 	uint32_t prevbytes_thisrec, minreqrecsize;
832 	uint32_t *header;
833 	int32_t len_received = 0;
834 	uint32_t unprocessed = 0;
835 
836 	/*
837 	 * For connection oriented protocols, there's no guarantee that
838 	 * we will receive the data nicely chopped into records, no
839 	 * matter how it was sent. We use the in_nextrec pointer to
840 	 * indicate where in the buffer the next record starts. If
841 	 * in_nextrec != in_base, there's data in the buffer from
842 	 * previous reads, and if in_nextrecsz > 0, we need to copy
843 	 * the portion of the next record already read to the start of
844 	 * the input buffer
845 	 */
846 	if (rstrm->in_nextrecsz > 0) {
847 		/* Starting on new record with data already in the buffer */
848 		(void) memmove(rstrm->in_base, rstrm->in_nextrec,
849 			rstrm->in_nextrecsz);
850 		rstrm->in_nextrec = rstrm->in_finger = rstrm->in_base;
851 		rstrm->in_boundry = rstrm->in_nextrec + rstrm->in_nextrecsz;
852 		unprocessed = rstrm->in_nextrecsz;
853 		rstrm->in_nextrecsz = 0;
854 	} else if (rstrm->in_nextrec == rstrm->in_base) {
855 		/* Starting on new record with empty buffer */
856 		rstrm->in_boundry = rstrm->in_finger = rstrm->in_base;
857 		rstrm->last_frag = FALSE;
858 		rstrm->in_needpoll = TRUE;
859 	}
860 
861 	prevbytes_thisrec = (uint32_t)(rstrm->in_boundry - rstrm->in_base);
862 
863 	/* Do we need to retrieve data ? */
864 	if (rstrm->in_needpoll) {
865 		int len_requested, len_total_received;
866 
867 		rstrm->in_needpoll = FALSE;
868 		len_total_received =
869 			(int)(rstrm->in_boundry - rstrm->in_base);
870 		len_requested = rstrm->recvsize - len_total_received;
871 		/*
872 		 * if len_requested is 0, this means that the input
873 		 * buffer is full and need to be increased.
874 		 * The minimum record size we will need is whatever's
875 		 * already in the buffer, plus what's yet to be
876 		 * consumed in the current fragment, plus space for at
877 		 * least one more fragment header, if this is not the
878 		 * last fragment. We use the RNDUP() macro to
879 		 * account for possible realignment of the next
880 		 * fragment header.
881 		 */
882 		if (len_requested == 0) {
883 			minreqrecsize = rstrm->recvsize +
884 			    rstrm->fbtbc +
885 			    (rstrm->last_frag ? 0 : sizeof (*header));
886 			minreqrecsize = RNDUP(minreqrecsize);
887 			if (minreqrecsize == rstrm->recvsize) {
888 				/*
889 				 * no more bytes to be consumed and
890 				 * last fragment. We should never end up
891 				 * here. Might as well return failure
892 				 * right away.
893 				 */
894 				*pstat = XPRT_DIED;
895 				return (FALSE);
896 			}
897 			if (minreqrecsize > rstrm->in_maxrecsz)
898 				goto recsz_invalid;
899 			else
900 				goto needpoll;
901 		}
902 		if ((len_received = (*(rstrm->readit))(rstrm->tcp_handle,
903 				rstrm->in_boundry, len_requested)) == -1) {
904 			*pstat = XPRT_DIED;
905 			return (FALSE);
906 		}
907 		rstrm->in_boundry += len_received;
908 		rstrm->in_nextrec = rstrm->in_boundry;
909 	}
910 
911 	/* Account for any left over data from previous processing */
912 	len_received += unprocessed;
913 
914 	/* Set a lower limit on the buffer space we'll need */
915 	minreqrecsize = prevbytes_thisrec + rstrm->fbtbc;
916 
917 	/*
918 	 * Consume bytes for this record until it's either complete,
919 	 * rejected, or we need to poll for more bytes.
920 	 *
921 	 * If fbtbc == 0, in_finger points to the start of the fragment
922 	 * header. Otherwise, it points to the start of the fragment data.
923 	 */
924 	while (len_received > 0) {
925 		if (rstrm->fbtbc == 0) {
926 			uint32_t hdrlen, minfraglen = 0;
927 			uint32_t len_recvd_thisfrag;
928 			bool_t last_frag;
929 
930 			len_recvd_thisfrag = (uint32_t)(rstrm->in_boundry -
931 						rstrm->in_finger);
932 			/* LINTED pointer cast */
933 			header = (uint32_t *)rstrm->in_finger;
934 			hdrlen = (len_recvd_thisfrag < sizeof (*header)) ?
935 				len_recvd_thisfrag : sizeof (*header);
936 			(void) memcpy(&minfraglen, header, hdrlen);
937 			last_frag = (ntohl(minfraglen) & LAST_FRAG) != 0;
938 			minfraglen = ntohl(minfraglen) & (~LAST_FRAG);
939 			/*
940 			 * The minimum record size we will need is whatever's
941 			 * already in the buffer, plus the size of this
942 			 * fragment, plus (if this isn't the last fragment)
943 			 * space for at least one more fragment header. We
944 			 * use the RNDUP() macro to account for possible
945 			 * realignment of the next fragment header.
946 			 */
947 			minreqrecsize += minfraglen +
948 					(last_frag?0:sizeof (*header));
949 			minreqrecsize = RNDUP(minreqrecsize);
950 
951 			if (hdrlen < sizeof (*header)) {
952 				/*
953 				 * We only have a partial fragment header,
954 				 * but we can still put a lower limit on the
955 				 * final fragment size, and check against the
956 				 * maximum allowed.
957 				 */
958 				if (len_recvd_thisfrag > 0 &&
959 					(minreqrecsize > rstrm->in_maxrecsz)) {
960 					goto recsz_invalid;
961 				}
962 				/* Need more bytes to obtain fbtbc value */
963 				goto needpoll;
964 			}
965 			/*
966 			 * We've got a complete fragment header, so
967 			 * 'minfraglen' is the actual fragment length, and
968 			 * 'minreqrecsize' the requested record size.
969 			 */
970 			rstrm->last_frag = last_frag;
971 			rstrm->fbtbc = minfraglen;
972 			/*
973 			 * Check that the sum of the total number of bytes read
974 			 * so far (for the record) and the size of the incoming
975 			 * fragment is less than the maximum allowed.
976 			 *
977 			 * If this is the last fragment, also check that the
978 			 * record (message) meets the minimum length
979 			 * requirement.
980 			 *
981 			 * If this isn't the last fragment, check for a zero
982 			 * fragment length. Accepting such fragments would
983 			 * leave us open to an attack where the sender keeps
984 			 * the connection open indefinitely, without any
985 			 * progress, by occasionally sending a zero length
986 			 * fragment.
987 			 */
988 			if ((minreqrecsize > rstrm->in_maxrecsz) ||
989 			(rstrm->last_frag && minreqrecsize < MIN_FRAG) ||
990 			(!rstrm->last_frag && minfraglen == 0)) {
991 recsz_invalid:
992 				rstrm->fbtbc = 0;
993 				rstrm->last_frag = 1;
994 				*pstat = XPRT_DIED;
995 				return (FALSE);
996 			}
997 			/*
998 			 * Make this fragment abut the previous one. If it's
999 			 * the first fragment, just advance in_finger past
1000 			 * the header. This avoids buffer copying for the
1001 			 * usual case where there's one fragment per record.
1002 			 */
1003 			if (rstrm->in_finger == rstrm->in_base) {
1004 				rstrm->in_finger += sizeof (*header);
1005 			} else {
1006 				rstrm->in_boundry -= sizeof (*header);
1007 				(void) memmove(rstrm->in_finger,
1008 					rstrm->in_finger + sizeof (*header),
1009 					rstrm->in_boundry - rstrm->in_finger);
1010 			}
1011 			/* Consume the fragment header */
1012 			if (len_received > sizeof (*header)) {
1013 				len_received -= sizeof (*header);
1014 			} else {
1015 				len_received = 0;
1016 			}
1017 		}
1018 		/*
1019 		 * Consume whatever fragment bytes we have.
1020 		 * If we've received all bytes for this fragment, advance
1021 		 * in_finger to point to the start of the next fragment
1022 		 * header. Otherwise, make fbtbc tell how much is left in
1023 		 * this fragment and advance finger to point to end of
1024 		 * fragment data.
1025 		 */
1026 		if (len_received >= rstrm->fbtbc) {
1027 			len_received -= rstrm->fbtbc;
1028 			rstrm->in_finger += rstrm->fbtbc;
1029 			rstrm->fbtbc = 0;
1030 		} else {
1031 			rstrm->fbtbc -= len_received;
1032 			rstrm->in_finger += len_received;
1033 			len_received = 0;
1034 		}
1035 		/*
1036 		 * If there's more data in the buffer, there are two
1037 		 * possibilities:
1038 		 *
1039 		 * (1)	This is the last fragment, so the extra data
1040 		 *	presumably belongs to the next record.
1041 		 *
1042 		 * (2)	Not the last fragment, so we'll start over
1043 		 *	from the top of the loop.
1044 		 */
1045 		if (len_received > 0 && rstrm->last_frag) {
1046 			rstrm->in_nextrec = rstrm->in_finger;
1047 			rstrm->in_nextrecsz = (uint32_t)(rstrm->in_boundry -
1048 							rstrm->in_nextrec);
1049 			len_received = 0;
1050 		}
1051 	}
1052 
1053 	/* Was this the last fragment, and have we read the entire record ? */
1054 	if (rstrm->last_frag && rstrm->fbtbc == 0) {
1055 		*pstat = XPRT_MOREREQS;
1056 		/*
1057 		 * We've been using both in_finger and fbtbc for our own
1058 		 * purposes. Now's the time to update them to be what
1059 		 * xdrrec_inline() expects. Set in_finger to point to the
1060 		 * start of data for this record, and fbtbc to the number
1061 		 * of bytes in the record.
1062 		 */
1063 		rstrm->fbtbc = (int)(rstrm->in_finger -
1064 				rstrm->in_base - sizeof (*header));
1065 		rstrm->in_finger = rstrm->in_base + sizeof (*header);
1066 		if (rstrm->in_nextrecsz == 0)
1067 			rstrm->in_nextrec = rstrm->in_base;
1068 		return (TRUE);
1069 	}
1070 needpoll:
1071 	/*
1072 	 * Need more bytes, so we set the needpoll flag, and go back to
1073 	 * the main RPC request loop. However, first we reallocate the
1074 	 * input buffer, if necessary.
1075 	 */
1076 	if (minreqrecsize > rstrm->recvsize) {
1077 		if (!__xdrrec_nonblock_realloc(rstrm, minreqrecsize)) {
1078 			rstrm->fbtbc = 0;
1079 			rstrm->last_frag = 1;
1080 			*pstat = XPRT_DIED;
1081 			return (FALSE);
1082 		}
1083 	}
1084 
1085 	rstrm->in_needpoll = TRUE;
1086 	*pstat = XPRT_MOREREQS;
1087 	return (FALSE);
1088 }
1089 
1090 int
1091 __is_xdrrec_first(XDR *xdrs)
1092 {
1093 	/* LINTED pointer cast */
1094 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
1095 	return ((rstrm->firsttime == TRUE) ? 1 : 0);
1096 }
1097 
1098 int
1099 __xdrrec_setfirst(XDR *xdrs)
1100 {
1101 	/* LINTED pointer cast */
1102 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
1103 
1104 	/*
1105 	 * Set rstrm->firsttime only if the input buffer is empty.
1106 	 * Otherwise, the first read from the network could skip
1107 	 * a poll.
1108 	 */
1109 	if (rstrm->in_finger == rstrm->in_boundry)
1110 		rstrm->firsttime = TRUE;
1111 	else
1112 		rstrm->firsttime = FALSE;
1113 	return (1);
1114 }
1115 
1116 int
1117 __xdrrec_resetfirst(XDR *xdrs)
1118 {
1119 	/* LINTED pointer cast */
1120 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
1121 
1122 	rstrm->firsttime = FALSE;
1123 	return (1);
1124 }
1125 
1126 
1127 static uint_t
1128 fix_buf_size(uint_t s)
1129 {
1130 	if (s < 100)
1131 		s = 4000;
1132 	return (RNDUP(s));
1133 }
1134 
1135 
1136 
1137 static bool_t
1138 xdrrec_control(XDR *xdrs, int request, void *info)
1139 {
1140 	/* LINTED pointer cast */
1141 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
1142 	xdr_bytesrec *xptr;
1143 
1144 	switch (request) {
1145 
1146 	case XDR_GET_BYTES_AVAIL:
1147 		/* Check if at end of fragment and not last fragment */
1148 		if ((rstrm->fbtbc == 0)	&& (!rstrm->last_frag))
1149 			if (!set_input_fragment(rstrm)) {
1150 				return (FALSE);
1151 			};
1152 
1153 		xptr = (xdr_bytesrec *)info;
1154 		xptr->xc_is_last_record = rstrm->last_frag;
1155 		xptr->xc_num_avail = rstrm->fbtbc;
1156 
1157 		return (TRUE);
1158 	default:
1159 		return (FALSE);
1160 
1161 	}
1162 
1163 }
1164 
1165 static struct xdr_ops *
1166 xdrrec_ops(void)
1167 {
1168 	static struct xdr_ops ops;
1169 	extern mutex_t	ops_lock;
1170 
1171 /* VARIABLES PROTECTED BY ops_lock: ops */
1172 
1173 	(void) mutex_lock(&ops_lock);
1174 	if (ops.x_getlong == NULL) {
1175 		ops.x_getlong = xdrrec_getlong;
1176 		ops.x_putlong = xdrrec_putlong;
1177 		ops.x_getbytes = xdrrec_getbytes;
1178 		ops.x_putbytes = xdrrec_putbytes;
1179 		ops.x_getpostn = xdrrec_getpos;
1180 		ops.x_setpostn = xdrrec_setpos;
1181 		ops.x_inline = xdrrec_inline;
1182 		ops.x_destroy = xdrrec_destroy;
1183 		ops.x_control = xdrrec_control;
1184 #if defined(_LP64)
1185 		ops.x_getint32 = xdrrec_getint32;
1186 		ops.x_putint32 = xdrrec_putint32;
1187 #endif
1188 	}
1189 	(void) mutex_unlock(&ops_lock);
1190 	return (&ops);
1191 }
1192