xref: /illumos-gate/usr/src/stand/lib/fs/nfs/xdr_rec.c (revision 35a5a3587fd94b666239c157d3722745250ccbd7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
29 
30 /*
31  * Portions of this source code were derived from Berkeley 4.3 BSD
32  * under license from the Regents of the University of California.
33  */
34 
35 #pragma ident	"%Z%%M%	%I%	%E% SMI"
36 
37 /*
38  * xdr_rec.c, Implements TCP/IP based XDR streams with a "record marking"
39  * layer above tcp (for rpc's use).
40  *
41  * These routines interface XDRSTREAMS to a tcp/ip connection.
42  * There is a record marking layer between the xdr stream
43  * and the tcp transport level.  A record is composed of one or more
44  * record fragments.  A record fragment is a thirty-two bit header followed
45  * by n bytes of data, where n is contained in the header.  The header
46  * is represented as a htonl(u_long).  The high order bit encodes
47  * whether or not the fragment is the last fragment of the record
48  * (1 => fragment is last, 0 => more fragments to follow).
49  * The other 31 bits encode the byte length of the fragment.
50  */
51 
52 #include <rpc/types.h>
53 #include <rpc/xdr.h>
54 #include <netinet/in.h>
55 #include <sys/promif.h>
56 #include <sys/salib.h>
57 #include <sys/bootdebug.h>
58 
59 #define	dprintf if (boothowto & RB_DEBUG) printf
60 
61 extern long	lseek();
62 
63 static bool_t	xdrrec_getint32();
64 static bool_t	xdrrec_putint32();
65 static bool_t	xdrrec_getbytes();
66 static bool_t	xdrrec_putbytes();
67 static uint_t	xdrrec_getpos();
68 static bool_t	xdrrec_setpos();
69 static int32_t *xdrrec_inline();
70 static void	xdrrec_destroy();
71 
72 static struct xdr_ops *xdrrec_ops();
73 static bool_t flush_out();
74 static bool_t fill_input_buf();
75 static bool_t get_input_bytes();
76 static bool_t set_input_fragment();
77 static bool_t skip_input_bytes();
78 static uint_t fix_buf_size();
79 
80 /*
81  * A record is composed of one or more record fragments.
82  * A record fragment is a four-byte header followed by zero to
83  * 2**31-1 bytes.  The header is treated as a long unsigned and is
84  * encoded/decoded to the network via htonl/ntohl.  The low order 31 bits
85  * are a byte count of the fragment.  The highest order bit is a boolean:
86  * 1 => this fragment is the last fragment of the record,
87  * 0 => this fragment is followed by more fragment(s).
88  *
89  * The fragment/record machinery is not general;  it is constructed to
90  * meet the needs of xdr and rpc based on tcp.
91  */
92 #define	LAST_FRAG 0x80000000
93 
94 typedef struct rec_strm {
95 	caddr_t tcp_handle;
96 	caddr_t the_buffer;
97 	/*
98 	 * out-goung bits
99 	 */
100 	int (*writeit)();
101 	caddr_t out_base;	/* output buffer (points to frag header) */
102 	caddr_t out_finger;	/* next output position */
103 	caddr_t out_boundry;	/* data cannot up to this address */
104 	uint32_t *frag_header;	/* beginning of current fragment */
105 	bool_t frag_sent;	/* true if buffer sent in middle of record */
106 	/*
107 	 * in-coming bits
108 	 */
109 	int (*readit)();
110 	uint32_t in_size;	/* fixed size of the input buffer */
111 	caddr_t in_base;
112 	caddr_t in_finger;	/* location of next byte to be had */
113 	caddr_t in_boundry;	/* can read up to this location */
114 	int fbtbc;		/* fragment bytes to be consumed */
115 	bool_t last_frag;
116 	uint_t sendsize;
117 	uint_t recvsize;
118 } RECSTREAM;
119 
120 
121 /*
122  * Create an xdr handle for xdrrec
123  * xdrrec_create fills in xdrs.  Sendsize and recvsize are
124  * send and recv buffer sizes (0 => use default).
125  * tcp_handle is an opaque handle that is passed as the first parameter to
126  * the procedures readit and writeit.  Readit and writeit are read and
127  * write respectively.   They are like the system
128  * calls expect that they take an opaque handle rather than an fd.
129  */
130 void
131 xdrrec_create(XDR *xdrs, uint_t sendsize, uint_t recvsize, caddr_t tcp_handle,
132 		int (*readit)(), int (*writeit)())
133 {
134 	RECSTREAM *rstrm = (RECSTREAM *)mem_alloc(sizeof (RECSTREAM));
135 	if (rstrm == NULL) {
136 		dprintf("xdrrec_create: out of memory\n");
137 		/*
138 		 *  This is bad.  Should rework xdrrec_create to
139 		 *  return a handle, and in this case return NULL
140 		 */
141 		return;
142 	}
143 	/*
144 	 * adjust sizes and allocate buffer quad byte aligned
145 	 */
146 	rstrm->sendsize = sendsize = fix_buf_size(sendsize);
147 	rstrm->recvsize = recvsize = fix_buf_size(recvsize);
148 	rstrm->the_buffer = mem_alloc(sendsize + recvsize + BYTES_PER_XDR_UNIT);
149 	if (rstrm->the_buffer == NULL) {
150 		dprintf("xdrrec_create: out of memory\n");
151 		return;
152 	}
153 	for (rstrm->out_base = rstrm->the_buffer;
154 		(uintptr_t)rstrm->out_base % BYTES_PER_XDR_UNIT != 0;
155 		rstrm->out_base++);
156 	rstrm->in_base = rstrm->out_base + sendsize;
157 	/*
158 	 * now the rest ...
159 	 */
160 	xdrs->x_ops = xdrrec_ops();
161 	xdrs->x_private = (caddr_t)rstrm;
162 	rstrm->tcp_handle = tcp_handle;
163 	rstrm->readit = readit;
164 	rstrm->writeit = writeit;
165 	rstrm->out_finger = rstrm->out_boundry = rstrm->out_base;
166 	rstrm->frag_header = (uint32_t *)rstrm->out_base;
167 	rstrm->out_finger += sizeof (uint_t);
168 	rstrm->out_boundry += sendsize;
169 	rstrm->frag_sent = FALSE;
170 	rstrm->in_size = recvsize;
171 	rstrm->in_boundry = rstrm->in_base;
172 	rstrm->in_finger = (rstrm->in_boundry += recvsize);
173 	rstrm->fbtbc = 0;
174 	rstrm->last_frag = TRUE;
175 
176 }
177 
178 
179 /*
180  * The routines defined below are the xdr ops which will go into the
181  * xdr handle filled in by xdrrec_create.
182  */
183 
184 static bool_t
185 xdrrec_getint32(XDR *xdrs, int32_t *ip)
186 {
187 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
188 	int32_t *bufip = (int32_t *)(rstrm->in_finger);
189 	int32_t myint;
190 
191 	/* first try the inline, fast case */
192 	if ((rstrm->fbtbc >= sizeof (int32_t)) &&
193 		(((ptrdiff_t)rstrm->in_boundry
194 		    - (ptrdiff_t)bufip) >= sizeof (int32_t))) {
195 		*ip = (int32_t)ntohl((uint32_t)(*bufip));
196 		rstrm->fbtbc -= sizeof (int32_t);
197 		rstrm->in_finger += sizeof (int32_t);
198 	} else {
199 		if (!xdrrec_getbytes(xdrs, (caddr_t)&myint, sizeof (int32_t)))
200 			return (FALSE);
201 		*ip = (int32_t)ntohl((uint32_t)myint);
202 	}
203 	return (TRUE);
204 }
205 
206 static bool_t
207 xdrrec_putint32(XDR *xdrs, int32_t *ip)
208 {
209 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
210 	int32_t *dest_ip = ((int32_t *)(rstrm->out_finger));
211 
212 	if ((rstrm->out_finger += sizeof (int32_t)) > rstrm->out_boundry) {
213 		/*
214 		 * this case should almost never happen so the code is
215 		 * inefficient
216 		 */
217 		rstrm->out_finger -= sizeof (int32_t);
218 		rstrm->frag_sent = TRUE;
219 		if (! flush_out(rstrm, FALSE))
220 			return (FALSE);
221 		dest_ip = ((int32_t *)(rstrm->out_finger));
222 		rstrm->out_finger += sizeof (int32_t);
223 	}
224 	*dest_ip = (int32_t)htonl((uint32_t)(*ip));
225 	return (TRUE);
226 }
227 
228 /*
229  * We need to be a little smarter here because we don't want to induce any
230  * pathological behavior in inetboot's networking stack.  The algorithm we
231  * pursue is to try to consume the entire fragment exactly instead of
232  * blindly requesting the max to fill the input buffer.
233  */
234 static bool_t  /* must manage buffers, fragments, and records */
235 xdrrec_getbytes(XDR *xdrs, caddr_t addr, int32_t len)
236 {
237 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
238 	int current;
239 	int frag_len;
240 
241 	while (len > 0) {
242 		current =  frag_len = rstrm->fbtbc;
243 		if (current == 0) {
244 			if (rstrm->last_frag)
245 				return (FALSE);
246 			if (!set_input_fragment(rstrm))
247 				return (FALSE);
248 			continue;
249 		}
250 
251 		current = (len < current) ? len : current;
252 		if (!get_input_bytes(rstrm, addr, frag_len, current))
253 			return (FALSE);
254 		addr += current;
255 		rstrm->fbtbc -= current;
256 		len -= current;
257 	}
258 	return (TRUE);
259 }
260 
261 static bool_t
262 xdrrec_putbytes(XDR *xdrs, caddr_t addr, int32_t len)
263 {
264 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
265 	ptrdiff_t current;
266 
267 	while (len > 0) {
268 		current = rstrm->out_boundry - rstrm->out_finger;
269 		current = (len < current) ? len : current;
270 		bcopy(addr, rstrm->out_finger, current);
271 		rstrm->out_finger += current;
272 		addr += current;
273 		len -= current;
274 		if (rstrm->out_finger == rstrm->out_boundry) {
275 			rstrm->frag_sent = TRUE;
276 			if (! flush_out(rstrm, FALSE))
277 				return (FALSE);
278 		}
279 	}
280 	return (TRUE);
281 }
282 
283 static uint_t
284 xdrrec_getpos(XDR *xdrs)
285 {
286 	RECSTREAM *rstrm = (RECSTREAM *)xdrs->x_private;
287 	int32_t pos;
288 
289 	pos = lseek((int)(intptr_t)rstrm->tcp_handle, 0, 1);
290 	if (pos != -1)
291 		switch (xdrs->x_op) {
292 
293 		case XDR_ENCODE:
294 			pos += rstrm->out_finger - rstrm->out_base;
295 			break;
296 
297 		case XDR_DECODE:
298 			pos -= rstrm->in_boundry - rstrm->in_finger;
299 			break;
300 
301 		default:
302 			pos = (uint_t)-1;
303 			break;
304 		}
305 	return ((uint_t)pos);
306 }
307 
308 static bool_t
309 xdrrec_setpos(XDR *xdrs, uint_t pos)
310 {
311 	RECSTREAM *rstrm = (RECSTREAM *)xdrs->x_private;
312 	uint_t currpos = xdrrec_getpos(xdrs);
313 	int delta = currpos - pos;
314 	caddr_t newpos;
315 
316 	if ((int)currpos != -1)
317 		switch (xdrs->x_op) {
318 
319 		case XDR_ENCODE:
320 			newpos = rstrm->out_finger - delta;
321 			if ((newpos > (caddr_t)(rstrm->frag_header)) &&
322 				(newpos < rstrm->out_boundry)) {
323 				rstrm->out_finger = newpos;
324 				return (TRUE);
325 			}
326 			break;
327 
328 		case XDR_DECODE:
329 			newpos = rstrm->in_finger - delta;
330 			if ((delta < (int)(rstrm->fbtbc)) &&
331 				(newpos <= rstrm->in_boundry) &&
332 				(newpos >= rstrm->in_base)) {
333 				rstrm->in_finger = newpos;
334 				rstrm->fbtbc -= delta;
335 				return (TRUE);
336 			}
337 			break;
338 		}
339 	return (FALSE);
340 }
341 
342 static int32_t *
343 xdrrec_inline(XDR *xdrs, int len)
344 {
345 	RECSTREAM *rstrm = (RECSTREAM *)xdrs->x_private;
346 	int32_t *buf = NULL;
347 
348 	switch (xdrs->x_op) {
349 
350 	case XDR_ENCODE:
351 		if ((rstrm->out_finger + len) <= rstrm->out_boundry) {
352 			buf = (int32_t *)rstrm->out_finger;
353 			rstrm->out_finger += len;
354 		}
355 		break;
356 
357 	case XDR_DECODE:
358 		if ((len <= rstrm->fbtbc) &&
359 			((rstrm->in_finger + len) <= rstrm->in_boundry)) {
360 			buf = (int32_t *)rstrm->in_finger;
361 			rstrm->fbtbc -= len;
362 			rstrm->in_finger += len;
363 		}
364 		break;
365 	}
366 	return (buf);
367 }
368 
369 static void
370 xdrrec_destroy(XDR *xdrs)
371 {
372 	RECSTREAM *rstrm = (RECSTREAM *)xdrs->x_private;
373 
374 	mem_free(rstrm->the_buffer,
375 		rstrm->sendsize + rstrm->recvsize + BYTES_PER_XDR_UNIT);
376 	mem_free((caddr_t)rstrm, sizeof (RECSTREAM));
377 }
378 
379 
380 /*
381  * Exported routines to manage xdr records
382  */
383 
384 /*
385  * Before reading (deserializing from the stream, one should always call
386  * this procedure to guarantee proper record alignment.
387  */
388 bool_t
389 xdrrec_skiprecord(XDR *xdrs)
390 {
391 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
392 
393 	while (rstrm->fbtbc > 0 || (! rstrm->last_frag)) {
394 		if (! skip_input_bytes(rstrm, rstrm->fbtbc))
395 			return (FALSE);
396 		rstrm->fbtbc = 0;
397 		if ((! rstrm->last_frag) && (! set_input_fragment(rstrm)))
398 			return (FALSE);
399 	}
400 	rstrm->last_frag = FALSE;
401 	return (TRUE);
402 }
403 
404 #ifdef notneeded
405 /*
406  * Look ahead fuction.
407  * Returns TRUE iff there is no more input in the buffer
408  * after consuming the rest of the current record.
409  */
410 bool_t
411 xdrrec_eof(XDR *xdrs)
412 {
413 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
414 
415 	while (rstrm->fbtbc > 0 || (! rstrm->last_frag)) {
416 		if (! skip_input_bytes(rstrm, rstrm->fbtbc))
417 			return (TRUE);
418 		rstrm->fbtbc = 0;
419 		if ((! rstrm->last_frag) && (! set_input_fragment(rstrm)))
420 			return (TRUE);
421 	}
422 	if (rstrm->in_finger == rstrm->in_boundry)
423 		return (TRUE);
424 	return (FALSE);
425 }
426 #endif /* notneeded */
427 
428 /*
429  * The client must tell the package when an end-of-record has occurred.
430  * The second paraemters tells whether the record should be flushed to the
431  * (output) tcp stream.  (This let's the package support batched or
432  * pipelined procedure calls.)  TRUE => immmediate flush to tcp connection.
433  */
434 bool_t
435 xdrrec_endofrecord(XDR *xdrs, bool_t sendnow)
436 {
437 	RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
438 	ptrdiff_t len;  /* fragment length */
439 
440 	if (sendnow || rstrm->frag_sent ||
441 		((ptrdiff_t)rstrm->out_finger + sizeof (uint32_t)
442 		    >= (ptrdiff_t)rstrm->out_boundry)) {
443 		rstrm->frag_sent = FALSE;
444 		return (flush_out(rstrm, TRUE));
445 	}
446 	len = (ptrdiff_t)rstrm->out_finger - (ptrdiff_t)rstrm->frag_header;
447 	len -= sizeof (uint32_t);
448 	*(rstrm->frag_header) = htonl((uint32_t)len | LAST_FRAG);
449 	rstrm->frag_header = (uint32_t *)rstrm->out_finger;
450 	rstrm->out_finger += sizeof (uint32_t);
451 	return (TRUE);
452 }
453 
454 
455 /*
456  * Internal useful routines
457  */
458 static bool_t
459 flush_out(RECSTREAM *rstrm, bool_t eor)
460 {
461 	uint32_t eormask = (eor == TRUE) ? LAST_FRAG : 0;
462 	ptrdiff_t len;
463 
464 	len = (ptrdiff_t)rstrm->out_finger - (ptrdiff_t)rstrm->frag_header;
465 	len -= sizeof (uint32_t);
466 
467 	*(rstrm->frag_header) = htonl(len | eormask);
468 	len = rstrm->out_finger - rstrm->out_base;
469 	if ((*(rstrm->writeit))(rstrm->tcp_handle, rstrm->out_base, (int)len)
470 	    != (int)len)
471 		return (FALSE);
472 
473 	rstrm->frag_header = (uint32_t *)rstrm->out_base;
474 	rstrm->out_finger = (caddr_t)rstrm->out_base + sizeof (uint32_t);
475 	return (TRUE);
476 }
477 
478 static bool_t  /* knows nothing about records!  Only about input buffers */
479 fill_input_buf(RECSTREAM *rstrm, int frag_len)
480 {
481 	caddr_t where;
482 	uintptr_t i;
483 	int len;
484 
485 	where = rstrm->in_base;
486 	i = (uintptr_t)rstrm->in_boundry % BYTES_PER_XDR_UNIT;
487 	where += i;
488 	len = (frag_len < (rstrm->in_size - i)) ? frag_len :
489 		rstrm->in_size - i;
490 #ifdef DEBUG
491 	printf("fill_input_buf: len = %d\n", len);
492 #endif
493 	if ((len = (*(rstrm->readit))(rstrm->tcp_handle, where, len)) == -1)
494 		return (FALSE);
495 	rstrm->in_finger = where;
496 	where += len;
497 	rstrm->in_boundry = where;
498 	return (TRUE);
499 }
500 
501 static bool_t
502 get_input_bytes(RECSTREAM *rstrm, caddr_t addr, int frag_len, int len)
503 {
504 	ptrdiff_t current;
505 
506 	while (len > 0) {
507 		current = rstrm->in_boundry - rstrm->in_finger;
508 #ifdef DEBUG
509 	printf("get_input_bytes: len = %d, frag_len = %d, current %d\n",
510 		len, frag_len, current);
511 #endif
512 		/*
513 		 * set_input_bytes doesn't know how large the fragment is, we
514 		 * need to get the header so just grab a header's size worth
515 		 */
516 		if (frag_len == 0)
517 			frag_len = len;
518 
519 		if (current == 0) {
520 			if (! fill_input_buf(rstrm, frag_len))
521 				return (FALSE);
522 			continue;
523 		}
524 
525 		current = (len < current) ? len : current;
526 		bcopy(rstrm->in_finger, addr, current);
527 		rstrm->in_finger += current;
528 		addr += current;
529 		len -= current;
530 	}
531 	return (TRUE);
532 }
533 
534 static bool_t  /* next four bytes of the input stream are treated as a header */
535 set_input_fragment(RECSTREAM *rstrm)
536 {
537 	uint32_t header;
538 
539 	if (! get_input_bytes(rstrm, (caddr_t)&header, 0, sizeof (header)))
540 		return (FALSE);
541 	header = (uint32_t)ntohl(header);
542 	rstrm->last_frag = ((header & LAST_FRAG) == 0) ? FALSE : TRUE;
543 	rstrm->fbtbc = header & (~LAST_FRAG);
544 #ifdef DEBUG
545 	printf("set_input_fragment: frag_len = %d, last frag = %s\n",
546 		rstrm->fbtbc, rstrm->last_frag ? "TRUE" : "FALSE");
547 #endif
548 	return (TRUE);
549 }
550 
551 static bool_t  /* consumes input bytes; knows nothing about records! */
552 skip_input_bytes(RECSTREAM *rstrm, int32_t cnt)
553 {
554 	ptrdiff_t current;
555 #ifdef DEBUG
556 	printf("skip_input_fragment: cnt = %d\n", cnt);
557 #endif
558 	while (cnt > 0) {
559 		current = rstrm->in_boundry - rstrm->in_finger;
560 		if (current == 0) {
561 			if (! fill_input_buf(rstrm, cnt))
562 				return (FALSE);
563 			continue;
564 		}
565 		current = (cnt < current) ? cnt : current;
566 		rstrm->in_finger += current;
567 		cnt -= current;
568 	}
569 	return (TRUE);
570 }
571 
572 static uint_t
573 fix_buf_size(uint_t s)
574 {
575 
576 	if (s < 100)
577 		s = 4000;
578 	return (RNDUP(s));
579 }
580 
581 static struct xdr_ops *
582 xdrrec_ops()
583 {
584 	static struct xdr_ops ops;
585 
586 	if (ops.x_getint32 == NULL) {
587 		ops.x_getint32 = xdrrec_getint32;
588 		ops.x_putint32 = xdrrec_putint32;
589 		ops.x_getbytes = xdrrec_getbytes;
590 		ops.x_putbytes = xdrrec_putbytes;
591 		ops.x_getpostn = xdrrec_getpos;
592 		ops.x_setpostn = xdrrec_setpos;
593 		ops.x_inline = xdrrec_inline;
594 		ops.x_destroy = xdrrec_destroy;
595 	}
596 
597 	return (&ops);
598 }
599