xref: /freebsd/contrib/less/ch.c (revision 5521ff5a4d1929056e7ffc982fac3341ca54df7c)
1 /*
2  * Copyright (C) 1984-2000  Mark Nudelman
3  *
4  * You may distribute under the terms of either the GNU General Public
5  * License or the Less License, as specified in the README file.
6  *
7  * For more information about less, or for information on how to
8  * contact the author, see the README file.
9  */
10 
11 
12 /*
13  * Low level character input from the input file.
14  * We use these special purpose routines which optimize moving
15  * both forward and backward from the current read pointer.
16  */
17 
18 #include "less.h"
19 #if MSDOS_COMPILER==WIN32C
20 #include <errno.h>
21 #include <windows.h>
22 #endif
23 
24 public int ignore_eoi;
25 
/*
 * Buffer pool.
 * Each struct buf caches one LBUFSIZE-byte block of the input file.
 * The buffers are linked into a doubly-linked circular list, kept in
 * order from most- to least-recently used, and the list is anchored
 * by the file state "thisfile".
 */
#define LBUFSIZE	1024
struct buf {
	/* List links.  Must be first to match struct filestate. */
	struct buf *next;
	struct buf *prev;
	long block;			/* Block number cached here; -1 if none */
	unsigned int datasize;		/* Count of valid bytes in data[] */
	unsigned char data[LBUFSIZE];	/* The cached bytes themselves */
};
39 
40 /*
41  * The file state is maintained in a filestate structure.
42  * A pointer to the filestate is kept in the ifile structure.
43  */
44 struct filestate {
45 	/* -- Following members must match struct buf */
46 	struct buf *buf_next, *buf_prev;
47 	long buf_block;
48 	/* -- End of struct buf copy */
49 	int file;
50 	int flags;
51 	POSITION fpos;
52 	int nbufs;
53 	long block;
54 	unsigned int offset;
55 	POSITION fsize;
56 };
57 
58 
/*
 * Shorthand for the members of the current file state.
 * END_OF_CHAIN is the list anchor: the filestate viewed as a struct buf.
 */
#define	END_OF_CHAIN	((struct buf *)thisfile)
#define	ch_bufhead	(thisfile->buf_next)
#define	ch_buftail	(thisfile->buf_prev)
#define	ch_nbufs	(thisfile->nbufs)
#define	ch_block	(thisfile->block)
#define	ch_offset	(thisfile->offset)
#define	ch_fpos		(thisfile->fpos)
#define	ch_fsize	(thisfile->fsize)
#define	ch_flags	(thisfile->flags)
#define	ch_file		(thisfile->file)
69 
70 static struct filestate *thisfile;
71 static int ch_ungotchar = -1;
72 
73 extern int autobuf;
74 extern int sigs;
75 extern int cbufs;
76 extern int secure;
77 extern constant char helpdata[];
78 extern constant int size_helpdata;
79 extern IFILE curr_ifile;
80 #if LOGFILE
81 extern int logfile;
82 extern char *namelogfile;
83 #endif
84 
85 static int ch_addbuf();
86 
87 
/*
 * Get the character pointed to by the read pointer.
 * ch_get() is the fast path: when the wanted block is the buffer at
 * the head of the chain and the offset lies within its valid data,
 * the character is fetched directly.  In every other case the work
 * is handed to the function fch_get().
 */
#define	ch_get() \
	((ch_block == ch_bufhead->block && ch_offset < ch_bufhead->datasize) \
		? ch_bufhead->data[ch_offset] \
		: fch_get())
97 	int
98 fch_get()
99 {
100 	register struct buf *bp;
101 	register int n;
102 	register int slept;
103 	POSITION pos;
104 	POSITION len;
105 
106 	slept = FALSE;
107 
108 	/*
109 	 * Look for a buffer holding the desired block.
110 	 */
111 	for (bp = ch_bufhead;  bp != END_OF_CHAIN;  bp = bp->next)
112 		if (bp->block == ch_block)
113 		{
114 			if (ch_offset >= bp->datasize)
115 				/*
116 				 * Need more data in this buffer.
117 				 */
118 				goto read_more;
119 			goto found;
120 		}
121 	/*
122 	 * Block is not in a buffer.
123 	 * Take the least recently used buffer
124 	 * and read the desired block into it.
125 	 * If the LRU buffer has data in it,
126 	 * then maybe allocate a new buffer.
127 	 */
128 	if (ch_buftail == END_OF_CHAIN || ch_buftail->block != (long)(-1))
129 	{
130 		/*
131 		 * There is no empty buffer to use.
132 		 * Allocate a new buffer if:
133 		 * 1. We can't seek on this file and -b is not in effect; or
134 		 * 2. We haven't allocated the max buffers for this file yet.
135 		 */
136 		if ((autobuf && !(ch_flags & CH_CANSEEK)) ||
137 		    (cbufs == -1 || ch_nbufs < cbufs))
138 			if (ch_addbuf())
139 				/*
140 				 * Allocation failed: turn off autobuf.
141 				 */
142 				autobuf = OPT_OFF;
143 	}
144 	bp = ch_buftail;
145 	bp->block = ch_block;
146 	bp->datasize = 0;
147 
148     read_more:
149 	pos = (ch_block * LBUFSIZE) + bp->datasize;
150 	if ((len = ch_length()) != NULL_POSITION && pos >= len)
151 		/*
152 		 * At end of file.
153 		 */
154 		return (EOI);
155 
156 	if (pos != ch_fpos)
157 	{
158 		/*
159 		 * Not at the correct position: must seek.
160 		 * If input is a pipe, we're in trouble (can't seek on a pipe).
161 		 * Some data has been lost: just return "?".
162 		 */
163 		if (!(ch_flags & CH_CANSEEK))
164 			return ('?');
165 		if (lseek(ch_file, (off_t)pos, 0) == BAD_LSEEK)
166 		{
167  			error("seek error", NULL_PARG);
168 			clear_eol();
169 			return (EOI);
170  		}
171  		ch_fpos = pos;
172  	}
173 
174 	/*
175 	 * Read the block.
176 	 * If we read less than a full block, that's ok.
177 	 * We use partial block and pick up the rest next time.
178 	 */
179 	if (ch_ungotchar != -1)
180 	{
181 		bp->data[bp->datasize] = ch_ungotchar;
182 		n = 1;
183 		ch_ungotchar = -1;
184 	} else if (ch_flags & CH_HELPFILE)
185 	{
186 		bp->data[bp->datasize] = helpdata[ch_fpos];
187 		n = 1;
188 	} else
189 	{
190 		n = iread(ch_file, &bp->data[bp->datasize],
191 			(unsigned int)(LBUFSIZE - bp->datasize));
192 	}
193 
194 	if (n == READ_INTR)
195 		return (EOI);
196 	if (n < 0)
197 	{
198 #if MSDOS_COMPILER==WIN32C
199 		if (errno != EPIPE)
200 #endif
201 		{
202 			error("read error", NULL_PARG);
203 			clear_eol();
204 		}
205 		n = 0;
206 	}
207 
208 #if LOGFILE
209 	/*
210 	 * If we have a log file, write the new data to it.
211 	 */
212 	if (!secure && logfile >= 0 && n > 0)
213 		write(logfile, (char *) &bp->data[bp->datasize], n);
214 #endif
215 
216 	ch_fpos += n;
217 	bp->datasize += n;
218 
219 	/*
220 	 * If we have read to end of file, set ch_fsize to indicate
221 	 * the position of the end of file.
222 	 */
223 	if (n == 0)
224 	{
225 		ch_fsize = pos;
226 		if (ignore_eoi)
227 		{
228 			/*
229 			 * We are ignoring EOF.
230 			 * Wait a while, then try again.
231 			 */
232 			if (!slept)
233 				ierror("Waiting for data", NULL_PARG);
234 #if !MSDOS_COMPILER
235 	 		sleep(1);
236 #else
237 #if MSDOS_COMPILER==WIN32C
238 			Sleep(1000);
239 #endif
240 #endif
241 			slept = TRUE;
242 		}
243 		if (sigs)
244 			return (EOI);
245 	}
246 
247     found:
248 	if (ch_bufhead != bp)
249 	{
250 		/*
251 		 * Move the buffer to the head of the buffer chain.
252 		 * This orders the buffer chain, most- to least-recently used.
253 		 */
254 		bp->next->prev = bp->prev;
255 		bp->prev->next = bp->next;
256 
257 		bp->next = ch_bufhead;
258 		bp->prev = END_OF_CHAIN;
259 		ch_bufhead->prev = bp;
260 		ch_bufhead = bp;
261 	}
262 
263 	if (ch_offset >= bp->datasize)
264 		/*
265 		 * After all that, we still don't have enough data.
266 		 * Go back and try again.
267 		 */
268 		goto read_more;
269 
270 	return (bp->data[ch_offset]);
271 }
272 
273 /*
274  * ch_ungetchar is a rather kludgy and limited way to push
275  * a single char onto an input file descriptor.
276  */
277 	public void
278 ch_ungetchar(c)
279 	int c;
280 {
281 	if (c != -1 && ch_ungotchar != -1)
282 		error("ch_ungetchar overrun", NULL_PARG);
283 	ch_ungotchar = c;
284 }
285 
286 #if LOGFILE
287 /*
288  * Close the logfile.
289  * If we haven't read all of standard input into it, do that now.
290  */
291 	public void
292 end_logfile()
293 {
294 	static int tried = FALSE;
295 
296 	if (logfile < 0)
297 		return;
298 	if (!tried && ch_fsize == NULL_POSITION)
299 	{
300 		tried = TRUE;
301 		ierror("Finishing logfile", NULL_PARG);
302 		while (ch_forw_get() != EOI)
303 			if (ABORT_SIGS())
304 				break;
305 	}
306 	close(logfile);
307 	logfile = -1;
308 	namelogfile = NULL;
309 }
310 
311 /*
312  * Start a log file AFTER less has already been running.
313  * Invoked from the - command; see toggle_option().
314  * Write all the existing buffered data to the log file.
315  */
316 	public void
317 sync_logfile()
318 {
319 	register struct buf *bp;
320 	int warned = FALSE;
321 	long block;
322 	long nblocks;
323 
324 	nblocks = (ch_fpos + LBUFSIZE - 1) / LBUFSIZE;
325 	for (block = 0;  block < nblocks;  block++)
326 	{
327 		for (bp = ch_bufhead;  ;  bp = bp->next)
328 		{
329 			if (bp == END_OF_CHAIN)
330 			{
331 				if (!warned)
332 				{
333 					error("Warning: log file is incomplete",
334 						NULL_PARG);
335 					warned = TRUE;
336 				}
337 				break;
338 			}
339 			if (bp->block == block)
340 			{
341 				write(logfile, (char *) bp->data, bp->datasize);
342 				break;
343 			}
344 		}
345 	}
346 }
347 
348 #endif
349 
350 /*
351  * Determine if a specific block is currently in one of the buffers.
352  */
353 	static int
354 buffered(block)
355 	long block;
356 {
357 	register struct buf *bp;
358 
359 	for (bp = ch_bufhead;  bp != END_OF_CHAIN;  bp = bp->next)
360 		if (bp->block == block)
361 			return (TRUE);
362 	return (FALSE);
363 }
364 
365 /*
366  * Seek to a specified position in the file.
367  * Return 0 if successful, non-zero if can't seek there.
368  */
369 	public int
370 ch_seek(pos)
371 	register POSITION pos;
372 {
373 	long new_block;
374 	POSITION len;
375 
376 	len = ch_length();
377 	if (pos < ch_zero() || (len != NULL_POSITION && pos > len))
378 		return (1);
379 
380 	new_block = pos / LBUFSIZE;
381 	if (!(ch_flags & CH_CANSEEK) && pos != ch_fpos && !buffered(new_block))
382 	{
383 		if (ch_fpos > pos)
384 			return (1);
385 		while (ch_fpos < pos)
386 		{
387 			if (ch_forw_get() == EOI)
388 				return (1);
389 			if (ABORT_SIGS())
390 				return (1);
391 		}
392 		return (0);
393 	}
394 	/*
395 	 * Set read pointer.
396 	 */
397 	ch_block = new_block;
398 	ch_offset = pos % LBUFSIZE;
399 	return (0);
400 }
401 
402 /*
403  * Seek to the end of the file.
404  */
405 	public int
406 ch_end_seek()
407 {
408 	POSITION len;
409 
410 	if (ch_flags & CH_CANSEEK)
411 		ch_fsize = filesize(ch_file);
412 
413 	len = ch_length();
414 	if (len != NULL_POSITION)
415 		return (ch_seek(len));
416 
417 	/*
418 	 * Do it the slow way: read till end of data.
419 	 */
420 	while (ch_forw_get() != EOI)
421 		if (ABORT_SIGS())
422 			return (1);
423 	return (0);
424 }
425 
426 /*
427  * Seek to the beginning of the file, or as close to it as we can get.
428  * We may not be able to seek there if input is a pipe and the
429  * beginning of the pipe is no longer buffered.
430  */
431 	public int
432 ch_beg_seek()
433 {
434 	register struct buf *bp, *firstbp;
435 
436 	/*
437 	 * Try a plain ch_seek first.
438 	 */
439 	if (ch_seek(ch_zero()) == 0)
440 		return (0);
441 
442 	/*
443 	 * Can't get to position 0.
444 	 * Look thru the buffers for the one closest to position 0.
445 	 */
446 	firstbp = bp = ch_bufhead;
447 	if (bp == END_OF_CHAIN)
448 		return (1);
449 	while ((bp = bp->next) != END_OF_CHAIN)
450 		if (bp->block < firstbp->block)
451 			firstbp = bp;
452 	ch_block = firstbp->block;
453 	ch_offset = 0;
454 	return (0);
455 }
456 
457 /*
458  * Return the length of the file, if known.
459  */
460 	public POSITION
461 ch_length()
462 {
463 	if (ignore_eoi)
464 		return (NULL_POSITION);
465 	if (ch_flags & CH_HELPFILE)
466 		return (size_helpdata);
467 	return (ch_fsize);
468 }
469 
470 /*
471  * Return the current position in the file.
472  */
473 #define	tellpos(blk,off)   ((POSITION)((((long)(blk)) * LBUFSIZE) + (off)))
474 
475 	public POSITION
476 ch_tell()
477 {
478 	return (tellpos(ch_block, ch_offset));
479 }
480 
481 /*
482  * Get the current char and post-increment the read pointer.
483  */
484 	public int
485 ch_forw_get()
486 {
487 	register int c;
488 
489 	c = ch_get();
490 	if (c == EOI)
491 		return (EOI);
492 	if (ch_offset < LBUFSIZE-1)
493 		ch_offset++;
494 	else
495 	{
496 		ch_block ++;
497 		ch_offset = 0;
498 	}
499 	return (c);
500 }
501 
502 /*
503  * Pre-decrement the read pointer and get the new current char.
504  */
505 	public int
506 ch_back_get()
507 {
508 	if (ch_offset > 0)
509 		ch_offset --;
510 	else
511 	{
512 		if (ch_block <= 0)
513 			return (EOI);
514 		if (!(ch_flags & CH_CANSEEK) && !buffered(ch_block-1))
515 			return (EOI);
516 		ch_block--;
517 		ch_offset = LBUFSIZE-1;
518 	}
519 	return (ch_get());
520 }
521 
522 /*
523  * Allocate buffers.
524  * Caller wants us to have a total of at least want_nbufs buffers.
525  */
526 	public int
527 ch_nbuf(want_nbufs)
528 	int want_nbufs;
529 {
530 	PARG parg;
531 
532 	while (ch_nbufs < want_nbufs)
533 	{
534 		if (ch_addbuf())
535 		{
536 			/*
537 			 * Cannot allocate enough buffers.
538 			 * If we don't have ANY, then quit.
539 			 * Otherwise, just report the error and return.
540 			 */
541 			parg.p_int = want_nbufs - ch_nbufs;
542 			error("Cannot allocate %d buffers", &parg);
543 			if (ch_nbufs == 0)
544 				quit(QUIT_ERROR);
545 			break;
546 		}
547 	}
548 	return (ch_nbufs);
549 }
550 
551 /*
552  * Flush (discard) any saved file state, including buffer contents.
553  */
554 	public void
555 ch_flush()
556 {
557 	register struct buf *bp;
558 
559 	if (!(ch_flags & CH_CANSEEK))
560 	{
561 		/*
562 		 * If input is a pipe, we don't flush buffer contents,
563 		 * since the contents can't be recovered.
564 		 */
565 		ch_fsize = NULL_POSITION;
566 		return;
567 	}
568 
569 	/*
570 	 * Initialize all the buffers.
571 	 */
572 	for (bp = ch_bufhead;  bp != END_OF_CHAIN;  bp = bp->next)
573 		bp->block = (long)(-1);
574 
575 	/*
576 	 * Figure out the size of the file, if we can.
577 	 */
578 	ch_fsize = filesize(ch_file);
579 
580 	/*
581 	 * Seek to a known position: the beginning of the file.
582 	 */
583 	ch_fpos = 0;
584 	ch_block = 0; /* ch_fpos / LBUFSIZE; */
585 	ch_offset = 0; /* ch_fpos % LBUFSIZE; */
586 
587 #if 1
588 	/*
589 	 * This is a kludge to workaround a Linux kernel bug: files in
590 	 * /proc have a size of 0 according to fstat() but have readable
591 	 * data.  They are sometimes, but not always, seekable.
592 	 * Force them to be non-seekable here.
593 	 */
594 	if (ch_fsize == 0)
595 	{
596 		ch_fsize = NULL_POSITION;
597 		ch_flags &= ~CH_CANSEEK;
598 	}
599 #endif
600 
601 	if (lseek(ch_file, (off_t)0, 0) == BAD_LSEEK)
602 	{
603 		/*
604 		 * Warning only; even if the seek fails for some reason,
605 		 * there's a good chance we're at the beginning anyway.
606 		 * {{ I think this is bogus reasoning. }}
607 		 */
608 		error("seek error to 0", NULL_PARG);
609 	}
610 }
611 
612 /*
613  * Allocate a new buffer.
614  * The buffer is added to the tail of the buffer chain.
615  */
616 	static int
617 ch_addbuf()
618 {
619 	register struct buf *bp;
620 
621 	/*
622 	 * Allocate and initialize a new buffer and link it
623 	 * onto the tail of the buffer list.
624 	 */
625 	bp = (struct buf *) calloc(1, sizeof(struct buf));
626 	if (bp == NULL)
627 		return (1);
628 	ch_nbufs++;
629 	bp->block = (long)(-1);
630 	bp->next = END_OF_CHAIN;
631 	bp->prev = ch_buftail;
632 	ch_buftail->next = bp;
633 	ch_buftail = bp;
634 	return (0);
635 }
636 
637 /*
638  * Delete all buffers for this file.
639  */
640 	static void
641 ch_delbufs()
642 {
643 	register struct buf *bp;
644 
645 	while (ch_bufhead != END_OF_CHAIN)
646 	{
647 		bp = ch_bufhead;
648 		bp->next->prev = bp->prev;;
649 		bp->prev->next = bp->next;
650 		free(bp);
651 	}
652 	ch_nbufs = 0;
653 }
654 
655 /*
656  * Is it possible to seek on a file descriptor?
657  */
658 	public int
659 seekable(f)
660 	int f;
661 {
662 #if MSDOS_COMPILER
663 	extern int fd0;
664 	if (f == fd0 && !isatty(fd0))
665 	{
666 		/*
667 		 * In MS-DOS, pipes are seekable.  Check for
668 		 * standard input, and pretend it is not seekable.
669 		 */
670 		return (0);
671 	}
672 #endif
673 	return (lseek(f, (off_t)1, 0) != BAD_LSEEK);
674 }
675 
676 /*
677  * Initialize file state for a new file.
678  */
679 	public void
680 ch_init(f, flags)
681 	int f;
682 	int flags;
683 {
684 	/*
685 	 * See if we already have a filestate for this file.
686 	 */
687 	thisfile = (struct filestate *) get_filestate(curr_ifile);
688 	if (thisfile == NULL)
689 	{
690 		/*
691 		 * Allocate and initialize a new filestate.
692 		 */
693 		thisfile = (struct filestate *)
694 				calloc(1, sizeof(struct filestate));
695 		thisfile->buf_next = thisfile->buf_prev = END_OF_CHAIN;
696 		thisfile->buf_block = (long)(-1);
697 		thisfile->nbufs = 0;
698 		thisfile->flags = 0;
699 		thisfile->fpos = 0;
700 		thisfile->block = 0;
701 		thisfile->offset = 0;
702 		thisfile->file = -1;
703 		thisfile->fsize = NULL_POSITION;
704 		ch_flags = flags;
705 		/*
706 		 * Try to seek; set CH_CANSEEK if it works.
707 		 */
708 		if ((flags & CH_CANSEEK) && !seekable(f))
709 			ch_flags &= ~CH_CANSEEK;
710 		set_filestate(curr_ifile, (void *) thisfile);
711 	}
712 	if (thisfile->file == -1)
713 		thisfile->file = f;
714 	ch_flush();
715 }
716 
717 /*
718  * Close a filestate.
719  */
720 	public void
721 ch_close()
722 {
723 	int keepstate = FALSE;
724 
725 	if (ch_flags & (CH_CANSEEK|CH_POPENED|CH_HELPFILE))
726 	{
727 		/*
728 		 * We can seek or re-open, so we don't need to keep buffers.
729 		 */
730 		ch_delbufs();
731 	} else
732 		keepstate = TRUE;
733 	if (!(ch_flags & CH_KEEPOPEN))
734 	{
735 		/*
736 		 * We don't need to keep the file descriptor open
737 		 * (because we can re-open it.)
738 		 * But don't really close it if it was opened via popen(),
739 		 * because pclose() wants to close it.
740 		 */
741 		if (!(ch_flags & (CH_POPENED|CH_HELPFILE)))
742 			close(ch_file);
743 		ch_file = -1;
744 	} else
745 		keepstate = TRUE;
746 	if (!keepstate)
747 	{
748 		/*
749 		 * We don't even need to keep the filestate structure.
750 		 */
751 		free(thisfile);
752 		thisfile = NULL;
753 		set_filestate(curr_ifile, (void *) NULL);
754 	}
755 }
756 
757 /*
758  * Return ch_flags for the current file.
759  */
760 	public int
761 ch_getflags()
762 {
763 	return (ch_flags);
764 }
765 
766 #if 0
767 	public void
768 ch_dump(struct filestate *fs)
769 {
770 	struct buf *bp;
771 	unsigned char *s;
772 
773 	if (fs == NULL)
774 	{
775 		printf(" --no filestate\n");
776 		return;
777 	}
778 	printf(" file %d, flags %x, fpos %x, fsize %x, blk/off %x/%x\n",
779 		fs->file, fs->flags, fs->fpos,
780 		fs->fsize, fs->block, fs->offset);
781 	printf(" %d bufs:\n", fs->nbufs);
782 	for (bp = fs->buf_next; bp != (struct buf *)fs;  bp = bp->next)
783 	{
784 		printf("%x: blk %x, size %x \"",
785 			bp, bp->block, bp->datasize);
786 		for (s = bp->data;  s < bp->data + 30;  s++)
787 			if (*s >= ' ' && *s < 0x7F)
788 				printf("%c", *s);
789 			else
790 				printf(".");
791 		printf("\"\n");
792 	}
793 }
794 #endif
795