xref: /freebsd/contrib/less/ch.c (revision 282a3889ebf826db9839be296ff1dd903f6d6d6e)
1 /*
2  * Copyright (C) 1984-2007  Mark Nudelman
3  *
4  * You may distribute under the terms of either the GNU General Public
5  * License or the Less License, as specified in the README file.
6  *
7  * For more information about less, or for information on how to
8  * contact the author, see the README file.
9  */
10 
11 
12 /*
13  * Low level character input from the input file.
14  * We use these special purpose routines which optimize moving
15  * both forward and backward from the current read pointer.
16  */
17 
18 #include "less.h"
19 #if MSDOS_COMPILER==WIN32C
20 #include <errno.h>
21 #include <windows.h>
22 #endif
23 
/*
 * A BLOCKNUM is the index of one LBUFSIZE-byte block of the input file;
 * ch_seek() derives it from a file position as (pos / LBUFSIZE).
 */
24 typedef POSITION BLOCKNUM;
25 
/*
 * When nonzero, end of input is not treated as final: ch_length()
 * reports NULL_POSITION, and fch_get() sleeps and tries again after
 * a zero-length read (unless a signal is pending).
 */
26 public int ignore_eoi;
27 
28 /*
29  * Pool of buffers holding the most recently used blocks of the input file.
30  * The buffer pool is kept as a doubly-linked circular list,
31  * in order from most- to least-recently used.
32  * The circular list is anchored by the file state "thisfile".
33  */
34 #define	LBUFSIZE	8192
35 struct buf {
36 	struct buf *next, *prev;
37 	struct buf *hnext, *hprev;
38 	BLOCKNUM block;
39 	unsigned int datasize;
40 	unsigned char data[LBUFSIZE];
41 };
42 
/*
 * Head of one hash chain.
 * END_OF_HCHAIN() casts the address of a struct buflist to
 * (struct buf *), so these members must match the leading
 * members of struct buf in number, type and order.
 */
struct buflist {
	struct buf *buf_next;
	struct buf *buf_prev;
	struct buf *buf_hnext;
	struct buf *buf_hprev;
};
48 
49 /*
50  * The file state is maintained in a filestate structure.
51  * A pointer to the filestate is kept in the ifile structure.
52  */
53 #define	BUFHASH_SIZE	64
54 struct filestate {
55 	struct buf *buf_next, *buf_prev;
56 	struct buflist hashtbl[BUFHASH_SIZE];
57 	int file;
58 	int flags;
59 	POSITION fpos;
60 	int nbufs;
61 	BLOCKNUM block;
62 	unsigned int offset;
63 	POSITION fsize;
64 };
65 
/*
 * Shorthand for the members of the current file state.
 */
#define	ch_bufhead	thisfile->buf_next
#define	ch_buftail	thisfile->buf_prev
#define	ch_nbufs	thisfile->nbufs
#define	ch_block	thisfile->block
#define	ch_offset	thisfile->offset
#define	ch_fpos		thisfile->fpos
#define	ch_fsize	thisfile->fsize
#define	ch_flags	thisfile->flags
#define	ch_file		thisfile->file

/*
 * List sentinels.  The anchor of the buffer list (the filestate) and
 * the head of hash chain h are viewed as (struct buf *) so that they
 * can terminate the circular lists.
 * BUFHASH maps a block number to the index of its hash chain.
 */
#define	END_OF_CHAIN	((struct buf *)&thisfile->buf_next)
#define	END_OF_HCHAIN(h) ((struct buf *)&thisfile->hashtbl[h])
#define BUFHASH(blk)	((blk) & (BUFHASH_SIZE-1))

/*
 * Loop header: visit every buffer bp on hash chain h.
 */
#define	FOR_BUFS_IN_CHAIN(h,bp) \
	for ((bp) = thisfile->hashtbl[h].buf_hnext;  \
	     (bp) != END_OF_HCHAIN(h);  (bp) = (bp)->hnext)

/*
 * Unlink bp from the hash chain it is on.
 * Wrapped in do/while(0) so that the macro is a single statement
 * and stays correct under an unbraced if/else.
 */
#define	HASH_RM(bp) \
	do { \
		(bp)->hnext->hprev = (bp)->hprev; \
		(bp)->hprev->hnext = (bp)->hnext; \
	} while (0)

/*
 * Link bp in at the head of hash chain h.
 * Also a single statement; see HASH_RM.
 */
#define	HASH_INS(bp,h) \
	do { \
		(bp)->hnext = thisfile->hashtbl[h].buf_hnext; \
		(bp)->hprev = END_OF_HCHAIN(h); \
		thisfile->hashtbl[h].buf_hnext->hprev = (bp); \
		thisfile->hashtbl[h].buf_hnext = (bp); \
	} while (0)
93 
94 static struct filestate *thisfile;
95 static int ch_ungotchar = -1;
96 static int maxbufs = -1;
97 
98 extern int autobuf;
99 extern int sigs;
100 extern int secure;
101 extern constant char helpdata[];
102 extern constant int size_helpdata;
103 extern IFILE curr_ifile;
104 #if LOGFILE
105 extern int logfile;
106 extern char *namelogfile;
107 #endif
108 
109 static int ch_addbuf();
110 
111 
112 /*
113  * Get the character pointed to by the read pointer.
114  * ch_get() is a macro which is more efficient to call
115  * than fch_get (the function), in the usual case
116  * that the block desired is at the head of the chain.
117  */
118 #define	ch_get()   ((ch_block == ch_bufhead->block && \
119 		     ch_offset < ch_bufhead->datasize) ? \
120 			ch_bufhead->data[ch_offset] : fch_get())
121 	int
122 fch_get()
123 {
124 	register struct buf *bp;
125 	register int n;
126 	register int slept;
127 	register int h;
128 	POSITION pos;
129 	POSITION len;
130 
131 	slept = FALSE;
132 
133 	/*
134 	 * Look for a buffer holding the desired block.
135 	 */
136 	h = BUFHASH(ch_block);
137 	FOR_BUFS_IN_CHAIN(h, bp)
138 	{
139 		if (bp->block == ch_block)
140 		{
141 			if (ch_offset >= bp->datasize)
142 				/*
143 				 * Need more data in this buffer.
144 				 */
145 				goto read_more;
146 			goto found;
147 		}
148 	}
149 	/*
150 	 * Block is not in a buffer.
151 	 * Take the least recently used buffer
152 	 * and read the desired block into it.
153 	 * If the LRU buffer has data in it,
154 	 * then maybe allocate a new buffer.
155 	 */
156 	if (ch_buftail == END_OF_CHAIN || ch_buftail->block != -1)
157 	{
158 		/*
159 		 * There is no empty buffer to use.
160 		 * Allocate a new buffer if:
161 		 * 1. We can't seek on this file and -b is not in effect; or
162 		 * 2. We haven't allocated the max buffers for this file yet.
163 		 */
164 		if ((autobuf && !(ch_flags & CH_CANSEEK)) ||
165 		    (maxbufs < 0 || ch_nbufs < maxbufs))
166 			if (ch_addbuf())
167 				/*
168 				 * Allocation failed: turn off autobuf.
169 				 */
170 				autobuf = OPT_OFF;
171 	}
172 	bp = ch_buftail;
173 	HASH_RM(bp); /* Remove from old hash chain. */
174 	bp->block = ch_block;
175 	bp->datasize = 0;
176 	HASH_INS(bp, h); /* Insert into new hash chain. */
177 
178     read_more:
179 	pos = (ch_block * LBUFSIZE) + bp->datasize;
180 	if ((len = ch_length()) != NULL_POSITION && pos >= len)
181 		/*
182 		 * At end of file.
183 		 */
184 		return (EOI);
185 
186 	if (pos != ch_fpos)
187 	{
188 		/*
189 		 * Not at the correct position: must seek.
190 		 * If input is a pipe, we're in trouble (can't seek on a pipe).
191 		 * Some data has been lost: just return "?".
192 		 */
193 		if (!(ch_flags & CH_CANSEEK))
194 			return ('?');
195 		if (lseek(ch_file, (off_t)pos, 0) == BAD_LSEEK)
196 		{
197  			error("seek error", NULL_PARG);
198 			clear_eol();
199 			return (EOI);
200  		}
201  		ch_fpos = pos;
202  	}
203 
204 	/*
205 	 * Read the block.
206 	 * If we read less than a full block, that's ok.
207 	 * We use partial block and pick up the rest next time.
208 	 */
209 	if (ch_ungotchar != -1)
210 	{
211 		bp->data[bp->datasize] = ch_ungotchar;
212 		n = 1;
213 		ch_ungotchar = -1;
214 	} else if (ch_flags & CH_HELPFILE)
215 	{
216 		bp->data[bp->datasize] = helpdata[ch_fpos];
217 		n = 1;
218 	} else
219 	{
220 		n = iread(ch_file, &bp->data[bp->datasize],
221 			(unsigned int)(LBUFSIZE - bp->datasize));
222 	}
223 
224 	if (n == READ_INTR)
225 		return (EOI);
226 	if (n < 0)
227 	{
228 #if MSDOS_COMPILER==WIN32C
229 		if (errno != EPIPE)
230 #endif
231 		{
232 			error("read error", NULL_PARG);
233 			clear_eol();
234 		}
235 		n = 0;
236 	}
237 
238 #if LOGFILE
239 	/*
240 	 * If we have a log file, write the new data to it.
241 	 */
242 	if (!secure && logfile >= 0 && n > 0)
243 		write(logfile, (char *) &bp->data[bp->datasize], n);
244 #endif
245 
246 	ch_fpos += n;
247 	bp->datasize += n;
248 
249 	/*
250 	 * If we have read to end of file, set ch_fsize to indicate
251 	 * the position of the end of file.
252 	 */
253 	if (n == 0)
254 	{
255 		ch_fsize = pos;
256 		if (ignore_eoi)
257 		{
258 			/*
259 			 * We are ignoring EOF.
260 			 * Wait a while, then try again.
261 			 */
262 			if (!slept)
263 			{
264 				PARG parg;
265 				parg.p_string = wait_message();
266 				ierror("%s", &parg);
267 			}
268 #if !MSDOS_COMPILER
269 	 		sleep(1);
270 #else
271 #if MSDOS_COMPILER==WIN32C
272 			Sleep(1000);
273 #endif
274 #endif
275 			slept = TRUE;
276 		}
277 		if (sigs)
278 			return (EOI);
279 	}
280 
281     found:
282 	if (ch_bufhead != bp)
283 	{
284 		/*
285 		 * Move the buffer to the head of the buffer chain.
286 		 * This orders the buffer chain, most- to least-recently used.
287 		 */
288 		bp->next->prev = bp->prev;
289 		bp->prev->next = bp->next;
290 		bp->next = ch_bufhead;
291 		bp->prev = END_OF_CHAIN;
292 		ch_bufhead->prev = bp;
293 		ch_bufhead = bp;
294 
295 		/*
296 		 * Move to head of hash chain too.
297 		 */
298 		HASH_RM(bp);
299 		HASH_INS(bp, h);
300 	}
301 
302 	if (ch_offset >= bp->datasize)
303 		/*
304 		 * After all that, we still don't have enough data.
305 		 * Go back and try again.
306 		 */
307 		goto read_more;
308 
309 	return (bp->data[ch_offset]);
310 }
311 
312 /*
313  * ch_ungetchar is a rather kludgy and limited way to push
314  * a single char onto an input file descriptor.
315  */
316 	public void
317 ch_ungetchar(c)
318 	int c;
319 {
320 	if (c != -1 && ch_ungotchar != -1)
321 		error("ch_ungetchar overrun", NULL_PARG);
322 	ch_ungotchar = c;
323 }
324 
325 #if LOGFILE
326 /*
327  * Close the logfile.
328  * If we haven't read all of standard input into it, do that now.
329  */
330 	public void
331 end_logfile()
332 {
333 	static int tried = FALSE;
334 
335 	if (logfile < 0)
336 		return;
337 	if (!tried && ch_fsize == NULL_POSITION)
338 	{
339 		tried = TRUE;
340 		ierror("Finishing logfile", NULL_PARG);
341 		while (ch_forw_get() != EOI)
342 			if (ABORT_SIGS())
343 				break;
344 	}
345 	close(logfile);
346 	logfile = -1;
347 	namelogfile = NULL;
348 }
349 
350 /*
351  * Start a log file AFTER less has already been running.
352  * Invoked from the - command; see toggle_option().
353  * Write all the existing buffered data to the log file.
354  */
355 	public void
356 sync_logfile()
357 {
358 	register struct buf *bp;
359 	int warned = FALSE;
360 	BLOCKNUM block;
361 	BLOCKNUM nblocks;
362 
363 	nblocks = (ch_fpos + LBUFSIZE - 1) / LBUFSIZE;
364 	for (block = 0;  block < nblocks;  block++)
365 	{
366 		for (bp = ch_bufhead;  ;  bp = bp->next)
367 		{
368 			if (bp == END_OF_CHAIN)
369 			{
370 				if (!warned)
371 				{
372 					error("Warning: log file is incomplete",
373 						NULL_PARG);
374 					warned = TRUE;
375 				}
376 				break;
377 			}
378 			if (bp->block == block)
379 			{
380 				write(logfile, (char *) bp->data, bp->datasize);
381 				break;
382 			}
383 		}
384 	}
385 }
386 
387 #endif
388 
389 /*
390  * Determine if a specific block is currently in one of the buffers.
391  */
392 	static int
393 buffered(block)
394 	BLOCKNUM block;
395 {
396 	register struct buf *bp;
397 	register int h;
398 
399 	h = BUFHASH(block);
400 	FOR_BUFS_IN_CHAIN(h, bp)
401 	{
402 		if (bp->block == block)
403 			return (TRUE);
404 	}
405 	return (FALSE);
406 }
407 
408 /*
409  * Seek to a specified position in the file.
410  * Return 0 if successful, non-zero if can't seek there.
411  */
412 	public int
413 ch_seek(pos)
414 	register POSITION pos;
415 {
416 	BLOCKNUM new_block;
417 	POSITION len;
418 
419 	len = ch_length();
420 	if (pos < ch_zero() || (len != NULL_POSITION && pos > len))
421 		return (1);
422 
423 	new_block = pos / LBUFSIZE;
424 	if (!(ch_flags & CH_CANSEEK) && pos != ch_fpos && !buffered(new_block))
425 	{
426 		if (ch_fpos > pos)
427 			return (1);
428 		while (ch_fpos < pos)
429 		{
430 			if (ch_forw_get() == EOI)
431 				return (1);
432 			if (ABORT_SIGS())
433 				return (1);
434 		}
435 		return (0);
436 	}
437 	/*
438 	 * Set read pointer.
439 	 */
440 	ch_block = new_block;
441 	ch_offset = pos % LBUFSIZE;
442 	return (0);
443 }
444 
445 /*
446  * Seek to the end of the file.
447  */
448 	public int
449 ch_end_seek()
450 {
451 	POSITION len;
452 
453 	if (ch_flags & CH_CANSEEK)
454 		ch_fsize = filesize(ch_file);
455 
456 	len = ch_length();
457 	if (len != NULL_POSITION)
458 		return (ch_seek(len));
459 
460 	/*
461 	 * Do it the slow way: read till end of data.
462 	 */
463 	while (ch_forw_get() != EOI)
464 		if (ABORT_SIGS())
465 			return (1);
466 	return (0);
467 }
468 
469 /*
470  * Seek to the beginning of the file, or as close to it as we can get.
471  * We may not be able to seek there if input is a pipe and the
472  * beginning of the pipe is no longer buffered.
473  */
474 	public int
475 ch_beg_seek()
476 {
477 	register struct buf *bp, *firstbp;
478 
479 	/*
480 	 * Try a plain ch_seek first.
481 	 */
482 	if (ch_seek(ch_zero()) == 0)
483 		return (0);
484 
485 	/*
486 	 * Can't get to position 0.
487 	 * Look thru the buffers for the one closest to position 0.
488 	 */
489 	firstbp = bp = ch_bufhead;
490 	if (bp == END_OF_CHAIN)
491 		return (1);
492 	while ((bp = bp->next) != END_OF_CHAIN)
493 		if (bp->block < firstbp->block)
494 			firstbp = bp;
495 	ch_block = firstbp->block;
496 	ch_offset = 0;
497 	return (0);
498 }
499 
500 /*
501  * Return the length of the file, if known.
502  */
503 	public POSITION
504 ch_length()
505 {
506 	if (ignore_eoi)
507 		return (NULL_POSITION);
508 	if (ch_flags & CH_HELPFILE)
509 		return (size_helpdata);
510 	return (ch_fsize);
511 }
512 
513 /*
514  * Return the current position in the file.
515  */
516 	public POSITION
517 ch_tell()
518 {
519 	return (ch_block * LBUFSIZE) + ch_offset;
520 }
521 
522 /*
523  * Get the current char and post-increment the read pointer.
524  */
525 	public int
526 ch_forw_get()
527 {
528 	register int c;
529 
530 	c = ch_get();
531 	if (c == EOI)
532 		return (EOI);
533 	if (ch_offset < LBUFSIZE-1)
534 		ch_offset++;
535 	else
536 	{
537 		ch_block ++;
538 		ch_offset = 0;
539 	}
540 	return (c);
541 }
542 
543 /*
544  * Pre-decrement the read pointer and get the new current char.
545  */
546 	public int
547 ch_back_get()
548 {
549 	if (ch_offset > 0)
550 		ch_offset --;
551 	else
552 	{
553 		if (ch_block <= 0)
554 			return (EOI);
555 		if (!(ch_flags & CH_CANSEEK) && !buffered(ch_block-1))
556 			return (EOI);
557 		ch_block--;
558 		ch_offset = LBUFSIZE-1;
559 	}
560 	return (ch_get());
561 }
562 
563 /*
564  * Set max amount of buffer space.
565  * bufspace is in units of 1024 bytes.  -1 mean no limit.
566  */
567 	public void
568 ch_setbufspace(bufspace)
569 	int bufspace;
570 {
571 	if (bufspace < 0)
572 		maxbufs = -1;
573 	else
574 	{
575 		maxbufs = ((bufspace * 1024) + LBUFSIZE-1) / LBUFSIZE;
576 		if (maxbufs < 1)
577 			maxbufs = 1;
578 	}
579 }
580 
581 /*
582  * Flush (discard) any saved file state, including buffer contents.
583  */
584 	public void
585 ch_flush()
586 {
587 	register struct buf *bp;
588 
589 	if (!(ch_flags & CH_CANSEEK))
590 	{
591 		/*
592 		 * If input is a pipe, we don't flush buffer contents,
593 		 * since the contents can't be recovered.
594 		 */
595 		ch_fsize = NULL_POSITION;
596 		return;
597 	}
598 
599 	/*
600 	 * Initialize all the buffers.
601 	 */
602 	for (bp = ch_bufhead;  bp != END_OF_CHAIN;  bp = bp->next)
603 		bp->block = -1;
604 
605 	/*
606 	 * Figure out the size of the file, if we can.
607 	 */
608 	ch_fsize = filesize(ch_file);
609 
610 	/*
611 	 * Seek to a known position: the beginning of the file.
612 	 */
613 	ch_fpos = 0;
614 	ch_block = 0; /* ch_fpos / LBUFSIZE; */
615 	ch_offset = 0; /* ch_fpos % LBUFSIZE; */
616 
617 #if 1
618 	/*
619 	 * This is a kludge to workaround a Linux kernel bug: files in
620 	 * /proc have a size of 0 according to fstat() but have readable
621 	 * data.  They are sometimes, but not always, seekable.
622 	 * Force them to be non-seekable here.
623 	 */
624 	if (ch_fsize == 0)
625 	{
626 		ch_fsize = NULL_POSITION;
627 		ch_flags &= ~CH_CANSEEK;
628 	}
629 #endif
630 
631 	if (lseek(ch_file, (off_t)0, 0) == BAD_LSEEK)
632 	{
633 		/*
634 		 * Warning only; even if the seek fails for some reason,
635 		 * there's a good chance we're at the beginning anyway.
636 		 * {{ I think this is bogus reasoning. }}
637 		 */
638 		error("seek error to 0", NULL_PARG);
639 	}
640 }
641 
642 /*
643  * Allocate a new buffer.
644  * The buffer is added to the tail of the buffer chain.
645  */
646 	static int
647 ch_addbuf()
648 {
649 	register struct buf *bp;
650 
651 	/*
652 	 * Allocate and initialize a new buffer and link it
653 	 * onto the tail of the buffer list.
654 	 */
655 	bp = (struct buf *) calloc(1, sizeof(struct buf));
656 	if (bp == NULL)
657 		return (1);
658 	ch_nbufs++;
659 	bp->block = -1;
660 	bp->next = END_OF_CHAIN;
661 	bp->prev = ch_buftail;
662 	ch_buftail->next = bp;
663 	ch_buftail = bp;
664 	HASH_INS(bp, 0);
665 	return (0);
666 }
667 
668 /*
669  *
670  */
671 	static void
672 init_hashtbl()
673 {
674 	register int h;
675 
676 	for (h = 0;  h < BUFHASH_SIZE;  h++)
677 	{
678 		thisfile->hashtbl[h].buf_hnext = END_OF_HCHAIN(h);
679 		thisfile->hashtbl[h].buf_hprev = END_OF_HCHAIN(h);
680 	}
681 }
682 
683 /*
684  * Delete all buffers for this file.
685  */
686 	static void
687 ch_delbufs()
688 {
689 	register struct buf *bp;
690 
691 	while (ch_bufhead != END_OF_CHAIN)
692 	{
693 		bp = ch_bufhead;
694 		bp->next->prev = bp->prev;;
695 		bp->prev->next = bp->next;
696 		free(bp);
697 	}
698 	ch_nbufs = 0;
699 	init_hashtbl();
700 }
701 
702 /*
703  * Is it possible to seek on a file descriptor?
704  */
705 	public int
706 seekable(f)
707 	int f;
708 {
709 #if MSDOS_COMPILER
710 	extern int fd0;
711 	if (f == fd0 && !isatty(fd0))
712 	{
713 		/*
714 		 * In MS-DOS, pipes are seekable.  Check for
715 		 * standard input, and pretend it is not seekable.
716 		 */
717 		return (0);
718 	}
719 #endif
720 	return (lseek(f, (off_t)1, 0) != BAD_LSEEK);
721 }
722 
723 /*
724  * Initialize file state for a new file.
725  */
726 	public void
727 ch_init(f, flags)
728 	int f;
729 	int flags;
730 {
731 	/*
732 	 * See if we already have a filestate for this file.
733 	 */
734 	thisfile = (struct filestate *) get_filestate(curr_ifile);
735 	if (thisfile == NULL)
736 	{
737 		/*
738 		 * Allocate and initialize a new filestate.
739 		 */
740 		thisfile = (struct filestate *)
741 				calloc(1, sizeof(struct filestate));
742 		thisfile->buf_next = thisfile->buf_prev = END_OF_CHAIN;
743 		thisfile->nbufs = 0;
744 		thisfile->flags = 0;
745 		thisfile->fpos = 0;
746 		thisfile->block = 0;
747 		thisfile->offset = 0;
748 		thisfile->file = -1;
749 		thisfile->fsize = NULL_POSITION;
750 		ch_flags = flags;
751 		init_hashtbl();
752 		/*
753 		 * Try to seek; set CH_CANSEEK if it works.
754 		 */
755 		if ((flags & CH_CANSEEK) && !seekable(f))
756 			ch_flags &= ~CH_CANSEEK;
757 		set_filestate(curr_ifile, (void *) thisfile);
758 	}
759 	if (thisfile->file == -1)
760 		thisfile->file = f;
761 	ch_flush();
762 }
763 
764 /*
765  * Close a filestate.
766  */
767 	public void
768 ch_close()
769 {
770 	int keepstate = FALSE;
771 
772 	if (ch_flags & (CH_CANSEEK|CH_POPENED|CH_HELPFILE))
773 	{
774 		/*
775 		 * We can seek or re-open, so we don't need to keep buffers.
776 		 */
777 		ch_delbufs();
778 	} else
779 		keepstate = TRUE;
780 	if (!(ch_flags & CH_KEEPOPEN))
781 	{
782 		/*
783 		 * We don't need to keep the file descriptor open
784 		 * (because we can re-open it.)
785 		 * But don't really close it if it was opened via popen(),
786 		 * because pclose() wants to close it.
787 		 */
788 		if (!(ch_flags & (CH_POPENED|CH_HELPFILE)))
789 			close(ch_file);
790 		ch_file = -1;
791 	} else
792 		keepstate = TRUE;
793 	if (!keepstate)
794 	{
795 		/*
796 		 * We don't even need to keep the filestate structure.
797 		 */
798 		free(thisfile);
799 		thisfile = NULL;
800 		set_filestate(curr_ifile, (void *) NULL);
801 	}
802 }
803 
804 /*
805  * Return ch_flags for the current file.
806  */
807 	public int
808 ch_getflags()
809 {
810 	return (ch_flags);
811 }
812 
813 #if 0
814 	public void
815 ch_dump(struct filestate *fs)
816 {
817 	struct buf *bp;
818 	unsigned char *s;
819 
820 	if (fs == NULL)
821 	{
822 		printf(" --no filestate\n");
823 		return;
824 	}
825 	printf(" file %d, flags %x, fpos %x, fsize %x, blk/off %x/%x\n",
826 		fs->file, fs->flags, fs->fpos,
827 		fs->fsize, fs->block, fs->offset);
828 	printf(" %d bufs:\n", fs->nbufs);
829 	for (bp = fs->buf_next; bp != (struct buf *)fs;  bp = bp->next)
830 	{
831 		printf("%x: blk %x, size %x \"",
832 			bp, bp->block, bp->datasize);
833 		for (s = bp->data;  s < bp->data + 30;  s++)
834 			if (*s >= ' ' && *s < 0x7F)
835 				printf("%c", *s);
836 			else
837 				printf(".");
838 		printf("\"\n");
839 	}
840 }
841 #endif
842