1 /* 2 * Copyright (C) 1984-2000 Mark Nudelman 3 * 4 * You may distribute under the terms of either the GNU General Public 5 * License or the Less License, as specified in the README file. 6 * 7 * For more information about less, or for information on how to 8 * contact the author, see the README file. 9 */ 10 11 12 /* 13 * Low level character input from the input file. 14 * We use these special purpose routines which optimize moving 15 * both forward and backward from the current read pointer. 16 */ 17 18 #include "less.h" 19 #if MSDOS_COMPILER==WIN32C 20 #include <errno.h> 21 #include <windows.h> 22 #endif 23 24 typedef POSITION BLOCKNUM; 25 26 public int ignore_eoi; 27 28 /* 29 * Pool of buffers holding the most recently used blocks of the input file. 30 * The buffer pool is kept as a doubly-linked circular list, 31 * in order from most- to least-recently used. 32 * The circular list is anchored by the file state "thisfile". 33 */ 34 #define LBUFSIZE 8192 35 struct buf { 36 struct buf *next, *prev; 37 struct buf *hnext, *hprev; 38 BLOCKNUM block; 39 unsigned int datasize; 40 unsigned char data[LBUFSIZE]; 41 }; 42 43 struct buflist { 44 /* -- Following members must match struct buf */ 45 struct buf *buf_next, *buf_prev; 46 struct buf *buf_hnext, *buf_hprev; 47 }; 48 49 /* 50 * The file state is maintained in a filestate structure. 51 * A pointer to the filestate is kept in the ifile structure. 52 */ 53 #define BUFHASH_SIZE 64 54 struct filestate { 55 struct buf *buf_next, *buf_prev; 56 struct buflist hashtbl[BUFHASH_SIZE]; 57 int file; 58 int flags; 59 POSITION fpos; 60 int nbufs; 61 BLOCKNUM block; 62 unsigned int offset; 63 POSITION fsize; 64 }; 65 66 #define ch_bufhead thisfile->buf_next 67 #define ch_buftail thisfile->buf_prev 68 #define ch_nbufs thisfile->nbufs 69 #define ch_block thisfile->block 70 #define ch_offset thisfile->offset 71 #define ch_fpos thisfile->fpos 72 #define ch_fsize thisfile->fsize 73 #define ch_flags thisfile->flags 74 #define ch_file thisfile->file 75 76 #define END_OF_CHAIN ((struct buf *)&thisfile->buf_next) 77 #define END_OF_HCHAIN(h) ((struct buf *)&thisfile->hashtbl[h]) 78 #define BUFHASH(blk) ((blk) & (BUFHASH_SIZE-1)) 79 80 #define FOR_BUFS_IN_CHAIN(h,bp) \ 81 for (bp = thisfile->hashtbl[h].buf_hnext; \ 82 bp != END_OF_HCHAIN(h); bp = bp->hnext) 83 84 #define HASH_RM(bp) \ 85 (bp)->hnext->hprev = (bp)->hprev; \ 86 (bp)->hprev->hnext = (bp)->hnext; 87 88 #define HASH_INS(bp,h) \ 89 (bp)->hnext = thisfile->hashtbl[h].buf_hnext; \ 90 (bp)->hprev = END_OF_HCHAIN(h); \ 91 thisfile->hashtbl[h].buf_hnext->hprev = (bp); \ 92 thisfile->hashtbl[h].buf_hnext = (bp); 93 94 static struct filestate *thisfile; 95 static int ch_ungotchar = -1; 96 97 extern int autobuf; 98 extern int sigs; 99 extern int cbufs; 100 extern int secure; 101 extern constant char helpdata[]; 102 extern constant int size_helpdata; 103 extern IFILE curr_ifile; 104 #if LOGFILE 105 extern int logfile; 106 extern char *namelogfile; 107 #endif 108 109 static int ch_addbuf(); 110 111 112 /* 113 * Get the character pointed to by the read pointer. 114 * ch_get() is a macro which is more efficient to call 115 * than fch_get (the function), in the usual case 116 * that the block desired is at the head of the chain. 117 */ 118 #define ch_get() ((ch_block == ch_bufhead->block && \ 119 ch_offset < ch_bufhead->datasize) ? \ 120 ch_bufhead->data[ch_offset] : fch_get()) 121 int 122 fch_get() 123 { 124 register struct buf *bp; 125 register int n; 126 register int slept; 127 register int h; 128 POSITION pos; 129 POSITION len; 130 131 slept = FALSE; 132 133 /* 134 * Look for a buffer holding the desired block. 135 */ 136 h = BUFHASH(ch_block); 137 FOR_BUFS_IN_CHAIN(h, bp) 138 { 139 if (bp->block == ch_block) 140 { 141 if (ch_offset >= bp->datasize) 142 /* 143 * Need more data in this buffer. 144 */ 145 goto read_more; 146 goto found; 147 } 148 } 149 /* 150 * Block is not in a buffer. 151 * Take the least recently used buffer 152 * and read the desired block into it. 153 * If the LRU buffer has data in it, 154 * then maybe allocate a new buffer. 155 */ 156 if (ch_buftail == END_OF_CHAIN || ch_buftail->block != -1) 157 { 158 /* 159 * There is no empty buffer to use. 160 * Allocate a new buffer if: 161 * 1. We can't seek on this file and -b is not in effect; or 162 * 2. We haven't allocated the max buffers for this file yet. 163 */ 164 if ((autobuf && !(ch_flags & CH_CANSEEK)) || 165 (cbufs == -1 || ch_nbufs < cbufs)) 166 if (ch_addbuf()) 167 /* 168 * Allocation failed: turn off autobuf. 169 */ 170 autobuf = OPT_OFF; 171 } 172 bp = ch_buftail; 173 HASH_RM(bp); /* Remove from old hash chain. */ 174 bp->block = ch_block; 175 bp->datasize = 0; 176 HASH_INS(bp, h); /* Insert into new hash chain. */ 177 178 read_more: 179 pos = (ch_block * LBUFSIZE) + bp->datasize; 180 if ((len = ch_length()) != NULL_POSITION && pos >= len) 181 /* 182 * At end of file. 183 */ 184 return (EOI); 185 186 if (pos != ch_fpos) 187 { 188 /* 189 * Not at the correct position: must seek. 190 * If input is a pipe, we're in trouble (can't seek on a pipe). 191 * Some data has been lost: just return "?". 192 */ 193 if (!(ch_flags & CH_CANSEEK)) 194 return ('?'); 195 if (lseek(ch_file, (off_t)pos, 0) == BAD_LSEEK) 196 { 197 error("seek error", NULL_PARG); 198 clear_eol(); 199 return (EOI); 200 } 201 ch_fpos = pos; 202 } 203 204 /* 205 * Read the block. 206 * If we read less than a full block, that's ok. 207 * We use partial block and pick up the rest next time. 208 */ 209 if (ch_ungotchar != -1) 210 { 211 bp->data[bp->datasize] = ch_ungotchar; 212 n = 1; 213 ch_ungotchar = -1; 214 } else if (ch_flags & CH_HELPFILE) 215 { 216 bp->data[bp->datasize] = helpdata[ch_fpos]; 217 n = 1; 218 } else 219 { 220 n = iread(ch_file, &bp->data[bp->datasize], 221 (unsigned int)(LBUFSIZE - bp->datasize)); 222 } 223 224 if (n == READ_INTR) 225 return (EOI); 226 if (n < 0) 227 { 228 #if MSDOS_COMPILER==WIN32C 229 if (errno != EPIPE) 230 #endif 231 { 232 error("read error", NULL_PARG); 233 clear_eol(); 234 } 235 n = 0; 236 } 237 238 #if LOGFILE 239 /* 240 * If we have a log file, write the new data to it. 241 */ 242 if (!secure && logfile >= 0 && n > 0) 243 write(logfile, (char *) &bp->data[bp->datasize], n); 244 #endif 245 246 ch_fpos += n; 247 bp->datasize += n; 248 249 /* 250 * If we have read to end of file, set ch_fsize to indicate 251 * the position of the end of file. 252 */ 253 if (n == 0) 254 { 255 ch_fsize = pos; 256 if (ignore_eoi) 257 { 258 /* 259 * We are ignoring EOF. 260 * Wait a while, then try again. 261 */ 262 if (!slept) 263 { 264 PARG parg; 265 parg.p_string = wait_message(); 266 ierror("%s", &parg); 267 } 268 #if !MSDOS_COMPILER 269 sleep(1); 270 #else 271 #if MSDOS_COMPILER==WIN32C 272 Sleep(1000); 273 #endif 274 #endif 275 slept = TRUE; 276 } 277 if (sigs) 278 return (EOI); 279 } 280 281 found: 282 if (ch_bufhead != bp) 283 { 284 /* 285 * Move the buffer to the head of the buffer chain. 286 * This orders the buffer chain, most- to least-recently used. 287 */ 288 bp->next->prev = bp->prev; 289 bp->prev->next = bp->next; 290 bp->next = ch_bufhead; 291 bp->prev = END_OF_CHAIN; 292 ch_bufhead->prev = bp; 293 ch_bufhead = bp; 294 295 /* 296 * Move to head of hash chain too. 297 */ 298 HASH_RM(bp); 299 HASH_INS(bp, h); 300 } 301 302 if (ch_offset >= bp->datasize) 303 /* 304 * After all that, we still don't have enough data. 305 * Go back and try again. 306 */ 307 goto read_more; 308 309 return (bp->data[ch_offset]); 310 } 311 312 /* 313 * ch_ungetchar is a rather kludgy and limited way to push 314 * a single char onto an input file descriptor. 315 */ 316 public void 317 ch_ungetchar(c) 318 int c; 319 { 320 if (c != -1 && ch_ungotchar != -1) 321 error("ch_ungetchar overrun", NULL_PARG); 322 ch_ungotchar = c; 323 } 324 325 #if LOGFILE 326 /* 327 * Close the logfile. 328 * If we haven't read all of standard input into it, do that now. 329 */ 330 public void 331 end_logfile() 332 { 333 static int tried = FALSE; 334 335 if (logfile < 0) 336 return; 337 if (!tried && ch_fsize == NULL_POSITION) 338 { 339 tried = TRUE; 340 ierror("Finishing logfile", NULL_PARG); 341 while (ch_forw_get() != EOI) 342 if (ABORT_SIGS()) 343 break; 344 } 345 close(logfile); 346 logfile = -1; 347 namelogfile = NULL; 348 } 349 350 /* 351 * Start a log file AFTER less has already been running. 352 * Invoked from the - command; see toggle_option(). 353 * Write all the existing buffered data to the log file. 354 */ 355 public void 356 sync_logfile() 357 { 358 register struct buf *bp; 359 int warned = FALSE; 360 BLOCKNUM block; 361 BLOCKNUM nblocks; 362 363 nblocks = (ch_fpos + LBUFSIZE - 1) / LBUFSIZE; 364 for (block = 0; block < nblocks; block++) 365 { 366 for (bp = ch_bufhead; ; bp = bp->next) 367 { 368 if (bp == END_OF_CHAIN) 369 { 370 if (!warned) 371 { 372 error("Warning: log file is incomplete", 373 NULL_PARG); 374 warned = TRUE; 375 } 376 break; 377 } 378 if (bp->block == block) 379 { 380 write(logfile, (char *) bp->data, bp->datasize); 381 break; 382 } 383 } 384 } 385 } 386 387 #endif 388 389 /* 390 * Determine if a specific block is currently in one of the buffers. 391 */ 392 static int 393 buffered(block) 394 BLOCKNUM block; 395 { 396 register struct buf *bp; 397 register int h; 398 399 h = BUFHASH(block); 400 FOR_BUFS_IN_CHAIN(h, bp) 401 { 402 if (bp->block == block) 403 return (TRUE); 404 } 405 return (FALSE); 406 } 407 408 /* 409 * Seek to a specified position in the file. 410 * Return 0 if successful, non-zero if can't seek there. 411 */ 412 public int 413 ch_seek(pos) 414 register POSITION pos; 415 { 416 BLOCKNUM new_block; 417 POSITION len; 418 419 len = ch_length(); 420 if (pos < ch_zero() || (len != NULL_POSITION && pos > len)) 421 return (1); 422 423 new_block = pos / LBUFSIZE; 424 if (!(ch_flags & CH_CANSEEK) && pos != ch_fpos && !buffered(new_block)) 425 { 426 if (ch_fpos > pos) 427 return (1); 428 while (ch_fpos < pos) 429 { 430 if (ch_forw_get() == EOI) 431 return (1); 432 if (ABORT_SIGS()) 433 return (1); 434 } 435 return (0); 436 } 437 /* 438 * Set read pointer. 439 */ 440 ch_block = new_block; 441 ch_offset = pos % LBUFSIZE; 442 return (0); 443 } 444 445 /* 446 * Seek to the end of the file. 447 */ 448 public int 449 ch_end_seek() 450 { 451 POSITION len; 452 453 if (ch_flags & CH_CANSEEK) 454 ch_fsize = filesize(ch_file); 455 456 len = ch_length(); 457 if (len != NULL_POSITION) 458 return (ch_seek(len)); 459 460 /* 461 * Do it the slow way: read till end of data. 462 */ 463 while (ch_forw_get() != EOI) 464 if (ABORT_SIGS()) 465 return (1); 466 return (0); 467 } 468 469 /* 470 * Seek to the beginning of the file, or as close to it as we can get. 471 * We may not be able to seek there if input is a pipe and the 472 * beginning of the pipe is no longer buffered. 473 */ 474 public int 475 ch_beg_seek() 476 { 477 register struct buf *bp, *firstbp; 478 479 /* 480 * Try a plain ch_seek first. 481 */ 482 if (ch_seek(ch_zero()) == 0) 483 return (0); 484 485 /* 486 * Can't get to position 0. 487 * Look thru the buffers for the one closest to position 0. 488 */ 489 firstbp = bp = ch_bufhead; 490 if (bp == END_OF_CHAIN) 491 return (1); 492 while ((bp = bp->next) != END_OF_CHAIN) 493 if (bp->block < firstbp->block) 494 firstbp = bp; 495 ch_block = firstbp->block; 496 ch_offset = 0; 497 return (0); 498 } 499 500 /* 501 * Return the length of the file, if known. 502 */ 503 public POSITION 504 ch_length() 505 { 506 if (ignore_eoi) 507 return (NULL_POSITION); 508 if (ch_flags & CH_HELPFILE) 509 return (size_helpdata); 510 return (ch_fsize); 511 } 512 513 /* 514 * Return the current position in the file. 515 */ 516 public POSITION 517 ch_tell() 518 { 519 return (ch_block * LBUFSIZE) + ch_offset; 520 } 521 522 /* 523 * Get the current char and post-increment the read pointer. 524 */ 525 public int 526 ch_forw_get() 527 { 528 register int c; 529 530 c = ch_get(); 531 if (c == EOI) 532 return (EOI); 533 if (ch_offset < LBUFSIZE-1) 534 ch_offset++; 535 else 536 { 537 ch_block ++; 538 ch_offset = 0; 539 } 540 return (c); 541 } 542 543 /* 544 * Pre-decrement the read pointer and get the new current char. 545 */ 546 public int 547 ch_back_get() 548 { 549 if (ch_offset > 0) 550 ch_offset --; 551 else 552 { 553 if (ch_block <= 0) 554 return (EOI); 555 if (!(ch_flags & CH_CANSEEK) && !buffered(ch_block-1)) 556 return (EOI); 557 ch_block--; 558 ch_offset = LBUFSIZE-1; 559 } 560 return (ch_get()); 561 } 562 563 /* 564 * Allocate buffers. 565 * Caller wants us to have a total of at least want_nbufs buffers. 566 */ 567 public int 568 ch_nbuf(want_nbufs) 569 int want_nbufs; 570 { 571 PARG parg; 572 573 while (ch_nbufs < want_nbufs) 574 { 575 if (ch_addbuf()) 576 { 577 /* 578 * Cannot allocate enough buffers. 579 * If we don't have ANY, then quit. 580 * Otherwise, just report the error and return. 581 */ 582 parg.p_int = want_nbufs - ch_nbufs; 583 error("Cannot allocate %d buffers", &parg); 584 if (ch_nbufs == 0) 585 quit(QUIT_ERROR); 586 break; 587 } 588 } 589 return (ch_nbufs); 590 } 591 592 /* 593 * Flush (discard) any saved file state, including buffer contents. 594 */ 595 public void 596 ch_flush() 597 { 598 register struct buf *bp; 599 600 if (!(ch_flags & CH_CANSEEK)) 601 { 602 /* 603 * If input is a pipe, we don't flush buffer contents, 604 * since the contents can't be recovered. 605 */ 606 ch_fsize = NULL_POSITION; 607 return; 608 } 609 610 /* 611 * Initialize all the buffers. 612 */ 613 for (bp = ch_bufhead; bp != END_OF_CHAIN; bp = bp->next) 614 bp->block = -1; 615 616 /* 617 * Figure out the size of the file, if we can. 618 */ 619 ch_fsize = filesize(ch_file); 620 621 /* 622 * Seek to a known position: the beginning of the file. 623 */ 624 ch_fpos = 0; 625 ch_block = 0; /* ch_fpos / LBUFSIZE; */ 626 ch_offset = 0; /* ch_fpos % LBUFSIZE; */ 627 628 #if 1 629 /* 630 * This is a kludge to workaround a Linux kernel bug: files in 631 * /proc have a size of 0 according to fstat() but have readable 632 * data. They are sometimes, but not always, seekable. 633 * Force them to be non-seekable here. 634 */ 635 if (ch_fsize == 0) 636 { 637 ch_fsize = NULL_POSITION; 638 ch_flags &= ~CH_CANSEEK; 639 } 640 #endif 641 642 if (lseek(ch_file, (off_t)0, 0) == BAD_LSEEK) 643 { 644 /* 645 * Warning only; even if the seek fails for some reason, 646 * there's a good chance we're at the beginning anyway. 647 * {{ I think this is bogus reasoning. }} 648 */ 649 error("seek error to 0", NULL_PARG); 650 } 651 } 652 653 /* 654 * Allocate a new buffer. 655 * The buffer is added to the tail of the buffer chain. 656 */ 657 static int 658 ch_addbuf() 659 { 660 register struct buf *bp; 661 662 /* 663 * Allocate and initialize a new buffer and link it 664 * onto the tail of the buffer list. 665 */ 666 bp = (struct buf *) calloc(1, sizeof(struct buf)); 667 if (bp == NULL) 668 return (1); 669 ch_nbufs++; 670 bp->block = -1; 671 bp->next = END_OF_CHAIN; 672 bp->prev = ch_buftail; 673 ch_buftail->next = bp; 674 ch_buftail = bp; 675 HASH_INS(bp, 0); 676 return (0); 677 } 678 679 /* 680 * 681 */ 682 static void 683 init_hashtbl() 684 { 685 register int h; 686 687 for (h = 0; h < BUFHASH_SIZE; h++) 688 { 689 thisfile->hashtbl[h].buf_hnext = END_OF_HCHAIN(h); 690 thisfile->hashtbl[h].buf_hprev = END_OF_HCHAIN(h); 691 } 692 } 693 694 /* 695 * Delete all buffers for this file. 696 */ 697 static void 698 ch_delbufs() 699 { 700 register struct buf *bp; 701 702 while (ch_bufhead != END_OF_CHAIN) 703 { 704 bp = ch_bufhead; 705 bp->next->prev = bp->prev;; 706 bp->prev->next = bp->next; 707 free(bp); 708 } 709 ch_nbufs = 0; 710 init_hashtbl(); 711 } 712 713 /* 714 * Is it possible to seek on a file descriptor? 715 */ 716 public int 717 seekable(f) 718 int f; 719 { 720 #if MSDOS_COMPILER 721 extern int fd0; 722 if (f == fd0 && !isatty(fd0)) 723 { 724 /* 725 * In MS-DOS, pipes are seekable. Check for 726 * standard input, and pretend it is not seekable. 727 */ 728 return (0); 729 } 730 #endif 731 return (lseek(f, (off_t)1, 0) != BAD_LSEEK); 732 } 733 734 /* 735 * Initialize file state for a new file. 736 */ 737 public void 738 ch_init(f, flags) 739 int f; 740 int flags; 741 { 742 /* 743 * See if we already have a filestate for this file. 744 */ 745 thisfile = (struct filestate *) get_filestate(curr_ifile); 746 if (thisfile == NULL) 747 { 748 /* 749 * Allocate and initialize a new filestate. 750 */ 751 thisfile = (struct filestate *) 752 calloc(1, sizeof(struct filestate)); 753 thisfile->buf_next = thisfile->buf_prev = END_OF_CHAIN; 754 thisfile->nbufs = 0; 755 thisfile->flags = 0; 756 thisfile->fpos = 0; 757 thisfile->block = 0; 758 thisfile->offset = 0; 759 thisfile->file = -1; 760 thisfile->fsize = NULL_POSITION; 761 ch_flags = flags; 762 init_hashtbl(); 763 /* 764 * Try to seek; set CH_CANSEEK if it works. 765 */ 766 if ((flags & CH_CANSEEK) && !seekable(f)) 767 ch_flags &= ~CH_CANSEEK; 768 set_filestate(curr_ifile, (void *) thisfile); 769 } 770 if (thisfile->file == -1) 771 thisfile->file = f; 772 ch_flush(); 773 } 774 775 /* 776 * Close a filestate. 777 */ 778 public void 779 ch_close() 780 { 781 int keepstate = FALSE; 782 783 if (ch_flags & (CH_CANSEEK|CH_POPENED|CH_HELPFILE)) 784 { 785 /* 786 * We can seek or re-open, so we don't need to keep buffers. 787 */ 788 ch_delbufs(); 789 } else 790 keepstate = TRUE; 791 if (!(ch_flags & CH_KEEPOPEN)) 792 { 793 /* 794 * We don't need to keep the file descriptor open 795 * (because we can re-open it.) 796 * But don't really close it if it was opened via popen(), 797 * because pclose() wants to close it. 798 */ 799 if (!(ch_flags & (CH_POPENED|CH_HELPFILE))) 800 close(ch_file); 801 ch_file = -1; 802 } else 803 keepstate = TRUE; 804 if (!keepstate) 805 { 806 /* 807 * We don't even need to keep the filestate structure. 808 */ 809 free(thisfile); 810 thisfile = NULL; 811 set_filestate(curr_ifile, (void *) NULL); 812 } 813 } 814 815 /* 816 * Return ch_flags for the current file. 817 */ 818 public int 819 ch_getflags() 820 { 821 return (ch_flags); 822 } 823 824 #if 0 825 public void 826 ch_dump(struct filestate *fs) 827 { 828 struct buf *bp; 829 unsigned char *s; 830 831 if (fs == NULL) 832 { 833 printf(" --no filestate\n"); 834 return; 835 } 836 printf(" file %d, flags %x, fpos %x, fsize %x, blk/off %x/%x\n", 837 fs->file, fs->flags, fs->fpos, 838 fs->fsize, fs->block, fs->offset); 839 printf(" %d bufs:\n", fs->nbufs); 840 for (bp = fs->buf_next; bp != (struct buf *)fs; bp = bp->next) 841 { 842 printf("%x: blk %x, size %x \"", 843 bp, bp->block, bp->datasize); 844 for (s = bp->data; s < bp->data + 30; s++) 845 if (*s >= ' ' && *s < 0x7F) 846 printf("%c", *s); 847 else 848 printf("."); 849 printf("\"\n"); 850 } 851 } 852 #endif 853