1 /* 2 * Copyright (C) 1984-2007 Mark Nudelman 3 * 4 * You may distribute under the terms of either the GNU General Public 5 * License or the Less License, as specified in the README file. 6 * 7 * For more information about less, or for information on how to 8 * contact the author, see the README file. 9 */ 10 11 12 /* 13 * Low level character input from the input file. 14 * We use these special purpose routines which optimize moving 15 * both forward and backward from the current read pointer. 16 */ 17 18 #include "less.h" 19 #if MSDOS_COMPILER==WIN32C 20 #include <errno.h> 21 #include <windows.h> 22 #endif 23 24 typedef POSITION BLOCKNUM; 25 26 public int ignore_eoi; 27 28 /* 29 * Pool of buffers holding the most recently used blocks of the input file. 30 * The buffer pool is kept as a doubly-linked circular list, 31 * in order from most- to least-recently used. 32 * The circular list is anchored by the file state "thisfile". 33 */ 34 #define LBUFSIZE 8192 35 struct buf { 36 struct buf *next, *prev; 37 struct buf *hnext, *hprev; 38 BLOCKNUM block; 39 unsigned int datasize; 40 unsigned char data[LBUFSIZE]; 41 }; 42 43 struct buflist { 44 /* -- Following members must match struct buf */ 45 struct buf *buf_next, *buf_prev; 46 struct buf *buf_hnext, *buf_hprev; 47 }; 48 49 /* 50 * The file state is maintained in a filestate structure. 51 * A pointer to the filestate is kept in the ifile structure. 52 */ 53 #define BUFHASH_SIZE 64 54 struct filestate { 55 struct buf *buf_next, *buf_prev; 56 struct buflist hashtbl[BUFHASH_SIZE]; 57 int file; 58 int flags; 59 POSITION fpos; 60 int nbufs; 61 BLOCKNUM block; 62 unsigned int offset; 63 POSITION fsize; 64 }; 65 66 #define ch_bufhead thisfile->buf_next 67 #define ch_buftail thisfile->buf_prev 68 #define ch_nbufs thisfile->nbufs 69 #define ch_block thisfile->block 70 #define ch_offset thisfile->offset 71 #define ch_fpos thisfile->fpos 72 #define ch_fsize thisfile->fsize 73 #define ch_flags thisfile->flags 74 #define ch_file thisfile->file 75 76 #define END_OF_CHAIN ((struct buf *)&thisfile->buf_next) 77 #define END_OF_HCHAIN(h) ((struct buf *)&thisfile->hashtbl[h]) 78 #define BUFHASH(blk) ((blk) & (BUFHASH_SIZE-1)) 79 80 #define FOR_BUFS_IN_CHAIN(h,bp) \ 81 for (bp = thisfile->hashtbl[h].buf_hnext; \ 82 bp != END_OF_HCHAIN(h); bp = bp->hnext) 83 84 #define HASH_RM(bp) \ 85 (bp)->hnext->hprev = (bp)->hprev; \ 86 (bp)->hprev->hnext = (bp)->hnext; 87 88 #define HASH_INS(bp,h) \ 89 (bp)->hnext = thisfile->hashtbl[h].buf_hnext; \ 90 (bp)->hprev = END_OF_HCHAIN(h); \ 91 thisfile->hashtbl[h].buf_hnext->hprev = (bp); \ 92 thisfile->hashtbl[h].buf_hnext = (bp); 93 94 static struct filestate *thisfile; 95 static int ch_ungotchar = -1; 96 static int maxbufs = -1; 97 98 extern int autobuf; 99 extern int sigs; 100 extern int secure; 101 extern constant char helpdata[]; 102 extern constant int size_helpdata; 103 extern IFILE curr_ifile; 104 #if LOGFILE 105 extern int logfile; 106 extern char *namelogfile; 107 #endif 108 109 static int ch_addbuf(); 110 111 112 /* 113 * Get the character pointed to by the read pointer. 114 * ch_get() is a macro which is more efficient to call 115 * than fch_get (the function), in the usual case 116 * that the block desired is at the head of the chain. 117 */ 118 #define ch_get() ((ch_block == ch_bufhead->block && \ 119 ch_offset < ch_bufhead->datasize) ? \ 120 ch_bufhead->data[ch_offset] : fch_get()) 121 int 122 fch_get() 123 { 124 register struct buf *bp; 125 register int n; 126 register int slept; 127 register int h; 128 POSITION pos; 129 POSITION len; 130 131 slept = FALSE; 132 133 /* 134 * Look for a buffer holding the desired block. 135 */ 136 h = BUFHASH(ch_block); 137 FOR_BUFS_IN_CHAIN(h, bp) 138 { 139 if (bp->block == ch_block) 140 { 141 if (ch_offset >= bp->datasize) 142 /* 143 * Need more data in this buffer. 144 */ 145 goto read_more; 146 goto found; 147 } 148 } 149 /* 150 * Block is not in a buffer. 151 * Take the least recently used buffer 152 * and read the desired block into it. 153 * If the LRU buffer has data in it, 154 * then maybe allocate a new buffer. 155 */ 156 if (ch_buftail == END_OF_CHAIN || ch_buftail->block != -1) 157 { 158 /* 159 * There is no empty buffer to use. 160 * Allocate a new buffer if: 161 * 1. We can't seek on this file and -b is not in effect; or 162 * 2. We haven't allocated the max buffers for this file yet. 163 */ 164 if ((autobuf && !(ch_flags & CH_CANSEEK)) || 165 (maxbufs < 0 || ch_nbufs < maxbufs)) 166 if (ch_addbuf()) 167 /* 168 * Allocation failed: turn off autobuf. 169 */ 170 autobuf = OPT_OFF; 171 } 172 bp = ch_buftail; 173 HASH_RM(bp); /* Remove from old hash chain. */ 174 bp->block = ch_block; 175 bp->datasize = 0; 176 HASH_INS(bp, h); /* Insert into new hash chain. */ 177 178 read_more: 179 pos = (ch_block * LBUFSIZE) + bp->datasize; 180 if ((len = ch_length()) != NULL_POSITION && pos >= len) 181 /* 182 * At end of file. 183 */ 184 return (EOI); 185 186 if (pos != ch_fpos) 187 { 188 /* 189 * Not at the correct position: must seek. 190 * If input is a pipe, we're in trouble (can't seek on a pipe). 191 * Some data has been lost: just return "?". 192 */ 193 if (!(ch_flags & CH_CANSEEK)) 194 return ('?'); 195 if (lseek(ch_file, (off_t)pos, 0) == BAD_LSEEK) 196 { 197 error("seek error", NULL_PARG); 198 clear_eol(); 199 return (EOI); 200 } 201 ch_fpos = pos; 202 } 203 204 /* 205 * Read the block. 206 * If we read less than a full block, that's ok. 207 * We use partial block and pick up the rest next time. 208 */ 209 if (ch_ungotchar != -1) 210 { 211 bp->data[bp->datasize] = ch_ungotchar; 212 n = 1; 213 ch_ungotchar = -1; 214 } else if (ch_flags & CH_HELPFILE) 215 { 216 bp->data[bp->datasize] = helpdata[ch_fpos]; 217 n = 1; 218 } else 219 { 220 n = iread(ch_file, &bp->data[bp->datasize], 221 (unsigned int)(LBUFSIZE - bp->datasize)); 222 } 223 224 if (n == READ_INTR) 225 return (EOI); 226 if (n < 0) 227 { 228 #if MSDOS_COMPILER==WIN32C 229 if (errno != EPIPE) 230 #endif 231 { 232 error("read error", NULL_PARG); 233 clear_eol(); 234 } 235 n = 0; 236 } 237 238 #if LOGFILE 239 /* 240 * If we have a log file, write the new data to it. 241 */ 242 if (!secure && logfile >= 0 && n > 0) 243 write(logfile, (char *) &bp->data[bp->datasize], n); 244 #endif 245 246 ch_fpos += n; 247 bp->datasize += n; 248 249 /* 250 * If we have read to end of file, set ch_fsize to indicate 251 * the position of the end of file. 252 */ 253 if (n == 0) 254 { 255 ch_fsize = pos; 256 if (ignore_eoi) 257 { 258 /* 259 * We are ignoring EOF. 260 * Wait a while, then try again. 261 */ 262 if (!slept) 263 { 264 PARG parg; 265 parg.p_string = wait_message(); 266 ierror("%s", &parg); 267 } 268 #if !MSDOS_COMPILER 269 sleep(1); 270 #else 271 #if MSDOS_COMPILER==WIN32C 272 Sleep(1000); 273 #endif 274 #endif 275 slept = TRUE; 276 } 277 if (sigs) 278 return (EOI); 279 } 280 281 found: 282 if (ch_bufhead != bp) 283 { 284 /* 285 * Move the buffer to the head of the buffer chain. 286 * This orders the buffer chain, most- to least-recently used. 287 */ 288 bp->next->prev = bp->prev; 289 bp->prev->next = bp->next; 290 bp->next = ch_bufhead; 291 bp->prev = END_OF_CHAIN; 292 ch_bufhead->prev = bp; 293 ch_bufhead = bp; 294 295 /* 296 * Move to head of hash chain too. 297 */ 298 HASH_RM(bp); 299 HASH_INS(bp, h); 300 } 301 302 if (ch_offset >= bp->datasize) 303 /* 304 * After all that, we still don't have enough data. 305 * Go back and try again. 306 */ 307 goto read_more; 308 309 return (bp->data[ch_offset]); 310 } 311 312 /* 313 * ch_ungetchar is a rather kludgy and limited way to push 314 * a single char onto an input file descriptor. 315 */ 316 public void 317 ch_ungetchar(c) 318 int c; 319 { 320 if (c != -1 && ch_ungotchar != -1) 321 error("ch_ungetchar overrun", NULL_PARG); 322 ch_ungotchar = c; 323 } 324 325 #if LOGFILE 326 /* 327 * Close the logfile. 328 * If we haven't read all of standard input into it, do that now. 329 */ 330 public void 331 end_logfile() 332 { 333 static int tried = FALSE; 334 335 if (logfile < 0) 336 return; 337 if (!tried && ch_fsize == NULL_POSITION) 338 { 339 tried = TRUE; 340 ierror("Finishing logfile", NULL_PARG); 341 while (ch_forw_get() != EOI) 342 if (ABORT_SIGS()) 343 break; 344 } 345 close(logfile); 346 logfile = -1; 347 namelogfile = NULL; 348 } 349 350 /* 351 * Start a log file AFTER less has already been running. 352 * Invoked from the - command; see toggle_option(). 353 * Write all the existing buffered data to the log file. 354 */ 355 public void 356 sync_logfile() 357 { 358 register struct buf *bp; 359 int warned = FALSE; 360 BLOCKNUM block; 361 BLOCKNUM nblocks; 362 363 nblocks = (ch_fpos + LBUFSIZE - 1) / LBUFSIZE; 364 for (block = 0; block < nblocks; block++) 365 { 366 for (bp = ch_bufhead; ; bp = bp->next) 367 { 368 if (bp == END_OF_CHAIN) 369 { 370 if (!warned) 371 { 372 error("Warning: log file is incomplete", 373 NULL_PARG); 374 warned = TRUE; 375 } 376 break; 377 } 378 if (bp->block == block) 379 { 380 write(logfile, (char *) bp->data, bp->datasize); 381 break; 382 } 383 } 384 } 385 } 386 387 #endif 388 389 /* 390 * Determine if a specific block is currently in one of the buffers. 391 */ 392 static int 393 buffered(block) 394 BLOCKNUM block; 395 { 396 register struct buf *bp; 397 register int h; 398 399 h = BUFHASH(block); 400 FOR_BUFS_IN_CHAIN(h, bp) 401 { 402 if (bp->block == block) 403 return (TRUE); 404 } 405 return (FALSE); 406 } 407 408 /* 409 * Seek to a specified position in the file. 410 * Return 0 if successful, non-zero if can't seek there. 411 */ 412 public int 413 ch_seek(pos) 414 register POSITION pos; 415 { 416 BLOCKNUM new_block; 417 POSITION len; 418 419 len = ch_length(); 420 if (pos < ch_zero() || (len != NULL_POSITION && pos > len)) 421 return (1); 422 423 new_block = pos / LBUFSIZE; 424 if (!(ch_flags & CH_CANSEEK) && pos != ch_fpos && !buffered(new_block)) 425 { 426 if (ch_fpos > pos) 427 return (1); 428 while (ch_fpos < pos) 429 { 430 if (ch_forw_get() == EOI) 431 return (1); 432 if (ABORT_SIGS()) 433 return (1); 434 } 435 return (0); 436 } 437 /* 438 * Set read pointer. 439 */ 440 ch_block = new_block; 441 ch_offset = pos % LBUFSIZE; 442 return (0); 443 } 444 445 /* 446 * Seek to the end of the file. 447 */ 448 public int 449 ch_end_seek() 450 { 451 POSITION len; 452 453 if (ch_flags & CH_CANSEEK) 454 ch_fsize = filesize(ch_file); 455 456 len = ch_length(); 457 if (len != NULL_POSITION) 458 return (ch_seek(len)); 459 460 /* 461 * Do it the slow way: read till end of data. 462 */ 463 while (ch_forw_get() != EOI) 464 if (ABORT_SIGS()) 465 return (1); 466 return (0); 467 } 468 469 /* 470 * Seek to the beginning of the file, or as close to it as we can get. 471 * We may not be able to seek there if input is a pipe and the 472 * beginning of the pipe is no longer buffered. 473 */ 474 public int 475 ch_beg_seek() 476 { 477 register struct buf *bp, *firstbp; 478 479 /* 480 * Try a plain ch_seek first. 481 */ 482 if (ch_seek(ch_zero()) == 0) 483 return (0); 484 485 /* 486 * Can't get to position 0. 487 * Look thru the buffers for the one closest to position 0. 488 */ 489 firstbp = bp = ch_bufhead; 490 if (bp == END_OF_CHAIN) 491 return (1); 492 while ((bp = bp->next) != END_OF_CHAIN) 493 if (bp->block < firstbp->block) 494 firstbp = bp; 495 ch_block = firstbp->block; 496 ch_offset = 0; 497 return (0); 498 } 499 500 /* 501 * Return the length of the file, if known. 502 */ 503 public POSITION 504 ch_length() 505 { 506 if (ignore_eoi) 507 return (NULL_POSITION); 508 if (ch_flags & CH_HELPFILE) 509 return (size_helpdata); 510 return (ch_fsize); 511 } 512 513 /* 514 * Return the current position in the file. 515 */ 516 public POSITION 517 ch_tell() 518 { 519 return (ch_block * LBUFSIZE) + ch_offset; 520 } 521 522 /* 523 * Get the current char and post-increment the read pointer. 524 */ 525 public int 526 ch_forw_get() 527 { 528 register int c; 529 530 c = ch_get(); 531 if (c == EOI) 532 return (EOI); 533 if (ch_offset < LBUFSIZE-1) 534 ch_offset++; 535 else 536 { 537 ch_block ++; 538 ch_offset = 0; 539 } 540 return (c); 541 } 542 543 /* 544 * Pre-decrement the read pointer and get the new current char. 545 */ 546 public int 547 ch_back_get() 548 { 549 if (ch_offset > 0) 550 ch_offset --; 551 else 552 { 553 if (ch_block <= 0) 554 return (EOI); 555 if (!(ch_flags & CH_CANSEEK) && !buffered(ch_block-1)) 556 return (EOI); 557 ch_block--; 558 ch_offset = LBUFSIZE-1; 559 } 560 return (ch_get()); 561 } 562 563 /* 564 * Set max amount of buffer space. 565 * bufspace is in units of 1024 bytes. -1 mean no limit. 566 */ 567 public void 568 ch_setbufspace(bufspace) 569 int bufspace; 570 { 571 if (bufspace < 0) 572 maxbufs = -1; 573 else 574 { 575 maxbufs = ((bufspace * 1024) + LBUFSIZE-1) / LBUFSIZE; 576 if (maxbufs < 1) 577 maxbufs = 1; 578 } 579 } 580 581 /* 582 * Flush (discard) any saved file state, including buffer contents. 583 */ 584 public void 585 ch_flush() 586 { 587 register struct buf *bp; 588 589 if (!(ch_flags & CH_CANSEEK)) 590 { 591 /* 592 * If input is a pipe, we don't flush buffer contents, 593 * since the contents can't be recovered. 594 */ 595 ch_fsize = NULL_POSITION; 596 return; 597 } 598 599 /* 600 * Initialize all the buffers. 601 */ 602 for (bp = ch_bufhead; bp != END_OF_CHAIN; bp = bp->next) 603 bp->block = -1; 604 605 /* 606 * Figure out the size of the file, if we can. 607 */ 608 ch_fsize = filesize(ch_file); 609 610 /* 611 * Seek to a known position: the beginning of the file. 612 */ 613 ch_fpos = 0; 614 ch_block = 0; /* ch_fpos / LBUFSIZE; */ 615 ch_offset = 0; /* ch_fpos % LBUFSIZE; */ 616 617 #if 1 618 /* 619 * This is a kludge to workaround a Linux kernel bug: files in 620 * /proc have a size of 0 according to fstat() but have readable 621 * data. They are sometimes, but not always, seekable. 622 * Force them to be non-seekable here. 623 */ 624 if (ch_fsize == 0) 625 { 626 ch_fsize = NULL_POSITION; 627 ch_flags &= ~CH_CANSEEK; 628 } 629 #endif 630 631 if (lseek(ch_file, (off_t)0, 0) == BAD_LSEEK) 632 { 633 /* 634 * Warning only; even if the seek fails for some reason, 635 * there's a good chance we're at the beginning anyway. 636 * {{ I think this is bogus reasoning. }} 637 */ 638 error("seek error to 0", NULL_PARG); 639 } 640 } 641 642 /* 643 * Allocate a new buffer. 644 * The buffer is added to the tail of the buffer chain. 645 */ 646 static int 647 ch_addbuf() 648 { 649 register struct buf *bp; 650 651 /* 652 * Allocate and initialize a new buffer and link it 653 * onto the tail of the buffer list. 654 */ 655 bp = (struct buf *) calloc(1, sizeof(struct buf)); 656 if (bp == NULL) 657 return (1); 658 ch_nbufs++; 659 bp->block = -1; 660 bp->next = END_OF_CHAIN; 661 bp->prev = ch_buftail; 662 ch_buftail->next = bp; 663 ch_buftail = bp; 664 HASH_INS(bp, 0); 665 return (0); 666 } 667 668 /* 669 * 670 */ 671 static void 672 init_hashtbl() 673 { 674 register int h; 675 676 for (h = 0; h < BUFHASH_SIZE; h++) 677 { 678 thisfile->hashtbl[h].buf_hnext = END_OF_HCHAIN(h); 679 thisfile->hashtbl[h].buf_hprev = END_OF_HCHAIN(h); 680 } 681 } 682 683 /* 684 * Delete all buffers for this file. 685 */ 686 static void 687 ch_delbufs() 688 { 689 register struct buf *bp; 690 691 while (ch_bufhead != END_OF_CHAIN) 692 { 693 bp = ch_bufhead; 694 bp->next->prev = bp->prev;; 695 bp->prev->next = bp->next; 696 free(bp); 697 } 698 ch_nbufs = 0; 699 init_hashtbl(); 700 } 701 702 /* 703 * Is it possible to seek on a file descriptor? 704 */ 705 public int 706 seekable(f) 707 int f; 708 { 709 #if MSDOS_COMPILER 710 extern int fd0; 711 if (f == fd0 && !isatty(fd0)) 712 { 713 /* 714 * In MS-DOS, pipes are seekable. Check for 715 * standard input, and pretend it is not seekable. 716 */ 717 return (0); 718 } 719 #endif 720 return (lseek(f, (off_t)1, 0) != BAD_LSEEK); 721 } 722 723 /* 724 * Initialize file state for a new file. 725 */ 726 public void 727 ch_init(f, flags) 728 int f; 729 int flags; 730 { 731 /* 732 * See if we already have a filestate for this file. 733 */ 734 thisfile = (struct filestate *) get_filestate(curr_ifile); 735 if (thisfile == NULL) 736 { 737 /* 738 * Allocate and initialize a new filestate. 739 */ 740 thisfile = (struct filestate *) 741 calloc(1, sizeof(struct filestate)); 742 thisfile->buf_next = thisfile->buf_prev = END_OF_CHAIN; 743 thisfile->nbufs = 0; 744 thisfile->flags = 0; 745 thisfile->fpos = 0; 746 thisfile->block = 0; 747 thisfile->offset = 0; 748 thisfile->file = -1; 749 thisfile->fsize = NULL_POSITION; 750 ch_flags = flags; 751 init_hashtbl(); 752 /* 753 * Try to seek; set CH_CANSEEK if it works. 754 */ 755 if ((flags & CH_CANSEEK) && !seekable(f)) 756 ch_flags &= ~CH_CANSEEK; 757 set_filestate(curr_ifile, (void *) thisfile); 758 } 759 if (thisfile->file == -1) 760 thisfile->file = f; 761 ch_flush(); 762 } 763 764 /* 765 * Close a filestate. 766 */ 767 public void 768 ch_close() 769 { 770 int keepstate = FALSE; 771 772 if (ch_flags & (CH_CANSEEK|CH_POPENED|CH_HELPFILE)) 773 { 774 /* 775 * We can seek or re-open, so we don't need to keep buffers. 776 */ 777 ch_delbufs(); 778 } else 779 keepstate = TRUE; 780 if (!(ch_flags & CH_KEEPOPEN)) 781 { 782 /* 783 * We don't need to keep the file descriptor open 784 * (because we can re-open it.) 785 * But don't really close it if it was opened via popen(), 786 * because pclose() wants to close it. 787 */ 788 if (!(ch_flags & (CH_POPENED|CH_HELPFILE))) 789 close(ch_file); 790 ch_file = -1; 791 } else 792 keepstate = TRUE; 793 if (!keepstate) 794 { 795 /* 796 * We don't even need to keep the filestate structure. 797 */ 798 free(thisfile); 799 thisfile = NULL; 800 set_filestate(curr_ifile, (void *) NULL); 801 } 802 } 803 804 /* 805 * Return ch_flags for the current file. 806 */ 807 public int 808 ch_getflags() 809 { 810 return (ch_flags); 811 } 812 813 #if 0 814 public void 815 ch_dump(struct filestate *fs) 816 { 817 struct buf *bp; 818 unsigned char *s; 819 820 if (fs == NULL) 821 { 822 printf(" --no filestate\n"); 823 return; 824 } 825 printf(" file %d, flags %x, fpos %x, fsize %x, blk/off %x/%x\n", 826 fs->file, fs->flags, fs->fpos, 827 fs->fsize, fs->block, fs->offset); 828 printf(" %d bufs:\n", fs->nbufs); 829 for (bp = fs->buf_next; bp != (struct buf *)fs; bp = bp->next) 830 { 831 printf("%x: blk %x, size %x \"", 832 bp, bp->block, bp->datasize); 833 for (s = bp->data; s < bp->data + 30; s++) 834 if (*s >= ' ' && *s < 0x7F) 835 printf("%c", *s); 836 else 837 printf("."); 838 printf("\"\n"); 839 } 840 } 841 #endif 842