xref: /freebsd/contrib/less/line.c (revision f7c4bd95ba735bd6a5454b4953945a99cefbb80c)
1 /* $FreeBSD$ */
2 /*
3  * Copyright (C) 1984-2007  Mark Nudelman
4  *
5  * You may distribute under the terms of either the GNU General Public
6  * License or the Less License, as specified in the README file.
7  *
8  * For more information about less, or for information on how to
9  * contact the author, see the README file.
10  */
11 
12 
13 /*
14  * Routines to manipulate the "line buffer".
15  * The line buffer holds a line of output as it is being built
16  * in preparation for output to the screen.
17  */
18 
19 #include "less.h"
20 #include "charset.h"
21 
static char *linebuf = NULL;	/* Buffer which holds the current output line */
static char *attr = NULL;	/* Extension of linebuf to hold attributes */
public int size_linebuf = 0;	/* Size of line buffer (and attr buffer) */

static int cshift;		/* Current left-shift of output line buffer */
public int hshift;		/* Desired left-shift of output line buffer */
public int tabstops[TABSTOP_MAX] = { 0 }; /* Custom tabstops */
public int ntabstops = 1;	/* Number of tabstops */
public int tabdefault = 8;	/* Default repeated tabstops */

static int curr;		/* Index into linebuf */
static int column;		/* Printable length, accounting for
				   backspaces, etc. */
static int overstrike;		/* Next char should overstrike previous char */
/* Last bold/underline attribute passed to store_char; reset by prewind. */
static int last_overstrike = AT_NORMAL;
static int is_null_line;	/* There is no current line */
static int lmargin;		/* Left margin */
static int line_matches;	/* Number of search matches in this line */
/* Character held back by pappend (a CR waiting for the next char); 0 if none. */
static char pendc;
/* File position that belongs to pendc. */
static POSITION pendpos;
/* Chars that end an ANSI escape sequence (LESSANSIENDCHARS, default "m"). */
static char *end_ansi_chars;
/* Chars that may appear inside an ANSI escape sequence (LESSANSIMIDCHARS). */
static char *mid_ansi_chars;

/* Forward declarations of functions used before their definition. */
static int attr_swidth();
static int attr_ewidth();
static int do_append();

/* Variables defined in other source files. */
extern int sigs;
extern int bs_mode;
extern int linenums;
extern int ctldisp;
extern int twiddle;
extern int binattr;
extern int status_col;
extern int auto_wrap, ignaw;
extern int bo_s_width, bo_e_width;
extern int ul_s_width, ul_e_width;
extern int bl_s_width, bl_e_width;
extern int so_s_width, so_e_width;
extern int sc_width, sc_height;
extern int utf_mode;
extern int oldbot;
extern POSITION start_attnpos;
extern POSITION end_attnpos;

/*
 * State of the multibyte (UTF-8) sequence being collected by pappend.
 */
static char mbc_buf[MAX_UTF_CHAR_LEN];	/* Bytes collected so far */
static int mbc_buf_len = 0;	/* Expected length of the sequence
				   (from utf_len of the lead byte);
				   0 when no sequence is pending */
static int mbc_buf_index = 0;	/* Number of bytes stored in mbc_buf */
static POSITION mbc_pos;	/* File position of the lead byte */
71 
72 /*
73  * Initialize from environment variables.
74  */
75 	public void
76 init_line()
77 {
78 	end_ansi_chars = lgetenv("LESSANSIENDCHARS");
79 	if (end_ansi_chars == NULL || *end_ansi_chars == '\0')
80 		end_ansi_chars = "m";
81 
82 	mid_ansi_chars = lgetenv("LESSANSIMIDCHARS");
83 	if (mid_ansi_chars == NULL || *mid_ansi_chars == '\0')
84 		mid_ansi_chars = "0123456789;[?!\"'#%()*+ ";
85 
86 	linebuf = (char *) ecalloc(LINEBUF_SIZE, sizeof(char));
87 	attr = (char *) ecalloc(LINEBUF_SIZE, sizeof(char));
88 	size_linebuf = LINEBUF_SIZE;
89 }
90 
91 /*
92  * Expand the line buffer.
93  */
94 	static int
95 expand_linebuf()
96 {
97 	/* Double the size of the line buffer. */
98 	int new_size = size_linebuf * 2;
99 
100 	/* Just realloc to expand the buffer, if we can. */
101 #if HAVE_REALLOC
102 	char *new_buf = (char *) realloc(linebuf, new_size);
103 	char *new_attr = (char *) realloc(attr, new_size);
104 #else
105 	char *new_buf = (char *) calloc(new_size, sizeof(char));
106 	char *new_attr = (char *) calloc(new_size, sizeof(char));
107 #endif
108 	if (new_buf == NULL || new_attr == NULL)
109 	{
110 		if (new_attr != NULL)
111 			free(new_attr);
112 		if (new_buf != NULL)
113 			free(new_buf);
114 		return 1;
115 	}
116 #if HAVE_REALLOC
117 	/*
118 	 * We realloc'd the buffers; they already have the old contents.
119 	 */
120 	#if 0
121 	memset(new_buf + size_linebuf, 0, new_size - size_linebuf);
122 	memset(new_attr + size_linebuf, 0, new_size - size_linebuf);
123 	#endif
124 #else
125 	/*
126 	 * We just calloc'd the buffers; copy the old contents.
127 	 */
128 	memcpy(new_buf, linebuf, size_linebuf * sizeof(char));
129 	memcpy(new_attr, attr, size_linebuf * sizeof(char));
130 	free(attr);
131 	free(linebuf);
132 #endif
133 	linebuf = new_buf;
134 	attr = new_attr;
135 	size_linebuf = new_size;
136 	return 0;
137 }
138 
139 /*
140  * Is a character ASCII?
141  */
142 	public int
143 is_ascii_char(ch)
144 	LWCHAR ch;
145 {
146 	return (ch <= 0x7F);
147 }
148 
149 /*
150  * Rewind the line buffer.
151  */
152 	public void
153 prewind()
154 {
155 	curr = 0;
156 	column = 0;
157 	cshift = 0;
158 	overstrike = 0;
159 	last_overstrike = AT_NORMAL;
160 	mbc_buf_len = 0;
161 	is_null_line = 0;
162 	pendc = '\0';
163 	lmargin = 0;
164 	if (status_col)
165 		lmargin += 1;
166 #if HILITE_SEARCH
167 	line_matches = 0;
168 #endif
169 }
170 
171 /*
172  * Insert the line number (of the given position) into the line buffer.
173  */
174 	public void
175 plinenum(pos)
176 	POSITION pos;
177 {
178 	register LINENUM linenum = 0;
179 	register int i;
180 
181 	if (linenums == OPT_ONPLUS)
182 	{
183 		/*
184 		 * Get the line number and put it in the current line.
185 		 * {{ Note: since find_linenum calls forw_raw_line,
186 		 *    it may seek in the input file, requiring the caller
187 		 *    of plinenum to re-seek if necessary. }}
188 		 * {{ Since forw_raw_line modifies linebuf, we must
189 		 *    do this first, before storing anything in linebuf. }}
190 		 */
191 		linenum = find_linenum(pos);
192 	}
193 
194 	/*
195 	 * Display a status column if the -J option is set.
196 	 */
197 	if (status_col)
198 	{
199 		linebuf[curr] = ' ';
200 		if (start_attnpos != NULL_POSITION &&
201 		    pos >= start_attnpos && pos < end_attnpos)
202 			attr[curr] = AT_NORMAL|AT_HILITE;
203 		else
204 			attr[curr] = AT_NORMAL;
205 		curr++;
206 		column++;
207 	}
208 	/*
209 	 * Display the line number at the start of each line
210 	 * if the -N option is set.
211 	 */
212 	if (linenums == OPT_ONPLUS)
213 	{
214 		char buf[INT_STRLEN_BOUND(pos) + 2];
215 		int n;
216 
217 		linenumtoa(linenum, buf);
218 		n = strlen(buf);
219 		if (n < MIN_LINENUM_WIDTH)
220 			n = MIN_LINENUM_WIDTH;
221 		sprintf(linebuf+curr, "%*s ", n, buf);
222 		n++;  /* One space after the line number. */
223 		for (i = 0; i < n; i++)
224 			attr[curr+i] = AT_NORMAL;
225 		curr += n;
226 		column += n;
227 		lmargin += n;
228 	}
229 
230 	/*
231 	 * Append enough spaces to bring us to the lmargin.
232 	 */
233 	while (column < lmargin)
234 	{
235 		linebuf[curr] = ' ';
236 		attr[curr++] = AT_NORMAL;
237 		column++;
238 	}
239 }
240 
/*
 * Shift the input line left.
 * This means discarding N printable chars at the start of the buffer.
 *
 * "shift" is the number of screen columns to discard.  It is clamped
 * to what lies between the left margin and the current end of the
 * line; the bytes before lmargin are never touched.
 * When ctldisp == OPT_ONPLUS, escape sequences found in the discarded
 * region are kept so that they still affect the text that remains.
 * On return curr, column and cshift reflect the shift performed.
 */
	static void
pshift(shift)
	int shift;
{
	LWCHAR prev_ch = 0;	/* Previous UTF-8 char, 0 if none/ASCII */
	unsigned char c;
	int shifted = 0;	/* Columns discarded so far */
	int to;			/* Write index into linebuf/attr */
	int from;		/* Read index into linebuf/attr */
	int len;		/* Byte length of the char at "from" */
	int width;		/* Column width of the char at "from" */
	int prev_attr;
	int next_attr;

	/* Never shift by more than is present after the left margin. */
	if (shift > column - lmargin)
		shift = column - lmargin;
	if (shift > curr - lmargin)
		shift = curr - lmargin;

	to = from = lmargin;
	/*
	 * We keep on going when shifted == shift
	 * to get all combining chars.
	 */
	while (shifted <= shift && from < curr)
	{
		c = linebuf[from];
		if (ctldisp == OPT_ONPLUS && IS_CSI_START(c))
		{
			/* Keep cumulative effect.  */
			linebuf[to] = c;
			attr[to++] = attr[from++];
			/*
			 * Copy the rest of the sequence: every "middle"
			 * char plus the first char that is not one.
			 */
			while (from < curr && linebuf[from])
			{
				linebuf[to] = linebuf[from];
				attr[to++] = attr[from];
				if (!is_ansi_middle(linebuf[from++]))
					break;
			}
			continue;
		}

		width = 0;

		if (!IS_ASCII_OCTET(c) && utf_mode)
		{
			/* Assumes well-formedness validation already done.  */
			LWCHAR ch;

			len = utf_len(c);
			if (from + len > curr)
				break;
			ch = get_wchar(linebuf + from);
			/* Composing/combining chars occupy no column. */
			if (!is_composing_char(ch) && !is_combining_char(prev_ch, ch))
				width = is_wide_char(ch) ? 2 : 1;
			prev_ch = ch;
		} else
		{
			len = 1;
			if (c == '\b')
				/* XXX - Incorrect if several '\b' in a row.  */
				width = (utf_mode && is_wide_char(prev_ch)) ? -2 : -1;
			else if (!control_char(c))
				width = 1;
			prev_ch = 0;
		}

		/*
		 * Only one column is left to discard but the char is
		 * two columns wide: replace it with a single space.
		 */
		if (width == 2 && shift - shifted == 1) {
			/* Should never happen when called by pshift_all().  */
			attr[to] = attr[from];
			/*
			 * Assume a wide_char will never be the first half of a
			 * combining_char pair, so reset prev_ch in case we're
			 * followed by a '\b'.
			 */
			prev_ch = linebuf[to++] = ' ';
			from += len;
			shifted++;
			continue;
		}

		/* Adjust width for magic cookies. */
		prev_attr = (to > 0) ? attr[to-1] : AT_NORMAL;
		next_attr = (from + len < curr) ? attr[from + len] : prev_attr;
		if (!is_at_equiv(attr[from], prev_attr) &&
			!is_at_equiv(attr[from], next_attr))
		{
			width += attr_swidth(attr[from]);
			if (from + len < curr)
				width += attr_ewidth(attr[from]);
			if (is_at_equiv(prev_attr, next_attr))
			{
				width += attr_ewidth(prev_attr);
				if (from + len < curr)
					width += attr_swidth(next_attr);
			}
		}

		/* Stop once this char no longer fits in what is left. */
		if (shift - shifted < width)
			break;
		from += len;
		shifted += width;
		/* A backspace may have driven the count negative. */
		if (shifted < 0)
			shifted = 0;
	}
	/* Move everything that was not discarded down to "to". */
	while (from < curr)
	{
		linebuf[to] = linebuf[from];
		attr[to++] = attr[from++];
	}
	curr = to;
	column -= shifted;
	cshift += shifted;
}
359 
/*
 * Shift the line left by its entire current printable width
 * (calls pshift with the current value of column).
 */
	public void
pshift_all()
{
	pshift(column);
}
368 
369 /*
370  * Return the printing width of the start (enter) sequence
371  * for a given character attribute.
372  */
373 	static int
374 attr_swidth(a)
375 	int a;
376 {
377 	int w = 0;
378 
379 	a = apply_at_specials(a);
380 
381 	if (a & AT_UNDERLINE)
382 		w += ul_s_width;
383 	if (a & AT_BOLD)
384 		w += bo_s_width;
385 	if (a & AT_BLINK)
386 		w += bl_s_width;
387 	if (a & AT_STANDOUT)
388 		w += so_s_width;
389 
390 	return w;
391 }
392 
393 /*
394  * Return the printing width of the end (exit) sequence
395  * for a given character attribute.
396  */
397 	static int
398 attr_ewidth(a)
399 	int a;
400 {
401 	int w = 0;
402 
403 	a = apply_at_specials(a);
404 
405 	if (a & AT_UNDERLINE)
406 		w += ul_e_width;
407 	if (a & AT_BOLD)
408 		w += bo_e_width;
409 	if (a & AT_BLINK)
410 		w += bl_e_width;
411 	if (a & AT_STANDOUT)
412 		w += so_e_width;
413 
414 	return w;
415 }
416 
417 /*
418  * Return the printing width of a given character and attribute,
419  * if the character were added to the current position in the line buffer.
420  * Adding a character with a given attribute may cause an enter or exit
421  * attribute sequence to be inserted, so this must be taken into account.
422  */
423 	static int
424 pwidth(ch, a, prev_ch)
425 	LWCHAR ch;
426 	int a;
427 	LWCHAR prev_ch;
428 {
429 	int w;
430 
431 	if (ch == '\b')
432 		/*
433 		 * Backspace moves backwards one or two positions.
434 		 * XXX - Incorrect if several '\b' in a row.
435 		 */
436 		return (utf_mode && is_wide_char(prev_ch)) ? -2 : -1;
437 
438 	if (!utf_mode || is_ascii_char(ch))
439 	{
440 		if (control_char((char)ch))
441 		{
442 			/*
443 			 * Control characters do unpredictable things,
444 			 * so we don't even try to guess; say it doesn't move.
445 			 * This can only happen if the -r flag is in effect.
446 			 */
447 			return (0);
448 		}
449 	} else
450 	{
451 		if (is_composing_char(ch) || is_combining_char(prev_ch, ch))
452 		{
453 			/*
454 			 * Composing and combining chars take up no space.
455 			 *
456 			 * Some terminals, upon failure to compose a
457 			 * composing character with the character(s) that
458 			 * precede(s) it will actually take up one column
459 			 * for the composing character; there isn't much
460 			 * we could do short of testing the (complex)
461 			 * composition process ourselves and printing
462 			 * a binary representation when it fails.
463 			 */
464 			return (0);
465 		}
466 	}
467 
468 	/*
469 	 * Other characters take one or two columns,
470 	 * plus the width of any attribute enter/exit sequence.
471 	 */
472 	w = 1;
473 	if (is_wide_char(ch))
474 		w++;
475 	if (curr > 0 && !is_at_equiv(attr[curr-1], a))
476 		w += attr_ewidth(attr[curr-1]);
477 	if ((apply_at_specials(a) != AT_NORMAL) &&
478 	    (curr == 0 || !is_at_equiv(attr[curr-1], a)))
479 		w += attr_swidth(a);
480 	return (w);
481 }
482 
483 /*
484  * Delete to the previous base character in the line buffer.
485  * Return 1 if one is found.
486  */
487 	static int
488 backc()
489 {
490 	LWCHAR prev_ch;
491 	char *p = linebuf + curr;
492 	LWCHAR ch = step_char(&p, -1, linebuf + lmargin);
493 	int width;
494 
495 	/* This assumes that there is no '\b' in linebuf.  */
496 	while (   curr > lmargin
497 	       && column > lmargin
498 	       && (!(attr[curr - 1] & (AT_ANSI|AT_BINARY))))
499 	{
500 		curr = p - linebuf;
501 		prev_ch = step_char(&p, -1, linebuf + lmargin);
502 		width = pwidth(ch, attr[curr], prev_ch);
503 		column -= width;
504 		if (width > 0)
505 			return 1;
506 		ch = prev_ch;
507 	}
508 
509 	return 0;
510 }
511 
512 /*
513  * Are we currently within a recognized ANSI escape sequence?
514  */
515 	static int
516 in_ansi_esc_seq()
517 {
518 	char *p;
519 
520 	/*
521 	 * Search backwards for either an ESC (which means we ARE in a seq);
522 	 * or an end char (which means we're NOT in a seq).
523 	 */
524 	for (p = &linebuf[curr];  p > linebuf; )
525 	{
526 		LWCHAR ch = step_char(&p, -1, linebuf);
527 		if (IS_CSI_START(ch))
528 			return (1);
529 		if (!is_ansi_middle(ch))
530 			return (0);
531 	}
532 	return (0);
533 }
534 
535 /*
536  * Is a character the end of an ANSI escape sequence?
537  */
538 	public int
539 is_ansi_end(ch)
540 	LWCHAR ch;
541 {
542 	if (!is_ascii_char(ch))
543 		return (0);
544 	return (strchr(end_ansi_chars, (char) ch) != NULL);
545 }
546 
547 /*
548  *
549  */
550 	public int
551 is_ansi_middle(ch)
552 	LWCHAR ch;
553 {
554 	if (!is_ascii_char(ch))
555 		return (0);
556 	if (is_ansi_end(ch))
557 		return (0);
558 	return (strchr(mid_ansi_chars, (char) ch) != NULL);
559 }
560 
561 /*
562  * Append a character and attribute to the line buffer.
563  */
564 #define	STORE_CHAR(ch,a,rep,pos) \
565 	do { \
566 		if (store_char((ch),(a),(rep),(pos))) return (1); \
567 	} while (0)
568 
569 	static int
570 store_char(ch, a, rep, pos)
571 	LWCHAR ch;
572 	int a;
573 	char *rep;
574 	POSITION pos;
575 {
576 	int w;
577 	int replen;
578 	char cs;
579 
580 	w = (a & (AT_UNDERLINE|AT_BOLD));	/* Pre-use w.  */
581 	if (w != AT_NORMAL)
582 		last_overstrike = w;
583 
584 #if HILITE_SEARCH
585 	{
586 		int matches;
587 		if (is_hilited(pos, pos+1, 0, &matches))
588 		{
589 			/*
590 			 * This character should be highlighted.
591 			 * Override the attribute passed in.
592 			 */
593 			if (a != AT_ANSI)
594 				a |= AT_HILITE;
595 		}
596 		line_matches += matches;
597 	}
598 #endif
599 
600 	if (ctldisp == OPT_ONPLUS && in_ansi_esc_seq())
601 	{
602 		if (!is_ansi_end(ch) && !is_ansi_middle(ch)) {
603 			/* Remove whole unrecognized sequence.  */
604 			do {
605 				--curr;
606 			} while (!IS_CSI_START(linebuf[curr]));
607 			return 0;
608 		}
609 		a = AT_ANSI;	/* Will force re-AT_'ing around it.  */
610 		w = 0;
611 	}
612 	else if (ctldisp == OPT_ONPLUS && IS_CSI_START(ch))
613 	{
614 		a = AT_ANSI;	/* Will force re-AT_'ing around it.  */
615 		w = 0;
616 	}
617 	else
618 	{
619 		char *p = &linebuf[curr];
620 		LWCHAR prev_ch = step_char(&p, -1, linebuf);
621 		w = pwidth(ch, a, prev_ch);
622 	}
623 
624 	if (ctldisp != OPT_ON && column + w + attr_ewidth(a) > sc_width)
625 		/*
626 		 * Won't fit on screen.
627 		 */
628 		return (1);
629 
630 	if (rep == NULL)
631 	{
632 		cs = (char) ch;
633 		rep = &cs;
634 		replen = 1;
635 	} else
636 	{
637 		replen = utf_len(rep[0]);
638 	}
639 	if (curr + replen >= size_linebuf-6)
640 	{
641 		/*
642 		 * Won't fit in line buffer.
643 		 * Try to expand it.
644 		 */
645 		if (expand_linebuf())
646 			return (1);
647 	}
648 
649 	while (replen-- > 0)
650 	{
651 		linebuf[curr] = *rep++;
652 		attr[curr] = a;
653 		curr++;
654 	}
655 	column += w;
656 	return (0);
657 }
658 
659 /*
660  * Append a tab to the line buffer.
661  * Store spaces to represent the tab.
662  */
663 #define	STORE_TAB(a,pos) \
664 	do { if (store_tab((a),(pos))) return (1); } while (0)
665 
666 	static int
667 store_tab(attr, pos)
668 	int attr;
669 	POSITION pos;
670 {
671 	int to_tab = column + cshift - lmargin;
672 	int i;
673 
674 	if (ntabstops < 2 || to_tab >= tabstops[ntabstops-1])
675 		to_tab = tabdefault -
676 		     ((to_tab - tabstops[ntabstops-1]) % tabdefault);
677 	else
678 	{
679 		for (i = ntabstops - 2;  i >= 0;  i--)
680 			if (to_tab >= tabstops[i])
681 				break;
682 		to_tab = tabstops[i+1] - to_tab;
683 	}
684 
685 	if (column + to_tab - 1 + pwidth(' ', attr, 0) + attr_ewidth(attr) > sc_width)
686 		return 1;
687 
688 	do {
689 		STORE_CHAR(' ', attr, " ", pos);
690 	} while (--to_tab > 0);
691 	return 0;
692 }
693 
694 #define STORE_PRCHAR(c, pos) \
695 	do { if (store_prchar((c), (pos))) return 1; } while (0)
696 
697 	static int
698 store_prchar(c, pos)
699 	char c;
700 	POSITION pos;
701 {
702 	char *s;
703 
704 	/*
705 	 * Convert to printable representation.
706 	 */
707 	s = prchar(c);
708 
709 	/*
710 	 * Make sure we can get the entire representation
711 	 * of the character on this line.
712 	 */
713 	if (column + (int) strlen(s) - 1 +
714             pwidth(' ', binattr, 0) + attr_ewidth(binattr) > sc_width)
715 		return 1;
716 
717 	for ( ;  *s != 0;  s++)
718 		STORE_CHAR(*s, AT_BINARY, NULL, pos);
719 
720 	return 0;
721 }
722 
723 	static int
724 flush_mbc_buf(pos)
725 	POSITION pos;
726 {
727 	int i;
728 
729 	for (i = 0; i < mbc_buf_index; i++)
730 		if (store_prchar(mbc_buf[i], pos))
731 			return mbc_buf_index - i;
732 
733 	return 0;
734 }
735 
/*
 * Append a character to the line buffer.
 * Expand tabs into spaces, handle underlining, boldfacing, etc.
 *
 * "c" is the next input byte and "pos" its position in the file.
 * In UTF-8 mode the bytes of a multibyte sequence are collected in
 * mbc_buf and appended as one character once the sequence is complete.
 *
 * Returns 0 if ok.  A nonzero return means the input did not fit;
 * the value is the number of chars the caller should back up:
 * 1 when utf_mode is off, otherwise taken from mbc_buf_index.
 * (Failure to store a held-back CR returns 1.)
 */
	public int
pappend(c, pos)
	char c;
	POSITION pos;
{
	int r;

	/* First deal with a character held back by the previous call. */
	if (pendc)
	{
		if (do_append(pendc, NULL, pendpos))
			/*
			 * Oops.  We've probably lost the char which
			 * was in pendc, since caller won't back up.
			 */
			return (1);
		pendc = '\0';
	}

	if (c == '\r' && bs_mode == BS_SPECIAL)
	{
		if (mbc_buf_len > 0)  /* utf_mode must be on. */
		{
			/* Flush incomplete (truncated) sequence. */
			r = flush_mbc_buf(mbc_pos);
			mbc_buf_index = r + 1;
			mbc_buf_len = 0;
			if (r)
				return (mbc_buf_index);
		}

		/*
		 * Don't put the CR into the buffer until we see
		 * the next char.  If the next char is a newline,
		 * discard the CR.
		 */
		pendc = c;
		pendpos = pos;
		return (0);
	}

	if (!utf_mode)
	{
		r = do_append((LWCHAR) c, NULL, pos);
	} else
	{
		/* Perform strict validation in all possible cases. */
		if (mbc_buf_len == 0)
		{
			/* No sequence in progress: c starts a new character. */
		retry:
			mbc_buf_index = 1;
			*mbc_buf = c;
			if (IS_ASCII_OCTET(c))
				r = do_append((LWCHAR) c, NULL, pos);
			else if (IS_UTF8_LEAD(c))
			{
				/* Start collecting; nothing is stored yet. */
				mbc_buf_len = utf_len(c);
				mbc_pos = pos;
				return (0);
			} else
				/* UTF8_INVALID or stray UTF8_TRAIL */
				r = flush_mbc_buf(pos);
		} else if (IS_UTF8_TRAIL(c))
		{
			/* One more byte of the sequence in progress. */
			mbc_buf[mbc_buf_index++] = c;
			if (mbc_buf_index < mbc_buf_len)
				return (0);
			if (is_utf8_well_formed(mbc_buf))
				r = do_append(get_wchar(mbc_buf), mbc_buf, mbc_pos);
			else
				/* Complete, but not shortest form, sequence. */
				mbc_buf_index = r = flush_mbc_buf(mbc_pos);
			mbc_buf_len = 0;
		} else
		{
			/* Flush incomplete (truncated) sequence.  */
			r = flush_mbc_buf(mbc_pos);
			mbc_buf_index = r + 1;
			mbc_buf_len = 0;
			/* Handle new char.  */
			if (!r)
				goto retry;
 		}
	}

	/*
	 * If we need to shift the line, do it.
	 * But wait until we get to at least the middle of the screen,
	 * so shifting it doesn't affect the chars we're currently
	 * pappending.  (Bold & underline can get messed up otherwise.)
	 */
	if (cshift < hshift && column > sc_width / 2)
	{
		linebuf[curr] = '\0';
		pshift(hshift - cshift);
	}
	if (r)
	{
		/* How many chars should caller back up? */
		r = (!utf_mode) ? 1 : mbc_buf_index;
	}
	return (r);
}
843 
	static int
do_append(ch, rep, pos)
	LWCHAR ch;
	char *rep;
	POSITION pos;
{
	/*
	 * Append one complete character to the line buffer.
	 * "ch" is the character, "rep" the bytes that represent it
	 * (NULL means the single byte ch) and "pos" its file position.
	 * Handles backspace according to bs_mode, overstriking
	 * (underline and bold), tabs, control characters and
	 * binary UTF-8 characters.
	 * Returns 0 if ok, 1 if the character could not be stored;
	 * the STORE_* macros return 1 from this function on failure.
	 */
	register int a;
	LWCHAR prev_ch;

	a = AT_NORMAL;

	if (ch == '\b')
	{
		if (bs_mode == BS_CONTROL)
			goto do_control_char;

		/*
		 * A better test is needed here so we don't
		 * backspace over part of the printed
		 * representation of a binary character.
		 */
		if (   curr <= lmargin
		    || column <= lmargin
		    || (attr[curr - 1] & (AT_ANSI|AT_BINARY)))
			/* Nothing to back up over: show the backspace. */
			STORE_PRCHAR('\b', pos);
		else if (bs_mode == BS_NORMAL)
			STORE_CHAR(ch, AT_NORMAL, NULL, pos);
		else if (bs_mode == BS_SPECIAL)
			/* Remove the previous char; the next one overstrikes it. */
			overstrike = backc();

		return 0;
	}

	if (overstrike > 0)
	{
		/*
		 * Overstrike the character at the current position
		 * in the line buffer.  This will cause either
		 * underline (if a "_" is overstruck),
		 * bold (if an identical character is overstruck),
		 * or just deletion of the character in the buffer.
		 */
		overstrike = utf_mode ? -1 : 0;
		/* To be correct, this must be a base character.  */
		prev_ch = get_wchar(linebuf + curr);
		a = attr[curr];
		if (ch == prev_ch)
		{
			/*
			 * Overstriking a char with itself means make it bold.
			 * But overstriking an underscore with itself is
			 * ambiguous.  It could mean make it bold, or
			 * it could mean make it underlined.
			 * Use the previous overstrike to resolve it.
			 */
			if (ch == '_')
			{
				if ((a & (AT_BOLD|AT_UNDERLINE)) != AT_NORMAL)
					a |= (AT_BOLD|AT_UNDERLINE);
				else if (last_overstrike != AT_NORMAL)
					a |= last_overstrike;
				else
					a |= AT_BOLD;
			} else
				a |= AT_BOLD;
		} else if (ch == '_')
		{
			/* "_" over a char: keep the char, underlined. */
			a |= AT_UNDERLINE;
			ch = prev_ch;
			rep = linebuf + curr;
		} else if (prev_ch == '_')
		{
			/* A char over "_": store the new char, underlined. */
			a |= AT_UNDERLINE;
		}
		/* Else we replace prev_ch, but we keep its attributes.  */
	} else if (overstrike < 0)
	{
		/*
		 * An overstrike was just completed in UTF-8 mode;
		 * following composing/combining chars belong to it.
		 */
		if (   is_composing_char(ch)
		    || is_combining_char(get_wchar(linebuf + curr), ch))
			/* Continuation of the same overstrike.  */
			a = last_overstrike;
		else
			overstrike = 0;
	}

	if (ch == '\t')
	{
		/*
		 * Expand a tab into spaces.
		 */
		switch (bs_mode)
		{
		case BS_CONTROL:
			goto do_control_char;
		case BS_NORMAL:
		case BS_SPECIAL:
			STORE_TAB(a, pos);
			break;
		}
	} else if ((!utf_mode || is_ascii_char(ch)) && control_char((char)ch))
	{
	do_control_char:
		if (ctldisp == OPT_ON || (ctldisp == OPT_ONPLUS && IS_CSI_START(ch)))
		{
			/*
			 * Output as a normal character.
			 */
			STORE_CHAR(ch, AT_NORMAL, rep, pos);
		} else
		{
			/* Store the printable representation instead. */
			STORE_PRCHAR((char) ch, pos);
		}
	} else if (utf_mode && ctldisp != OPT_ON && is_ubin_char(ch))
	{
		char *s;

		/* Printable representation of a binary UTF-8 character. */
		s = prutfchar(ch);

		/* Store the representation entirely on this line, or not at all. */
		if (column + (int) strlen(s) - 1 +
		    pwidth(' ', binattr, 0) + attr_ewidth(binattr) > sc_width)
			return (1);

		for ( ;  *s != 0;  s++)
			STORE_CHAR(*s, AT_BINARY, NULL, pos);
 	} else
	{
		STORE_CHAR(ch, a, rep, pos);
	}
 	return (0);
}
974 
975 /*
976  *
977  */
978 	public int
979 pflushmbc()
980 {
981 	int r = 0;
982 
983 	if (mbc_buf_len > 0)
984 	{
985 		/* Flush incomplete (truncated) sequence.  */
986 		r = flush_mbc_buf(mbc_pos);
987 		mbc_buf_len = 0;
988 	}
989 	return r;
990 }
991 
992 /*
993  * Terminate the line in the line buffer.
994  */
995 	public void
996 pdone(endline)
997 	int endline;
998 {
999 	int nl;
1000 
1001 	(void) pflushmbc();
1002 
1003 	if (pendc && (pendc != '\r' || !endline))
1004 		/*
1005 		 * If we had a pending character, put it in the buffer.
1006 		 * But discard a pending CR if we are at end of line
1007 		 * (that is, discard the CR in a CR/LF sequence).
1008 		 */
1009 		(void) do_append(pendc, NULL, pendpos);
1010 
1011 	/*
1012 	 * Make sure we've shifted the line, if we need to.
1013 	 */
1014 	if (cshift < hshift)
1015 		pshift(hshift - cshift);
1016 
1017 	if (ctldisp == OPT_ONPLUS && is_ansi_end('m'))
1018 	{
1019 		/* Switch to normal attribute at end of line. */
1020 		char *p = "\033[m";
1021 		for ( ;  *p != '\0';  p++)
1022 		{
1023 			linebuf[curr] = *p;
1024 			attr[curr++] = AT_ANSI;
1025 		}
1026 	}
1027 
1028 	/*
1029 	 * Add a newline if necessary,
1030 	 * and append a '\0' to the end of the line.
1031 	 * We output a newline if we're not at the right edge of the screen,
1032 	 * or if the terminal doesn't auto wrap,
1033 	 * or if this is really the end of the line AND the terminal ignores
1034 	 * a newline at the right edge.
1035 	 * (In the last case we don't want to output a newline if the terminal
1036 	 * doesn't ignore it since that would produce an extra blank line.
1037 	 * But we do want to output a newline if the terminal ignores it in case
1038 	 * the next line is blank.  In that case the single newline output for
1039 	 * that blank line would be ignored!)
1040 	 */
1041 	if (!oldbot)
1042 		nl = (column < sc_width || !auto_wrap || (endline && ignaw) || ctldisp == OPT_ON);
1043 	else
1044 		nl = (column < sc_width || !auto_wrap || ignaw || ctldisp == OPT_ON);
1045 	if (nl)
1046 	{
1047 		linebuf[curr] = '\n';
1048 		attr[curr] = AT_NORMAL;
1049 		curr++;
1050 	}
1051 	else if (ignaw && !auto_wrap && column >= sc_width)
1052 	{
1053 		/*
1054 		 * Big horrible kludge.
1055 		 * No-wrap terminals are too hard to deal with when they get in
1056 		 * the state where a full screen width of characters have been
1057 		 * output but the cursor is sitting on the right edge instead
1058 		 * of at the start of the next line.
1059 		 * So after we output a full line, we output an extra
1060 		 * space and backspace to force the cursor to the
1061 		 * beginning of the next line, like a sane terminal.
1062 		 */
1063 		linebuf[curr] = ' ';
1064 		attr[curr++] = AT_NORMAL;
1065 		linebuf[curr] = '\b';
1066 		attr[curr++] = AT_NORMAL;
1067 	}
1068 	linebuf[curr] = '\0';
1069 	attr[curr] = AT_NORMAL;
1070 
1071 #if HILITE_SEARCH
1072 	if (status_col && line_matches > 0)
1073 	{
1074 		linebuf[0] = '*';
1075 		attr[0] = AT_NORMAL|AT_HILITE;
1076 	}
1077 #endif
1078 }
1079 
1080 /*
1081  * Get a character from the current line.
1082  * Return the character as the function return value,
1083  * and the character attribute in *ap.
1084  */
1085 	public int
1086 gline(i, ap)
1087 	register int i;
1088 	register int *ap;
1089 {
1090 	if (is_null_line)
1091 	{
1092 		/*
1093 		 * If there is no current line, we pretend the line is
1094 		 * either "~" or "", depending on the "twiddle" flag.
1095 		 */
1096 		if (twiddle)
1097 		{
1098 			if (i == 0)
1099 			{
1100 				*ap = AT_BOLD;
1101 				return '~';
1102 			}
1103 			--i;
1104 		}
1105 		/* Make sure we're back to AT_NORMAL before the '\n'.  */
1106 		*ap = AT_NORMAL;
1107 		return i ? '\0' : '\n';
1108 	}
1109 
1110 	*ap = attr[i];
1111 	return (linebuf[i] & 0xFF);
1112 }
1113 
1114 /*
1115  * Indicate that there is no current line.
1116  */
1117 	public void
1118 null_line()
1119 {
1120 	is_null_line = 1;
1121 	cshift = 0;
1122 }
1123 
1124 /*
1125  * Analogous to forw_line(), but deals with "raw lines":
1126  * lines which are not split for screen width.
1127  * {{ This is supposed to be more efficient than forw_line(). }}
1128  */
1129 	public POSITION
1130 forw_raw_line(curr_pos, linep, line_lenp)
1131 	POSITION curr_pos;
1132 	char **linep;
1133 	int *line_lenp;
1134 {
1135 	register int n;
1136 	register int c;
1137 	POSITION new_pos;
1138 
1139 	if (curr_pos == NULL_POSITION || ch_seek(curr_pos) ||
1140 		(c = ch_forw_get()) == EOI)
1141 		return (NULL_POSITION);
1142 
1143 	n = 0;
1144 	for (;;)
1145 	{
1146 		if (c == '\n' || c == EOI || ABORT_SIGS())
1147 		{
1148 			new_pos = ch_tell();
1149 			break;
1150 		}
1151 		if (n >= size_linebuf-1)
1152 		{
1153 			if (expand_linebuf())
1154 			{
1155 				/*
1156 				 * Overflowed the input buffer.
1157 				 * Pretend the line ended here.
1158 				 */
1159 				new_pos = ch_tell() - 1;
1160 				break;
1161 			}
1162 		}
1163 		linebuf[n++] = c;
1164 		c = ch_forw_get();
1165 	}
1166 	linebuf[n] = '\0';
1167 	if (linep != NULL)
1168 		*linep = linebuf;
1169 	if (line_lenp != NULL)
1170 		*line_lenp = n;
1171 	return (new_pos);
1172 }
1173 
1174 /*
1175  * Analogous to back_line(), but deals with "raw lines".
1176  * {{ This is supposed to be more efficient than back_line(). }}
1177  */
1178 	public POSITION
1179 back_raw_line(curr_pos, linep, line_lenp)
1180 	POSITION curr_pos;
1181 	char **linep;
1182 	int *line_lenp;
1183 {
1184 	register int n;
1185 	register int c;
1186 	POSITION new_pos;
1187 
1188 	if (curr_pos == NULL_POSITION || curr_pos <= ch_zero() ||
1189 		ch_seek(curr_pos-1))
1190 		return (NULL_POSITION);
1191 
1192 	n = size_linebuf;
1193 	linebuf[--n] = '\0';
1194 	for (;;)
1195 	{
1196 		c = ch_back_get();
1197 		if (c == '\n' || ABORT_SIGS())
1198 		{
1199 			/*
1200 			 * This is the newline ending the previous line.
1201 			 * We have hit the beginning of the line.
1202 			 */
1203 			new_pos = ch_tell() + 1;
1204 			break;
1205 		}
1206 		if (c == EOI)
1207 		{
1208 			/*
1209 			 * We have hit the beginning of the file.
1210 			 * This must be the first line in the file.
1211 			 * This must, of course, be the beginning of the line.
1212 			 */
1213 			new_pos = ch_zero();
1214 			break;
1215 		}
1216 		if (n <= 0)
1217 		{
1218 			int old_size_linebuf = size_linebuf;
1219 			char *fm;
1220 			char *to;
1221 			if (expand_linebuf())
1222 			{
1223 				/*
1224 				 * Overflowed the input buffer.
1225 				 * Pretend the line ended here.
1226 				 */
1227 				new_pos = ch_tell() + 1;
1228 				break;
1229 			}
1230 			/*
1231 			 * Shift the data to the end of the new linebuf.
1232 			 */
1233 			for (fm = linebuf + old_size_linebuf - 1,
1234 			      to = linebuf + size_linebuf - 1;
1235 			     fm >= linebuf;  fm--, to--)
1236 				*to = *fm;
1237 			n = size_linebuf - old_size_linebuf;
1238 		}
1239 		linebuf[--n] = c;
1240 	}
1241 	if (linep != NULL)
1242 		*linep = &linebuf[n];
1243 	if (line_lenp != NULL)
1244 		*line_lenp = size_linebuf - 1 - n;
1245 	return (new_pos);
1246 }
1247