xref: /freebsd/contrib/less/line.c (revision 184c1b943937986c81e1996d999d21626ec7a4ff)
1 /*
2  * Copyright (C) 1984-2020  Mark Nudelman
3  *
4  * You may distribute under the terms of either the GNU General Public
5  * License or the Less License, as specified in the README file.
6  *
7  * For more information, see the README file.
8  */
9 
10 /*
11  * Routines to manipulate the "line buffer".
12  * The line buffer holds a line of output as it is being built
13  * in preparation for output to the screen.
14  */
15 
16 #include "less.h"
17 #include "charset.h"
18 #include "position.h"
19 
20 #if MSDOS_COMPILER==WIN32C
21 #define WIN32_LEAN_AND_MEAN
22 #include <windows.h>
23 #endif
24 
/*
 * State of the line buffer.  linebuf and attr are parallel arrays:
 * attr[i] is the display attribute of the byte in linebuf[i].
 */
static char *linebuf = NULL;	/* Buffer which holds the current output line */
static char *attr = NULL;	/* Extension of linebuf to hold attributes */
public int size_linebuf = 0;	/* Size of line buffer (and attr buffer) */

static int cshift;		/* Current left-shift of output line buffer */
public int hshift;		/* Desired left-shift of output line buffer */
public int tabstops[TABSTOP_MAX] = { 0 }; /* Custom tabstops */
public int ntabstops = 1;	/* Number of tabstops */
public int tabdefault = 8;	/* Default repeated tabstops */
public POSITION highest_hilite;	/* Pos of last hilite in file found so far */

static int curr;		/* Index into linebuf */
static int column;		/* Printable length, accounting for
				   backspaces, etc. */
static int right_curr;		/* Value of curr saved together with right_column */
static int right_column;	/* Largest column at which store_char began
				   storing a char of positive width */
static int overstrike;		/* Next char should overstrike previous char */
static int last_overstrike = AT_NORMAL;	/* Most recent bold/underline attribute
					   seen by store_char */
static int is_null_line;	/* There is no current line */
static int lmargin;		/* Left margin */
static LWCHAR pendc;		/* Char held back by pappend until the next
				   char is seen ('\0' if none) */
static POSITION pendpos;	/* File position of pendc */
static char *end_ansi_chars;	/* Chars which end an ANSI escape sequence */
static char *mid_ansi_chars;	/* Chars which may appear inside an ANSI
				   escape sequence, before the end char */

/* Static functions which are used before they are defined. */
static int attr_swidth LESSPARAMS ((int a));
static int attr_ewidth LESSPARAMS ((int a));
static int do_append LESSPARAMS ((LWCHAR ch, char *rep, POSITION pos));

/* Variables defined outside this file. */
extern int sigs;
extern int bs_mode;
extern int linenums;
extern int ctldisp;
extern int twiddle;
extern int binattr;
extern int status_col;
extern int auto_wrap, ignaw;
extern int bo_s_width, bo_e_width;
extern int ul_s_width, ul_e_width;
extern int bl_s_width, bl_e_width;
extern int so_s_width, so_e_width;
extern int sc_width, sc_height;
extern int utf_mode;
extern POSITION start_attnpos;
extern POSITION end_attnpos;
extern char rscroll_char;
extern int rscroll_attr;

/*
 * Assembly area for a UTF-8 sequence which pappend receives one byte
 * at a time.
 */
static char mbc_buf[MAX_UTF_CHAR_LEN];	/* Bytes collected so far */
static int mbc_buf_len = 0;	/* Expected length of the sequence being
				   collected; 0 if none is in progress */
static int mbc_buf_index = 0;	/* Number of bytes stored in mbc_buf */
static POSITION mbc_pos;	/* File position of the sequence's lead byte */
77 
78 /*
79  * Initialize from environment variables.
80  */
81 	public void
82 init_line(VOID_PARAM)
83 {
84 	end_ansi_chars = lgetenv("LESSANSIENDCHARS");
85 	if (isnullenv(end_ansi_chars))
86 		end_ansi_chars = "m";
87 
88 	mid_ansi_chars = lgetenv("LESSANSIMIDCHARS");
89 	if (isnullenv(mid_ansi_chars))
90 		mid_ansi_chars = "0123456789:;[?!\"'#%()*+ ";
91 
92 	linebuf = (char *) ecalloc(LINEBUF_SIZE, sizeof(char));
93 	attr = (char *) ecalloc(LINEBUF_SIZE, sizeof(char));
94 	size_linebuf = LINEBUF_SIZE;
95 }
96 
97 /*
98  * Expand the line buffer.
99  */
100 	static int
101 expand_linebuf(VOID_PARAM)
102 {
103 	/* Double the size of the line buffer. */
104 	int new_size = size_linebuf * 2;
105 
106 	/* Just realloc to expand the buffer, if we can. */
107 #if HAVE_REALLOC
108 	char *new_buf = (char *) realloc(linebuf, new_size);
109 	char *new_attr = (char *) realloc(attr, new_size);
110 #else
111 	char *new_buf = (char *) calloc(new_size, sizeof(char));
112 	char *new_attr = (char *) calloc(new_size, sizeof(char));
113 #endif
114 	if (new_buf == NULL || new_attr == NULL)
115 	{
116 		if (new_attr != NULL)
117 			free(new_attr);
118 		if (new_buf != NULL)
119 			free(new_buf);
120 		return 1;
121 	}
122 #if !HAVE_REALLOC
123 	/*
124 	 * We just calloc'd the buffers; copy the old contents.
125 	 */
126 	memcpy(new_buf, linebuf, size_linebuf * sizeof(char));
127 	memcpy(new_attr, attr, size_linebuf * sizeof(char));
128 	free(attr);
129 	free(linebuf);
130 #endif
131 	linebuf = new_buf;
132 	attr = new_attr;
133 	size_linebuf = new_size;
134 	return 0;
135 }
136 
137 /*
138  * Is a character ASCII?
139  */
140 	public int
141 is_ascii_char(ch)
142 	LWCHAR ch;
143 {
144 	return (ch <= 0x7F);
145 }
146 
147 /*
148  * Rewind the line buffer.
149  */
150 	public void
151 prewind(VOID_PARAM)
152 {
153 	curr = 0;
154 	column = 0;
155 	right_curr = 0;
156 	right_column = 0;
157 	cshift = 0;
158 	overstrike = 0;
159 	last_overstrike = AT_NORMAL;
160 	mbc_buf_len = 0;
161 	is_null_line = 0;
162 	pendc = '\0';
163 	lmargin = 0;
164 	if (status_col)
165 		lmargin += 2;
166 }
167 
168 /*
169  * Set a character in the line buffer.
170  */
171 	static void
172 set_linebuf(n, ch, a)
173 	int n;
174 	char ch;
175 	char a;
176 {
177 	linebuf[n] = ch;
178 	attr[n] = a;
179 }
180 
181 /*
182  * Append a character to the line buffer.
183  */
184 	static void
185 add_linebuf(ch, a, w)
186 	char ch;
187 	char a;
188 	int w;
189 {
190 	set_linebuf(curr++, ch, a);
191 	column += w;
192 }
193 
194 /*
195  * Insert the line number (of the given position) into the line buffer.
196  */
197 	public void
198 plinenum(pos)
199 	POSITION pos;
200 {
201 	LINENUM linenum = 0;
202 	int i;
203 
204 	if (linenums == OPT_ONPLUS)
205 	{
206 		/*
207 		 * Get the line number and put it in the current line.
208 		 * {{ Note: since find_linenum calls forw_raw_line,
209 		 *    it may seek in the input file, requiring the caller
210 		 *    of plinenum to re-seek if necessary. }}
211 		 * {{ Since forw_raw_line modifies linebuf, we must
212 		 *    do this first, before storing anything in linebuf. }}
213 		 */
214 		linenum = find_linenum(pos);
215 	}
216 
217 	/*
218 	 * Display a status column if the -J option is set.
219 	 */
220 	if (status_col)
221 	{
222 		int a = AT_NORMAL;
223 		char c = posmark(pos);
224 		if (c != 0)
225 			a |= AT_HILITE;
226 		else
227 		{
228 			c = ' ';
229 			if (start_attnpos != NULL_POSITION &&
230 			    pos >= start_attnpos && pos <= end_attnpos)
231 				a |= AT_HILITE;
232 		}
233 		add_linebuf(c, a, 1); /* column 0: status */
234 		add_linebuf(' ', AT_NORMAL, 1); /* column 1: empty */
235 	}
236 
237 	/*
238 	 * Display the line number at the start of each line
239 	 * if the -N option is set.
240 	 */
241 	if (linenums == OPT_ONPLUS)
242 	{
243 		char buf[INT_STRLEN_BOUND(linenum) + 2];
244 		int pad = 0;
245 		int n;
246 
247 		linenumtoa(linenum, buf);
248 		n = (int) strlen(buf);
249 		if (n < MIN_LINENUM_WIDTH)
250 			pad = MIN_LINENUM_WIDTH - n;
251 		for (i = 0; i < pad; i++)
252 			add_linebuf(' ', AT_NORMAL, 1);
253 		for (i = 0; i < n; i++)
254 			add_linebuf(buf[i], AT_BOLD, 1);
255 		add_linebuf(' ', AT_NORMAL, 1);
256 		lmargin += n + pad + 1;
257 	}
258 	/*
259 	 * Append enough spaces to bring us to the lmargin.
260 	 */
261 	while (column < lmargin)
262 	{
263 		add_linebuf(' ', AT_NORMAL, 1);
264 	}
265 }
266 
/*
 * Shift the input line left.
 * This means discarding N printable chars at the start of the buffer.
 *
 * Only the part of the buffer after the left margin (lmargin) is
 * affected.  On return curr and column describe the shortened line,
 * and cshift has grown by the number of columns actually discarded.
 */
	static void
pshift(shift)
	int shift;
{
	LWCHAR prev_ch = 0;
	unsigned char c;
	int shifted = 0;	/* Columns discarded so far */
	int to;			/* Index at which the next kept byte is stored */
	int from;		/* Index of the next byte to examine */
	int len;		/* Length in bytes of the char at "from" */
	int width;		/* Columns attributed to the char at "from" */
	int prev_attr;
	int next_attr;

	/* Don't try to discard more than is stored beyond the margin. */
	if (shift > column - lmargin)
		shift = column - lmargin;
	if (shift > curr - lmargin)
		shift = curr - lmargin;

	to = from = lmargin;
	/*
	 * We keep on going when shifted == shift
	 * to get all combining chars.
	 */
	while (shifted <= shift && from < curr)
	{
		c = linebuf[from];
		if (ctldisp == OPT_ONPLUS && IS_CSI_START(c))
		{
			/* Keep cumulative effect.  */
			/*
			 * The escape sequence is copied down to "to" instead
			 * of being dropped; it does not count towards
			 * "shifted".  Copying stops after the first byte
			 * which is not an ANSI middle char.
			 */
			linebuf[to] = c;
			attr[to++] = attr[from++];
			while (from < curr && linebuf[from])
			{
				linebuf[to] = linebuf[from];
				attr[to++] = attr[from];
				if (!is_ansi_middle(linebuf[from++]))
					break;
			}
			continue;
		}

		width = 0;

		if (!IS_ASCII_OCTET(c) && utf_mode)
		{
			/* Assumes well-formedness validation already done.  */
			LWCHAR ch;

			len = utf_len(c);
			/* Stop if the sequence would run past the stored data. */
			if (from + len > curr)
				break;
			ch = get_wchar(linebuf + from);
			/* Composing and combining chars keep width 0. */
			if (!is_composing_char(ch) && !is_combining_char(prev_ch, ch))
				width = is_wide_char(ch) ? 2 : 1;
			prev_ch = ch;
		} else
		{
			len = 1;
			if (c == '\b')
				/* XXX - Incorrect if several '\b' in a row.  */
				width = (utf_mode && is_wide_char(prev_ch)) ? -2 : -1;
			else if (!control_char(c))
				width = 1;
			prev_ch = 0;
		}

		/*
		 * A double-width char with only one column left to shift:
		 * replace it by a single space so that exactly one more
		 * column is consumed.
		 */
		if (width == 2 && shift - shifted == 1) {
			/* Should never happen when called by pshift_all().  */
			attr[to] = attr[from];
			/*
			 * Assume a wide_char will never be the first half of a
			 * combining_char pair, so reset prev_ch in case we're
			 * followed by a '\b'.
			 */
			prev_ch = linebuf[to++] = ' ';
			from += len;
			shifted++;
			continue;
		}

		/* Adjust width for magic cookies. */
		/*
		 * If this char's attribute differs from both of its
		 * neighbours, add the widths of the attribute enter/exit
		 * sequences which surround it.
		 */
		prev_attr = (to > 0) ? attr[to-1] : AT_NORMAL;
		next_attr = (from + len < curr) ? attr[from + len] : prev_attr;
		if (!is_at_equiv(attr[from], prev_attr) &&
			!is_at_equiv(attr[from], next_attr))
		{
			width += attr_swidth(attr[from]);
			if (from + len < curr)
				width += attr_ewidth(attr[from]);
			if (is_at_equiv(prev_attr, next_attr))
			{
				width += attr_ewidth(prev_attr);
				if (from + len < curr)
					width += attr_swidth(next_attr);
			}
		}

		/* This char is wider than what remains to be shifted; keep it. */
		if (shift - shifted < width)
			break;
		from += len;
		shifted += width;
		/* Backspaces have negative width; never let the total go below 0. */
		if (shifted < 0)
			shifted = 0;
	}
	/* Move everything which was not discarded down to "to". */
	while (from < curr)
	{
		linebuf[to] = linebuf[from];
		attr[to++] = attr[from++];
	}
	curr = to;
	column -= shifted;
	cshift += shifted;
}
385 
386 /*
387  *
388  */
389 	public void
390 pshift_all(VOID_PARAM)
391 {
392 	pshift(column);
393 }
394 
395 /*
396  * Return the printing width of the start (enter) sequence
397  * for a given character attribute.
398  */
399 	static int
400 attr_swidth(a)
401 	int a;
402 {
403 	int w = 0;
404 
405 	a = apply_at_specials(a);
406 
407 	if (a & AT_UNDERLINE)
408 		w += ul_s_width;
409 	if (a & AT_BOLD)
410 		w += bo_s_width;
411 	if (a & AT_BLINK)
412 		w += bl_s_width;
413 	if (a & AT_STANDOUT)
414 		w += so_s_width;
415 
416 	return w;
417 }
418 
419 /*
420  * Return the printing width of the end (exit) sequence
421  * for a given character attribute.
422  */
423 	static int
424 attr_ewidth(a)
425 	int a;
426 {
427 	int w = 0;
428 
429 	a = apply_at_specials(a);
430 
431 	if (a & AT_UNDERLINE)
432 		w += ul_e_width;
433 	if (a & AT_BOLD)
434 		w += bo_e_width;
435 	if (a & AT_BLINK)
436 		w += bl_e_width;
437 	if (a & AT_STANDOUT)
438 		w += so_e_width;
439 
440 	return w;
441 }
442 
443 /*
444  * Return the printing width of a given character and attribute,
445  * if the character were added to the current position in the line buffer.
446  * Adding a character with a given attribute may cause an enter or exit
447  * attribute sequence to be inserted, so this must be taken into account.
448  */
449 	static int
450 pwidth(ch, a, prev_ch)
451 	LWCHAR ch;
452 	int a;
453 	LWCHAR prev_ch;
454 {
455 	int w;
456 
457 	if (ch == '\b')
458 		/*
459 		 * Backspace moves backwards one or two positions.
460 		 * XXX - Incorrect if several '\b' in a row.
461 		 */
462 		return (utf_mode && is_wide_char(prev_ch)) ? -2 : -1;
463 
464 	if (!utf_mode || is_ascii_char(ch))
465 	{
466 		if (control_char((char)ch))
467 		{
468 			/*
469 			 * Control characters do unpredictable things,
470 			 * so we don't even try to guess; say it doesn't move.
471 			 * This can only happen if the -r flag is in effect.
472 			 */
473 			return (0);
474 		}
475 	} else
476 	{
477 		if (is_composing_char(ch) || is_combining_char(prev_ch, ch))
478 		{
479 			/*
480 			 * Composing and combining chars take up no space.
481 			 *
482 			 * Some terminals, upon failure to compose a
483 			 * composing character with the character(s) that
484 			 * precede(s) it will actually take up one column
485 			 * for the composing character; there isn't much
486 			 * we could do short of testing the (complex)
487 			 * composition process ourselves and printing
488 			 * a binary representation when it fails.
489 			 */
490 			return (0);
491 		}
492 	}
493 
494 	/*
495 	 * Other characters take one or two columns,
496 	 * plus the width of any attribute enter/exit sequence.
497 	 */
498 	w = 1;
499 	if (is_wide_char(ch))
500 		w++;
501 	if (curr > 0 && !is_at_equiv(attr[curr-1], a))
502 		w += attr_ewidth(attr[curr-1]);
503 	if ((apply_at_specials(a) != AT_NORMAL) &&
504 	    (curr == 0 || !is_at_equiv(attr[curr-1], a)))
505 		w += attr_swidth(a);
506 	return (w);
507 }
508 
509 /*
510  * Delete to the previous base character in the line buffer.
511  * Return 1 if one is found.
512  */
513 	static int
514 backc(VOID_PARAM)
515 {
516 	LWCHAR prev_ch;
517 	char *p = linebuf + curr;
518 	LWCHAR ch = step_char(&p, -1, linebuf + lmargin);
519 	int width;
520 
521 	/* This assumes that there is no '\b' in linebuf.  */
522 	while (   curr > lmargin
523 	       && column > lmargin
524 	       && (!(attr[curr - 1] & (AT_ANSI|AT_BINARY))))
525 	{
526 		curr = (int) (p - linebuf);
527 		prev_ch = step_char(&p, -1, linebuf + lmargin);
528 		width = pwidth(ch, attr[curr], prev_ch);
529 		column -= width;
530 		if (width > 0)
531 			return 1;
532 		ch = prev_ch;
533 	}
534 
535 	return 0;
536 }
537 
538 /*
539  * Are we currently within a recognized ANSI escape sequence?
540  */
541 	static int
542 in_ansi_esc_seq(VOID_PARAM)
543 {
544 	char *p;
545 
546 	/*
547 	 * Search backwards for either an ESC (which means we ARE in a seq);
548 	 * or an end char (which means we're NOT in a seq).
549 	 */
550 	for (p = &linebuf[curr];  p > linebuf; )
551 	{
552 		LWCHAR ch = step_char(&p, -1, linebuf);
553 		if (IS_CSI_START(ch))
554 			return (1);
555 		if (!is_ansi_middle(ch))
556 			return (0);
557 	}
558 	return (0);
559 }
560 
561 /*
562  * Is a character the end of an ANSI escape sequence?
563  */
564 	public int
565 is_ansi_end(ch)
566 	LWCHAR ch;
567 {
568 	if (!is_ascii_char(ch))
569 		return (0);
570 	return (strchr(end_ansi_chars, (char) ch) != NULL);
571 }
572 
573 /*
574  * Can a char appear in an ANSI escape sequence, before the end char?
575  */
576 	public int
577 is_ansi_middle(ch)
578 	LWCHAR ch;
579 {
580 	if (!is_ascii_char(ch))
581 		return (0);
582 	if (is_ansi_end(ch))
583 		return (0);
584 	return (strchr(mid_ansi_chars, (char) ch) != NULL);
585 }
586 
587 /*
588  * Skip past an ANSI escape sequence.
589  * pp is initially positioned just after the CSI_START char.
590  */
591 	public void
592 skip_ansi(pp, limit)
593 	char **pp;
594 	constant char *limit;
595 {
596 	LWCHAR c;
597 	do {
598 		c = step_char(pp, +1, limit);
599 	} while (*pp < limit && is_ansi_middle(c));
600 	/* Note that we discard final char, for which is_ansi_middle is false. */
601 }
602 
603 
/*
 * Append a character and attribute to the line buffer.
 */
/*
 * STORE_CHAR is a convenience wrapper around store_char for use inside
 * functions returning int: if the char cannot be stored, it makes the
 * enclosing function return 1.
 */
#define	STORE_CHAR(ch,a,rep,pos) \
	do { \
		if (store_char((ch),(a),(rep),(pos))) return (1); \
	} while (0)

/*
 * ch is the character, a its attribute and pos its position in the file.
 * rep, if not NULL, points to the bytes to store for the character
 * (utf_len(rep[0]) of them); if NULL, the single byte (char) ch is stored.
 * Returns 0 if the character was stored (or if an unrecognized escape
 * sequence was removed from the buffer), 1 if the character would not fit
 * on the screen or the line buffer could not be expanded.
 */
	static int
store_char(ch, a, rep, pos)
	LWCHAR ch;
	int a;
	char *rep;
	POSITION pos;
{
	int w;
	int replen;
	char cs;

	/* Remember the most recent bold/underline attribute. */
	w = (a & (AT_UNDERLINE|AT_BOLD));	/* Pre-use w.  */
	if (w != AT_NORMAL)
		last_overstrike = w;

#if HILITE_SEARCH
	{
		int matches;
		if (is_hilited(pos, pos+1, 0, &matches))
		{
			/*
			 * This character should be highlighted.
			 * Override the attribute passed in.
			 */
			if (a != AT_ANSI)
			{
				if (highest_hilite != NULL_POSITION &&
				    pos > highest_hilite)
				    	highest_hilite = pos;
				a |= AT_HILITE;
			}
		}
	}
#endif

	/*
	 * Work out the width w of the character.  Bytes which belong to
	 * an ANSI escape sequence get attribute AT_ANSI and width 0.
	 */
	if (ctldisp == OPT_ONPLUS && in_ansi_esc_seq())
	{
		if (!is_ansi_end(ch) && !is_ansi_middle(ch)) {
			/* Remove whole unrecognized sequence.  */
			/*
			 * Back up to the char which started the sequence
			 * and truncate the buffer there.  ch itself is
			 * not stored.
			 */
			char *p = &linebuf[curr];
			LWCHAR bch;
			do {
				bch = step_char(&p, -1, linebuf);
			} while (p > linebuf && !IS_CSI_START(bch));
			curr = (int) (p - linebuf);
			return 0;
		}
		a = AT_ANSI;	/* Will force re-AT_'ing around it.  */
		w = 0;
	}
	else if (ctldisp == OPT_ONPLUS && IS_CSI_START(ch))
	{
		a = AT_ANSI;	/* Will force re-AT_'ing around it.  */
		w = 0;
	}
	else
	{
		char *p = &linebuf[curr];
		LWCHAR prev_ch = step_char(&p, -1, linebuf);
		w = pwidth(ch, a, prev_ch);
	}

	if (ctldisp != OPT_ON && column + w + attr_ewidth(a) > sc_width)
		/*
		 * Won't fit on screen.
		 */
		return (1);

	if (rep == NULL)
	{
		/* No representation given: store ch as a single byte. */
		cs = (char) ch;
		rep = &cs;
		replen = 1;
	} else
	{
		replen = utf_len(rep[0]);
	}
	if (curr + replen >= size_linebuf-6)
	{
		/*
		 * Won't fit in line buffer.
		 * Try to expand it.
		 */
		if (expand_linebuf())
			return (1);
	}

	/*
	 * Record where the rightmost char of positive width starts;
	 * pdone uses this to back up over it.
	 */
	if (column > right_column && w > 0)
	{
		right_column = column;
		right_curr = curr;
	}

	/* Store the bytes with width 0 each, then add the width once. */
	while (replen-- > 0)
	{
		add_linebuf(*rep++, a, 0);
	}
	column += w;
	return (0);
}
712 
713 /*
714  * Append a tab to the line buffer.
715  * Store spaces to represent the tab.
716  */
717 #define	STORE_TAB(a,pos) \
718 	do { if (store_tab((a),(pos))) return (1); } while (0)
719 
720 	static int
721 store_tab(attr, pos)
722 	int attr;
723 	POSITION pos;
724 {
725 	int to_tab = column + cshift - lmargin;
726 	int i;
727 
728 	if (ntabstops < 2 || to_tab >= tabstops[ntabstops-1])
729 		to_tab = tabdefault -
730 		     ((to_tab - tabstops[ntabstops-1]) % tabdefault);
731 	else
732 	{
733 		for (i = ntabstops - 2;  i >= 0;  i--)
734 			if (to_tab >= tabstops[i])
735 				break;
736 		to_tab = tabstops[i+1] - to_tab;
737 	}
738 
739 	if (column + to_tab - 1 + pwidth(' ', attr, 0) + attr_ewidth(attr) > sc_width)
740 		return 1;
741 
742 	do {
743 		STORE_CHAR(' ', attr, " ", pos);
744 	} while (--to_tab > 0);
745 	return 0;
746 }
747 
748 #define STORE_PRCHAR(c, pos) \
749 	do { if (store_prchar((c), (pos))) return 1; } while (0)
750 
751 	static int
752 store_prchar(c, pos)
753 	LWCHAR c;
754 	POSITION pos;
755 {
756 	char *s;
757 
758 	/*
759 	 * Convert to printable representation.
760 	 */
761 	s = prchar(c);
762 
763 	/*
764 	 * Make sure we can get the entire representation
765 	 * of the character on this line.
766 	 */
767 	if (column + (int) strlen(s) - 1 +
768             pwidth(' ', binattr, 0) + attr_ewidth(binattr) > sc_width)
769 		return 1;
770 
771 	for ( ;  *s != 0;  s++)
772 		STORE_CHAR(*s, AT_BINARY, NULL, pos);
773 
774 	return 0;
775 }
776 
777 	static int
778 flush_mbc_buf(pos)
779 	POSITION pos;
780 {
781 	int i;
782 
783 	for (i = 0; i < mbc_buf_index; i++)
784 		if (store_prchar(mbc_buf[i], pos))
785 			return mbc_buf_index - i;
786 
787 	return 0;
788 }
789 
/*
 * Append a character to the line buffer.
 * Expand tabs into spaces, handle underlining, boldfacing, etc.
 * Returns 0 if ok; otherwise a nonzero count of chars which the caller
 * should back up (1 when not in utf_mode, else taken from mbc_buf_index).
 *
 * c is the next byte of input and pos its position in the file.
 * In utf_mode the bytes of a multibyte sequence are collected in mbc_buf
 * and only appended once the sequence is complete.
 */
	public int
pappend(c, pos)
	int c;
	POSITION pos;
{
	int r;

	/* First deal with a char held back by a previous call. */
	if (pendc)
	{
		/* Collapse consecutive CRs into one pending CR. */
		if (c == '\r' && pendc == '\r')
			return (0);
		if (do_append(pendc, NULL, pendpos))
			/*
			 * Oops.  We've probably lost the char which
			 * was in pendc, since caller won't back up.
			 */
			return (1);
		pendc = '\0';
	}

	if (c == '\r' && bs_mode == BS_SPECIAL)
	{
		if (mbc_buf_len > 0)  /* utf_mode must be on. */
		{
			/* Flush incomplete (truncated) sequence. */
			r = flush_mbc_buf(mbc_pos);
			mbc_buf_index = r + 1;
			mbc_buf_len = 0;
			if (r)
				return (mbc_buf_index);
		}

		/*
		 * Don't put the CR into the buffer until we see
		 * the next char.  If the next char is a newline,
		 * discard the CR.
		 */
		pendc = c;
		pendpos = pos;
		return (0);
	}

	if (!utf_mode)
	{
		r = do_append(c, NULL, pos);
	} else
	{
		/* Perform strict validation in all possible cases. */
		if (mbc_buf_len == 0)
		{
			/* No sequence in progress: c starts a new char. */
		retry:
			mbc_buf_index = 1;
			*mbc_buf = c;
			if (IS_ASCII_OCTET(c))
				r = do_append(c, NULL, pos);
			else if (IS_UTF8_LEAD(c))
			{
				/* Start collecting; nothing to append yet. */
				mbc_buf_len = utf_len(c);
				mbc_pos = pos;
				return (0);
			} else
				/* UTF8_INVALID or stray UTF8_TRAIL */
				r = flush_mbc_buf(pos);
		} else if (IS_UTF8_TRAIL(c))
		{
			/* Next byte of the sequence being collected. */
			mbc_buf[mbc_buf_index++] = c;
			if (mbc_buf_index < mbc_buf_len)
				return (0);
			if (is_utf8_well_formed(mbc_buf, mbc_buf_index))
				r = do_append(get_wchar(mbc_buf), mbc_buf, mbc_pos);
			else
				/* Complete, but not shortest form, sequence. */
				mbc_buf_index = r = flush_mbc_buf(mbc_pos);
			mbc_buf_len = 0;
		} else
		{
			/* Flush incomplete (truncated) sequence.  */
			r = flush_mbc_buf(mbc_pos);
			mbc_buf_index = r + 1;
			mbc_buf_len = 0;
			/* Handle new char.  */
			if (!r)
				goto retry;
 		}
	}

	/*
	 * If we need to shift the line, do it.
	 * But wait until we get to at least the middle of the screen,
	 * so shifting it doesn't affect the chars we're currently
	 * pappending.  (Bold & underline can get messed up otherwise.)
	 */
	if (cshift < hshift && column > sc_width / 2)
	{
		linebuf[curr] = '\0';
		pshift(hshift - cshift);
	}
	if (r)
	{
		/* How many chars should caller back up? */
		r = (!utf_mode) ? 1 : mbc_buf_index;
	}
	return (r);
}
899 
/*
 * Append one complete character to the line buffer, interpreting
 * backspaces (according to bs_mode), overstrikes, tabs, control chars
 * and binary chars.
 * ch is the character, pos its position in the file, and rep (if not
 * NULL) the bytes which represent it.
 * Returns 0 if ok, 1 if the character could not be stored.
 */
	static int
do_append(ch, rep, pos)
	LWCHAR ch;
	char *rep;
	POSITION pos;
{
	int a;
	LWCHAR prev_ch;

	a = AT_NORMAL;

	if (ch == '\b')
	{
		if (bs_mode == BS_CONTROL)
			goto do_control_char;

		/*
		 * A better test is needed here so we don't
		 * backspace over part of the printed
		 * representation of a binary character.
		 */
		if (   curr <= lmargin
		    || column <= lmargin
		    || (attr[curr - 1] & (AT_ANSI|AT_BINARY)))
			/* Nothing suitable to back up over: show the '\b'. */
			STORE_PRCHAR('\b', pos);
		else if (bs_mode == BS_NORMAL)
			STORE_CHAR(ch, AT_NORMAL, NULL, pos);
		else if (bs_mode == BS_SPECIAL)
			/* Remove the previous char; the next char overstrikes it. */
			overstrike = backc();

		return 0;
	}

	if (overstrike > 0)
	{
		/*
		 * Overstrike the character at the current position
		 * in the line buffer.  This will cause either
		 * underline (if a "_" is overstruck),
		 * bold (if an identical character is overstruck),
		 * or just deletion of the character in the buffer.
		 */
		/*
		 * In utf_mode, overstrike becomes negative so that following
		 * composing/combining chars are handled by the branch below.
		 */
		overstrike = utf_mode ? -1 : 0;
		if (utf_mode)
		{
			/* To be correct, this must be a base character.  */
			prev_ch = get_wchar(linebuf + curr);
		} else
		{
			prev_ch = (unsigned char) linebuf[curr];
		}
		a = attr[curr];
		if (ch == prev_ch)
		{
			/*
			 * Overstriking a char with itself means make it bold.
			 * But overstriking an underscore with itself is
			 * ambiguous.  It could mean make it bold, or
			 * it could mean make it underlined.
			 * Use the previous overstrike to resolve it.
			 */
			if (ch == '_')
			{
				if ((a & (AT_BOLD|AT_UNDERLINE)) != AT_NORMAL)
					a |= (AT_BOLD|AT_UNDERLINE);
				else if (last_overstrike != AT_NORMAL)
					a |= last_overstrike;
				else
					a |= AT_BOLD;
			} else
				a |= AT_BOLD;
		} else if (ch == '_')
		{
			/* Underscore over a char: keep the char, underlined. */
			a |= AT_UNDERLINE;
			ch = prev_ch;
			rep = linebuf + curr;
		} else if (prev_ch == '_')
		{
			/* Char over an underscore: store the new char, underlined. */
			a |= AT_UNDERLINE;
		}
		/* Else we replace prev_ch, but we keep its attributes.  */
	} else if (overstrike < 0)
	{
		if (   is_composing_char(ch)
		    || is_combining_char(get_wchar(linebuf + curr), ch))
			/* Continuation of the same overstrike.  */
			a = last_overstrike;
		else
			overstrike = 0;
	}

	if (ch == '\t')
	{
		/*
		 * Expand a tab into spaces.
		 */
		switch (bs_mode)
		{
		case BS_CONTROL:
			goto do_control_char;
		case BS_NORMAL:
		case BS_SPECIAL:
			STORE_TAB(a, pos);
			break;
		}
	} else if ((!utf_mode || is_ascii_char(ch)) && control_char((char)ch))
	{
	do_control_char:
		/* Also reached by goto for '\b' and '\t' when bs_mode is BS_CONTROL. */
		if (ctldisp == OPT_ON || (ctldisp == OPT_ONPLUS && IS_CSI_START(ch)))
		{
			/*
			 * Output as a normal character.
			 */
			STORE_CHAR(ch, AT_NORMAL, rep, pos);
		} else
		{
			STORE_PRCHAR((char) ch, pos);
		}
	} else if (utf_mode && ctldisp != OPT_ON && is_ubin_char(ch))
	{
		char *s;

		/* Store the printable form of a binary Unicode char. */
		s = prutfchar(ch);

		/* Store it only if the whole representation fits. */
		if (column + (int) strlen(s) - 1 +
		    pwidth(' ', binattr, 0) + attr_ewidth(binattr) > sc_width)
			return (1);

		for ( ;  *s != 0;  s++)
			STORE_CHAR(*s, AT_BINARY, NULL, pos);
 	} else
	{
		STORE_CHAR(ch, a, rep, pos);
	}
 	return (0);
}
1036 
1037 /*
1038  *
1039  */
1040 	public int
1041 pflushmbc(VOID_PARAM)
1042 {
1043 	int r = 0;
1044 
1045 	if (mbc_buf_len > 0)
1046 	{
1047 		/* Flush incomplete (truncated) sequence.  */
1048 		r = flush_mbc_buf(mbc_pos);
1049 		mbc_buf_len = 0;
1050 	}
1051 	return r;
1052 }
1053 
1054 /*
1055  * Switch to normal attribute at end of line.
1056  */
1057 	static void
1058 add_attr_normal(VOID_PARAM)
1059 {
1060 	char *p = "\033[m";
1061 
1062 	if (ctldisp != OPT_ONPLUS || !is_ansi_end('m'))
1063 		return;
1064 	for ( ;  *p != '\0';  p++)
1065 		add_linebuf(*p, AT_ANSI, 0);
1066 }
1067 
1068 /*
1069  * Terminate the line in the line buffer.
1070  */
1071 	public void
1072 pdone(endline, chopped, forw)
1073 	int endline;
1074 	int chopped;
1075 	int forw;
1076 {
1077 	(void) pflushmbc();
1078 
1079 	if (pendc && (pendc != '\r' || !endline))
1080 		/*
1081 		 * If we had a pending character, put it in the buffer.
1082 		 * But discard a pending CR if we are at end of line
1083 		 * (that is, discard the CR in a CR/LF sequence).
1084 		 */
1085 		(void) do_append(pendc, NULL, pendpos);
1086 
1087 	/*
1088 	 * Make sure we've shifted the line, if we need to.
1089 	 */
1090 	if (cshift < hshift)
1091 		pshift(hshift - cshift);
1092 
1093 	if (chopped && rscroll_char)
1094 	{
1095 		/*
1096 		 * Display the right scrolling char.
1097 		 * If we've already filled the rightmost screen char
1098 		 * (in the buffer), overwrite it.
1099 		 */
1100 		if (column >= sc_width)
1101 		{
1102 			/* We've already written in the rightmost char. */
1103 			column = right_column;
1104 			curr = right_curr;
1105 		}
1106 		add_attr_normal();
1107 		while (column < sc_width-1)
1108 		{
1109 			/*
1110 			 * Space to last (rightmost) char on screen.
1111 			 * This may be necessary if the char we overwrote
1112 			 * was double-width.
1113 			 */
1114 			add_linebuf(' ', AT_NORMAL, 1);
1115 		}
1116 		/* Print rscroll char. It must be single-width. */
1117 		add_linebuf(rscroll_char, rscroll_attr, 1);
1118 	} else
1119 	{
1120 		add_attr_normal();
1121 	}
1122 
1123 	/*
1124 	 * Add a newline if necessary,
1125 	 * and append a '\0' to the end of the line.
1126 	 * We output a newline if we're not at the right edge of the screen,
1127 	 * or if the terminal doesn't auto wrap,
1128 	 * or if this is really the end of the line AND the terminal ignores
1129 	 * a newline at the right edge.
1130 	 * (In the last case we don't want to output a newline if the terminal
1131 	 * doesn't ignore it since that would produce an extra blank line.
1132 	 * But we do want to output a newline if the terminal ignores it in case
1133 	 * the next line is blank.  In that case the single newline output for
1134 	 * that blank line would be ignored!)
1135 	 */
1136 	if (column < sc_width || !auto_wrap || (endline && ignaw) || ctldisp == OPT_ON)
1137 	{
1138 		add_linebuf('\n', AT_NORMAL, 0);
1139 	}
1140 	else if (ignaw && column >= sc_width && forw)
1141 	{
1142 		/*
1143 		 * Terminals with "ignaw" don't wrap until they *really* need
1144 		 * to, i.e. when the character *after* the last one to fit on a
1145 		 * line is output. But they are too hard to deal with when they
1146 		 * get in the state where a full screen width of characters
1147 		 * have been output but the cursor is sitting on the right edge
1148 		 * instead of at the start of the next line.
1149 		 * So we nudge them into wrapping by outputting a space
1150 		 * character plus a backspace.  But do this only if moving
1151 		 * forward; if we're moving backward and drawing this line at
1152 		 * the top of the screen, the space would overwrite the first
1153 		 * char on the next line.  We don't need to do this "nudge"
1154 		 * at the top of the screen anyway.
1155 		 */
1156 		add_linebuf(' ', AT_NORMAL, 1);
1157 		add_linebuf('\b', AT_NORMAL, -1);
1158 	}
1159 	set_linebuf(curr, '\0', AT_NORMAL);
1160 }
1161 
1162 /*
1163  *
1164  */
1165 	public void
1166 set_status_col(c)
1167 	int c;
1168 {
1169 	set_linebuf(0, c, AT_NORMAL|AT_HILITE);
1170 }
1171 
1172 /*
1173  * Get a character from the current line.
1174  * Return the character as the function return value,
1175  * and the character attribute in *ap.
1176  */
1177 	public int
1178 gline(i, ap)
1179 	int i;
1180 	int *ap;
1181 {
1182 	if (is_null_line)
1183 	{
1184 		/*
1185 		 * If there is no current line, we pretend the line is
1186 		 * either "~" or "", depending on the "twiddle" flag.
1187 		 */
1188 		if (twiddle)
1189 		{
1190 			if (i == 0)
1191 			{
1192 				*ap = AT_BOLD;
1193 				return '~';
1194 			}
1195 			--i;
1196 		}
1197 		/* Make sure we're back to AT_NORMAL before the '\n'.  */
1198 		*ap = AT_NORMAL;
1199 		return i ? '\0' : '\n';
1200 	}
1201 
1202 	*ap = attr[i];
1203 	return (linebuf[i] & 0xFF);
1204 }
1205 
1206 /*
1207  * Indicate that there is no current line.
1208  */
1209 	public void
1210 null_line(VOID_PARAM)
1211 {
1212 	is_null_line = 1;
1213 	cshift = 0;
1214 }
1215 
1216 /*
1217  * Analogous to forw_line(), but deals with "raw lines":
1218  * lines which are not split for screen width.
1219  * {{ This is supposed to be more efficient than forw_line(). }}
1220  */
1221 	public POSITION
1222 forw_raw_line(curr_pos, linep, line_lenp)
1223 	POSITION curr_pos;
1224 	char **linep;
1225 	int *line_lenp;
1226 {
1227 	int n;
1228 	int c;
1229 	POSITION new_pos;
1230 
1231 	if (curr_pos == NULL_POSITION || ch_seek(curr_pos) ||
1232 		(c = ch_forw_get()) == EOI)
1233 		return (NULL_POSITION);
1234 
1235 	n = 0;
1236 	for (;;)
1237 	{
1238 		if (c == '\n' || c == EOI || ABORT_SIGS())
1239 		{
1240 			new_pos = ch_tell();
1241 			break;
1242 		}
1243 		if (n >= size_linebuf-1)
1244 		{
1245 			if (expand_linebuf())
1246 			{
1247 				/*
1248 				 * Overflowed the input buffer.
1249 				 * Pretend the line ended here.
1250 				 */
1251 				new_pos = ch_tell() - 1;
1252 				break;
1253 			}
1254 		}
1255 		linebuf[n++] = c;
1256 		c = ch_forw_get();
1257 	}
1258 	linebuf[n] = '\0';
1259 	if (linep != NULL)
1260 		*linep = linebuf;
1261 	if (line_lenp != NULL)
1262 		*line_lenp = n;
1263 	return (new_pos);
1264 }
1265 
1266 /*
1267  * Analogous to back_line(), but deals with "raw lines".
1268  * {{ This is supposed to be more efficient than back_line(). }}
1269  */
1270 	public POSITION
1271 back_raw_line(curr_pos, linep, line_lenp)
1272 	POSITION curr_pos;
1273 	char **linep;
1274 	int *line_lenp;
1275 {
1276 	int n;
1277 	int c;
1278 	POSITION new_pos;
1279 
1280 	if (curr_pos == NULL_POSITION || curr_pos <= ch_zero() ||
1281 		ch_seek(curr_pos-1))
1282 		return (NULL_POSITION);
1283 
1284 	n = size_linebuf;
1285 	linebuf[--n] = '\0';
1286 	for (;;)
1287 	{
1288 		c = ch_back_get();
1289 		if (c == '\n' || ABORT_SIGS())
1290 		{
1291 			/*
1292 			 * This is the newline ending the previous line.
1293 			 * We have hit the beginning of the line.
1294 			 */
1295 			new_pos = ch_tell() + 1;
1296 			break;
1297 		}
1298 		if (c == EOI)
1299 		{
1300 			/*
1301 			 * We have hit the beginning of the file.
1302 			 * This must be the first line in the file.
1303 			 * This must, of course, be the beginning of the line.
1304 			 */
1305 			new_pos = ch_zero();
1306 			break;
1307 		}
1308 		if (n <= 0)
1309 		{
1310 			int old_size_linebuf = size_linebuf;
1311 			char *fm;
1312 			char *to;
1313 			if (expand_linebuf())
1314 			{
1315 				/*
1316 				 * Overflowed the input buffer.
1317 				 * Pretend the line ended here.
1318 				 */
1319 				new_pos = ch_tell() + 1;
1320 				break;
1321 			}
1322 			/*
1323 			 * Shift the data to the end of the new linebuf.
1324 			 */
1325 			for (fm = linebuf + old_size_linebuf - 1,
1326 			      to = linebuf + size_linebuf - 1;
1327 			     fm >= linebuf;  fm--, to--)
1328 				*to = *fm;
1329 			n = size_linebuf - old_size_linebuf;
1330 		}
1331 		linebuf[--n] = c;
1332 	}
1333 	if (linep != NULL)
1334 		*linep = &linebuf[n];
1335 	if (line_lenp != NULL)
1336 		*line_lenp = size_linebuf - 1 - n;
1337 	return (new_pos);
1338 }
1339 
1340 /*
1341  * Find the shift necessary to show the end of the longest displayed line.
1342  */
1343 	public int
1344 rrshift(VOID_PARAM)
1345 {
1346 	POSITION pos;
1347 	int save_width;
1348 	int line;
1349 	int longest = 0;
1350 
1351 	save_width = sc_width;
1352 	sc_width = INT_MAX;
1353 	hshift = 0;
1354 	pos = position(TOP);
1355 	for (line = 0; line < sc_height && pos != NULL_POSITION; line++)
1356 	{
1357 		pos = forw_line(pos);
1358 		if (column > longest)
1359 			longest = column;
1360 	}
1361 	sc_width = save_width;
1362 	if (longest < sc_width)
1363 		return 0;
1364 	return longest - sc_width;
1365 }
1366