xref: /freebsd/contrib/less/line.c (revision 31d62a73c2e6ac0ff413a7a17700ffc7dce254ef)
1 /*
2  * Copyright (C) 1984-2017  Mark Nudelman
3  *
4  * You may distribute under the terms of either the GNU General Public
5  * License or the Less License, as specified in the README file.
6  *
7  * For more information, see the README file.
8  */
9 
10 /*
11  * Routines to manipulate the "line buffer".
12  * The line buffer holds a line of output as it is being built
13  * in preparation for output to the screen.
14  */
15 
16 #include "less.h"
17 #include "charset.h"
18 #include "position.h"
19 
20 #if MSDOS_COMPILER==WIN32C
21 #define WIN32_LEAN_AND_MEAN
22 #include <windows.h>
23 #endif
24 
25 static char *linebuf = NULL;	/* Buffer which holds the current output line */
26 static char *attr = NULL;	/* Extension of linebuf to hold attributes */
27 public int size_linebuf = 0;	/* Size of line buffer (and attr buffer) */
28 
29 static int cshift;		/* Current left-shift of output line buffer */
30 public int hshift;		/* Desired left-shift of output line buffer */
31 public int tabstops[TABSTOP_MAX] = { 0 }; /* Custom tabstops */
32 public int ntabstops = 1;	/* Number of tabstops */
33 public int tabdefault = 8;	/* Default repeated tabstops */
34 public POSITION highest_hilite;	/* Pos of last hilite in file found so far */
35 
36 static int curr;		/* Index into linebuf */
37 static int column;		/* Printable length, accounting for
38 				   backspaces, etc. */
39 static int right_curr;
40 static int right_column;
41 static int overstrike;		/* Next char should overstrike previous char */
42 static int last_overstrike = AT_NORMAL;
43 static int is_null_line;	/* There is no current line */
44 static int lmargin;		/* Left margin */
45 static LWCHAR pendc;
46 static POSITION pendpos;
47 static char *end_ansi_chars;
48 static char *mid_ansi_chars;
49 
50 static int attr_swidth LESSPARAMS ((int a));
51 static int attr_ewidth LESSPARAMS ((int a));
52 static int do_append LESSPARAMS ((LWCHAR ch, char *rep, POSITION pos));
53 
54 extern int sigs;
55 extern int bs_mode;
56 extern int linenums;
57 extern int ctldisp;
58 extern int twiddle;
59 extern int binattr;
60 extern int status_col;
61 extern int auto_wrap, ignaw;
62 extern int bo_s_width, bo_e_width;
63 extern int ul_s_width, ul_e_width;
64 extern int bl_s_width, bl_e_width;
65 extern int so_s_width, so_e_width;
66 extern int sc_width, sc_height;
67 extern int utf_mode;
68 extern POSITION start_attnpos;
69 extern POSITION end_attnpos;
70 extern LWCHAR rscroll_char;
71 extern int rscroll_attr;
72 
73 static char mbc_buf[MAX_UTF_CHAR_LEN];
74 static int mbc_buf_len = 0;
75 static int mbc_buf_index = 0;
76 static POSITION mbc_pos;
77 
78 /*
79  * Initialize from environment variables.
80  */
81 	public void
82 init_line()
83 {
84 	end_ansi_chars = lgetenv("LESSANSIENDCHARS");
85 	if (end_ansi_chars == NULL || *end_ansi_chars == '\0')
86 		end_ansi_chars = "m";
87 
88 	mid_ansi_chars = lgetenv("LESSANSIMIDCHARS");
89 	if (mid_ansi_chars == NULL || *mid_ansi_chars == '\0')
90 		mid_ansi_chars = "0123456789:;[?!\"'#%()*+ ";
91 
92 	linebuf = (char *) ecalloc(LINEBUF_SIZE, sizeof(char));
93 	attr = (char *) ecalloc(LINEBUF_SIZE, sizeof(char));
94 	size_linebuf = LINEBUF_SIZE;
95 }
96 
97 /*
98  * Expand the line buffer.
99  */
100 	static int
101 expand_linebuf()
102 {
103 	/* Double the size of the line buffer. */
104 	int new_size = size_linebuf * 2;
105 
106 	/* Just realloc to expand the buffer, if we can. */
107 #if HAVE_REALLOC
108 	char *new_buf = (char *) realloc(linebuf, new_size);
109 	char *new_attr = (char *) realloc(attr, new_size);
110 #else
111 	char *new_buf = (char *) calloc(new_size, sizeof(char));
112 	char *new_attr = (char *) calloc(new_size, sizeof(char));
113 #endif
114 	if (new_buf == NULL || new_attr == NULL)
115 	{
116 		if (new_attr != NULL)
117 			free(new_attr);
118 		if (new_buf != NULL)
119 			free(new_buf);
120 		return 1;
121 	}
122 #if HAVE_REALLOC
123 	/*
124 	 * We realloc'd the buffers; they already have the old contents.
125 	 */
126 	#if 0
127 	memset(new_buf + size_linebuf, 0, new_size - size_linebuf);
128 	memset(new_attr + size_linebuf, 0, new_size - size_linebuf);
129 	#endif
130 #else
131 	/*
132 	 * We just calloc'd the buffers; copy the old contents.
133 	 */
134 	memcpy(new_buf, linebuf, size_linebuf * sizeof(char));
135 	memcpy(new_attr, attr, size_linebuf * sizeof(char));
136 	free(attr);
137 	free(linebuf);
138 #endif
139 	linebuf = new_buf;
140 	attr = new_attr;
141 	size_linebuf = new_size;
142 	return 0;
143 }
144 
145 /*
146  * Is a character ASCII?
147  */
148 	public int
149 is_ascii_char(ch)
150 	LWCHAR ch;
151 {
152 	return (ch <= 0x7F);
153 }
154 
155 /*
156  * Rewind the line buffer.
157  */
158 	public void
159 prewind()
160 {
161 	curr = 0;
162 	column = 0;
163 	right_curr = 0;
164 	right_column = 0;
165 	cshift = 0;
166 	overstrike = 0;
167 	last_overstrike = AT_NORMAL;
168 	mbc_buf_len = 0;
169 	is_null_line = 0;
170 	pendc = '\0';
171 	lmargin = 0;
172 	if (status_col)
173 		lmargin += 2;
174 }
175 
176 /*
177  * Set a character in the line buffer.
178  */
179 	static void
180 set_linebuf(n, ch, a)
181 	int n;
182 	LWCHAR ch;
183 	char a;
184 {
185 	linebuf[n] = ch;
186 	attr[n] = a;
187 }
188 
189 /*
190  * Append a character to the line buffer.
191  */
192 	static void
193 add_linebuf(ch, a, w)
194 	LWCHAR ch;
195 	char a;
196 	int w;
197 {
198 	set_linebuf(curr++, ch, a);
199 	column += w;
200 }
201 
202 /*
203  * Insert the line number (of the given position) into the line buffer.
204  */
205 	public void
206 plinenum(pos)
207 	POSITION pos;
208 {
209 	LINENUM linenum = 0;
210 	int i;
211 
212 	if (linenums == OPT_ONPLUS)
213 	{
214 		/*
215 		 * Get the line number and put it in the current line.
216 		 * {{ Note: since find_linenum calls forw_raw_line,
217 		 *    it may seek in the input file, requiring the caller
218 		 *    of plinenum to re-seek if necessary. }}
219 		 * {{ Since forw_raw_line modifies linebuf, we must
220 		 *    do this first, before storing anything in linebuf. }}
221 		 */
222 		linenum = find_linenum(pos);
223 	}
224 
225 	/*
226 	 * Display a status column if the -J option is set.
227 	 */
228 	if (status_col)
229 	{
230 		int a = AT_NORMAL;
231 		char c = posmark(pos);
232 		if (c != 0)
233 			a |= AT_HILITE;
234 		else
235 		{
236 			c = ' ';
237 			if (start_attnpos != NULL_POSITION &&
238 			    pos >= start_attnpos && pos <= end_attnpos)
239 				a |= AT_HILITE;
240 		}
241 		add_linebuf(c, a, 1); /* column 0: status */
242 		add_linebuf(' ', AT_NORMAL, 1); /* column 1: empty */
243 	}
244 
245 	/*
246 	 * Display the line number at the start of each line
247 	 * if the -N option is set.
248 	 */
249 	if (linenums == OPT_ONPLUS)
250 	{
251 		char buf[INT_STRLEN_BOUND(linenum) + 2];
252 		int pad = 0;
253 		int n;
254 
255 		linenumtoa(linenum, buf);
256 		n = (int) strlen(buf);
257 		if (n < MIN_LINENUM_WIDTH)
258 			pad = MIN_LINENUM_WIDTH - n;
259 		for (i = 0; i < pad; i++)
260 			add_linebuf(' ', AT_NORMAL, 1);
261 		for (i = 0; i < n; i++)
262 			add_linebuf(buf[i], AT_BOLD, 1);
263 		add_linebuf(' ', AT_NORMAL, 1);
264 		lmargin += n + pad + 1;
265 	}
266 	/*
267 	 * Append enough spaces to bring us to the lmargin.
268 	 */
269 	while (column < lmargin)
270 	{
271 		add_linebuf(' ', AT_NORMAL, 1);
272 	}
273 }
274 
275 /*
276  * Shift the input line left.
277  * This means discarding N printable chars at the start of the buffer.
278  */
279 	static void
280 pshift(shift)
281 	int shift;
282 {
283 	LWCHAR prev_ch = 0;
284 	unsigned char c;
285 	int shifted = 0;
286 	int to;
287 	int from;
288 	int len;
289 	int width;
290 	int prev_attr;
291 	int next_attr;
292 
293 	if (shift > column - lmargin)
294 		shift = column - lmargin;
295 	if (shift > curr - lmargin)
296 		shift = curr - lmargin;
297 
298 	to = from = lmargin;
299 	/*
300 	 * We keep on going when shifted == shift
301 	 * to get all combining chars.
302 	 */
303 	while (shifted <= shift && from < curr)
304 	{
305 		c = linebuf[from];
306 		if (ctldisp == OPT_ONPLUS && IS_CSI_START(c))
307 		{
308 			/* Keep cumulative effect.  */
309 			linebuf[to] = c;
310 			attr[to++] = attr[from++];
311 			while (from < curr && linebuf[from])
312 			{
313 				linebuf[to] = linebuf[from];
314 				attr[to++] = attr[from];
315 				if (!is_ansi_middle(linebuf[from++]))
316 					break;
317 			}
318 			continue;
319 		}
320 
321 		width = 0;
322 
323 		if (!IS_ASCII_OCTET(c) && utf_mode)
324 		{
325 			/* Assumes well-formedness validation already done.  */
326 			LWCHAR ch;
327 
328 			len = utf_len(c);
329 			if (from + len > curr)
330 				break;
331 			ch = get_wchar(linebuf + from);
332 			if (!is_composing_char(ch) && !is_combining_char(prev_ch, ch))
333 				width = is_wide_char(ch) ? 2 : 1;
334 			prev_ch = ch;
335 		} else
336 		{
337 			len = 1;
338 			if (c == '\b')
339 				/* XXX - Incorrect if several '\b' in a row.  */
340 				width = (utf_mode && is_wide_char(prev_ch)) ? -2 : -1;
341 			else if (!control_char(c))
342 				width = 1;
343 			prev_ch = 0;
344 		}
345 
346 		if (width == 2 && shift - shifted == 1) {
347 			/* Should never happen when called by pshift_all().  */
348 			attr[to] = attr[from];
349 			/*
350 			 * Assume a wide_char will never be the first half of a
351 			 * combining_char pair, so reset prev_ch in case we're
352 			 * followed by a '\b'.
353 			 */
354 			prev_ch = linebuf[to++] = ' ';
355 			from += len;
356 			shifted++;
357 			continue;
358 		}
359 
360 		/* Adjust width for magic cookies. */
361 		prev_attr = (to > 0) ? attr[to-1] : AT_NORMAL;
362 		next_attr = (from + len < curr) ? attr[from + len] : prev_attr;
363 		if (!is_at_equiv(attr[from], prev_attr) &&
364 			!is_at_equiv(attr[from], next_attr))
365 		{
366 			width += attr_swidth(attr[from]);
367 			if (from + len < curr)
368 				width += attr_ewidth(attr[from]);
369 			if (is_at_equiv(prev_attr, next_attr))
370 			{
371 				width += attr_ewidth(prev_attr);
372 				if (from + len < curr)
373 					width += attr_swidth(next_attr);
374 			}
375 		}
376 
377 		if (shift - shifted < width)
378 			break;
379 		from += len;
380 		shifted += width;
381 		if (shifted < 0)
382 			shifted = 0;
383 	}
384 	while (from < curr)
385 	{
386 		linebuf[to] = linebuf[from];
387 		attr[to++] = attr[from++];
388 	}
389 	curr = to;
390 	column -= shifted;
391 	cshift += shifted;
392 }
393 
394 /*
395  *
396  */
397 	public void
398 pshift_all()
399 {
400 	pshift(column);
401 }
402 
403 /*
404  * Return the printing width of the start (enter) sequence
405  * for a given character attribute.
406  */
407 	static int
408 attr_swidth(a)
409 	int a;
410 {
411 	int w = 0;
412 
413 	a = apply_at_specials(a);
414 
415 	if (a & AT_UNDERLINE)
416 		w += ul_s_width;
417 	if (a & AT_BOLD)
418 		w += bo_s_width;
419 	if (a & AT_BLINK)
420 		w += bl_s_width;
421 	if (a & AT_STANDOUT)
422 		w += so_s_width;
423 
424 	return w;
425 }
426 
427 /*
428  * Return the printing width of the end (exit) sequence
429  * for a given character attribute.
430  */
431 	static int
432 attr_ewidth(a)
433 	int a;
434 {
435 	int w = 0;
436 
437 	a = apply_at_specials(a);
438 
439 	if (a & AT_UNDERLINE)
440 		w += ul_e_width;
441 	if (a & AT_BOLD)
442 		w += bo_e_width;
443 	if (a & AT_BLINK)
444 		w += bl_e_width;
445 	if (a & AT_STANDOUT)
446 		w += so_e_width;
447 
448 	return w;
449 }
450 
451 /*
452  * Return the printing width of a given character and attribute,
453  * if the character were added to the current position in the line buffer.
454  * Adding a character with a given attribute may cause an enter or exit
455  * attribute sequence to be inserted, so this must be taken into account.
456  */
457 	static int
458 pwidth(ch, a, prev_ch)
459 	LWCHAR ch;
460 	int a;
461 	LWCHAR prev_ch;
462 {
463 	int w;
464 
465 	if (ch == '\b')
466 		/*
467 		 * Backspace moves backwards one or two positions.
468 		 * XXX - Incorrect if several '\b' in a row.
469 		 */
470 		return (utf_mode && is_wide_char(prev_ch)) ? -2 : -1;
471 
472 	if (!utf_mode || is_ascii_char(ch))
473 	{
474 		if (control_char((char)ch))
475 		{
476 			/*
477 			 * Control characters do unpredictable things,
478 			 * so we don't even try to guess; say it doesn't move.
479 			 * This can only happen if the -r flag is in effect.
480 			 */
481 			return (0);
482 		}
483 	} else
484 	{
485 		if (is_composing_char(ch) || is_combining_char(prev_ch, ch))
486 		{
487 			/*
488 			 * Composing and combining chars take up no space.
489 			 *
490 			 * Some terminals, upon failure to compose a
491 			 * composing character with the character(s) that
492 			 * precede(s) it will actually take up one column
493 			 * for the composing character; there isn't much
494 			 * we could do short of testing the (complex)
495 			 * composition process ourselves and printing
496 			 * a binary representation when it fails.
497 			 */
498 			return (0);
499 		}
500 	}
501 
502 	/*
503 	 * Other characters take one or two columns,
504 	 * plus the width of any attribute enter/exit sequence.
505 	 */
506 	w = 1;
507 	if (is_wide_char(ch))
508 		w++;
509 	if (curr > 0 && !is_at_equiv(attr[curr-1], a))
510 		w += attr_ewidth(attr[curr-1]);
511 	if ((apply_at_specials(a) != AT_NORMAL) &&
512 	    (curr == 0 || !is_at_equiv(attr[curr-1], a)))
513 		w += attr_swidth(a);
514 	return (w);
515 }
516 
517 /*
518  * Delete to the previous base character in the line buffer.
519  * Return 1 if one is found.
520  */
521 	static int
522 backc()
523 {
524 	LWCHAR prev_ch;
525 	char *p = linebuf + curr;
526 	LWCHAR ch = step_char(&p, -1, linebuf + lmargin);
527 	int width;
528 
529 	/* This assumes that there is no '\b' in linebuf.  */
530 	while (   curr > lmargin
531 	       && column > lmargin
532 	       && (!(attr[curr - 1] & (AT_ANSI|AT_BINARY))))
533 	{
534 		curr = (int) (p - linebuf);
535 		prev_ch = step_char(&p, -1, linebuf + lmargin);
536 		width = pwidth(ch, attr[curr], prev_ch);
537 		column -= width;
538 		if (width > 0)
539 			return 1;
540 		ch = prev_ch;
541 	}
542 
543 	return 0;
544 }
545 
546 /*
547  * Are we currently within a recognized ANSI escape sequence?
548  */
549 	static int
550 in_ansi_esc_seq()
551 {
552 	char *p;
553 
554 	/*
555 	 * Search backwards for either an ESC (which means we ARE in a seq);
556 	 * or an end char (which means we're NOT in a seq).
557 	 */
558 	for (p = &linebuf[curr];  p > linebuf; )
559 	{
560 		LWCHAR ch = step_char(&p, -1, linebuf);
561 		if (IS_CSI_START(ch))
562 			return (1);
563 		if (!is_ansi_middle(ch))
564 			return (0);
565 	}
566 	return (0);
567 }
568 
569 /*
570  * Is a character the end of an ANSI escape sequence?
571  */
572 	public int
573 is_ansi_end(ch)
574 	LWCHAR ch;
575 {
576 	if (!is_ascii_char(ch))
577 		return (0);
578 	return (strchr(end_ansi_chars, (char) ch) != NULL);
579 }
580 
581 /*
582  * Can a char appear in an ANSI escape sequence, before the end char?
583  */
584 	public int
585 is_ansi_middle(ch)
586 	LWCHAR ch;
587 {
588 	if (!is_ascii_char(ch))
589 		return (0);
590 	if (is_ansi_end(ch))
591 		return (0);
592 	return (strchr(mid_ansi_chars, (char) ch) != NULL);
593 }
594 
595 /*
596  * Skip past an ANSI escape sequence.
597  * pp is initially positioned just after the CSI_START char.
598  */
599 	public void
600 skip_ansi(pp, limit)
601 	char **pp;
602 	constant char *limit;
603 {
604 	LWCHAR c;
605 	do {
606 		c = step_char(pp, +1, limit);
607 	} while (*pp < limit && is_ansi_middle(c));
608 	/* Note that we discard final char, for which is_ansi_middle is false. */
609 }
610 
611 
612 /*
613  * Append a character and attribute to the line buffer.
614  */
615 #define	STORE_CHAR(ch,a,rep,pos) \
616 	do { \
617 		if (store_char((ch),(a),(rep),(pos))) return (1); \
618 	} while (0)
619 
620 	static int
621 store_char(ch, a, rep, pos)
622 	LWCHAR ch;
623 	int a;
624 	char *rep;
625 	POSITION pos;
626 {
627 	int w;
628 	int replen;
629 	char cs;
630 
631 	w = (a & (AT_UNDERLINE|AT_BOLD));	/* Pre-use w.  */
632 	if (w != AT_NORMAL)
633 		last_overstrike = w;
634 
635 #if HILITE_SEARCH
636 	{
637 		int matches;
638 		if (is_hilited(pos, pos+1, 0, &matches))
639 		{
640 			/*
641 			 * This character should be highlighted.
642 			 * Override the attribute passed in.
643 			 */
644 			if (a != AT_ANSI)
645 			{
646 				if (highest_hilite != NULL_POSITION &&
647 				    pos > highest_hilite)
648 				    	highest_hilite = pos;
649 				a |= AT_HILITE;
650 			}
651 		}
652 	}
653 #endif
654 
655 	if (ctldisp == OPT_ONPLUS && in_ansi_esc_seq())
656 	{
657 		if (!is_ansi_end(ch) && !is_ansi_middle(ch)) {
658 			/* Remove whole unrecognized sequence.  */
659 			char *p = &linebuf[curr];
660 			LWCHAR bch;
661 			do {
662 				bch = step_char(&p, -1, linebuf);
663 			} while (p > linebuf && !IS_CSI_START(bch));
664 			curr = (int) (p - linebuf);
665 			return 0;
666 		}
667 		a = AT_ANSI;	/* Will force re-AT_'ing around it.  */
668 		w = 0;
669 	}
670 	else if (ctldisp == OPT_ONPLUS && IS_CSI_START(ch))
671 	{
672 		a = AT_ANSI;	/* Will force re-AT_'ing around it.  */
673 		w = 0;
674 	}
675 	else
676 	{
677 		char *p = &linebuf[curr];
678 		LWCHAR prev_ch = step_char(&p, -1, linebuf);
679 		w = pwidth(ch, a, prev_ch);
680 	}
681 
682 	if (ctldisp != OPT_ON && column + w + attr_ewidth(a) > sc_width)
683 		/*
684 		 * Won't fit on screen.
685 		 */
686 		return (1);
687 
688 	if (rep == NULL)
689 	{
690 		cs = (char) ch;
691 		rep = &cs;
692 		replen = 1;
693 	} else
694 	{
695 		replen = utf_len(rep[0]);
696 	}
697 	if (curr + replen >= size_linebuf-6)
698 	{
699 		/*
700 		 * Won't fit in line buffer.
701 		 * Try to expand it.
702 		 */
703 		if (expand_linebuf())
704 			return (1);
705 	}
706 
707 	if (column > right_column && w > 0)
708 	{
709 		right_column = column;
710 		right_curr = curr;
711 	}
712 
713 	while (replen-- > 0)
714 	{
715 		add_linebuf(*rep++, a, 0);
716 	}
717 	column += w;
718 	return (0);
719 }
720 
721 /*
722  * Append a tab to the line buffer.
723  * Store spaces to represent the tab.
724  */
725 #define	STORE_TAB(a,pos) \
726 	do { if (store_tab((a),(pos))) return (1); } while (0)
727 
728 	static int
729 store_tab(attr, pos)
730 	int attr;
731 	POSITION pos;
732 {
733 	int to_tab = column + cshift - lmargin;
734 	int i;
735 
736 	if (ntabstops < 2 || to_tab >= tabstops[ntabstops-1])
737 		to_tab = tabdefault -
738 		     ((to_tab - tabstops[ntabstops-1]) % tabdefault);
739 	else
740 	{
741 		for (i = ntabstops - 2;  i >= 0;  i--)
742 			if (to_tab >= tabstops[i])
743 				break;
744 		to_tab = tabstops[i+1] - to_tab;
745 	}
746 
747 	if (column + to_tab - 1 + pwidth(' ', attr, 0) + attr_ewidth(attr) > sc_width)
748 		return 1;
749 
750 	do {
751 		STORE_CHAR(' ', attr, " ", pos);
752 	} while (--to_tab > 0);
753 	return 0;
754 }
755 
756 #define STORE_PRCHAR(c, pos) \
757 	do { if (store_prchar((c), (pos))) return 1; } while (0)
758 
759 	static int
760 store_prchar(c, pos)
761 	LWCHAR c;
762 	POSITION pos;
763 {
764 	char *s;
765 
766 	/*
767 	 * Convert to printable representation.
768 	 */
769 	s = prchar(c);
770 
771 	/*
772 	 * Make sure we can get the entire representation
773 	 * of the character on this line.
774 	 */
775 	if (column + (int) strlen(s) - 1 +
776             pwidth(' ', binattr, 0) + attr_ewidth(binattr) > sc_width)
777 		return 1;
778 
779 	for ( ;  *s != 0;  s++)
780 		STORE_CHAR(*s, AT_BINARY, NULL, pos);
781 
782 	return 0;
783 }
784 
785 	static int
786 flush_mbc_buf(pos)
787 	POSITION pos;
788 {
789 	int i;
790 
791 	for (i = 0; i < mbc_buf_index; i++)
792 		if (store_prchar(mbc_buf[i], pos))
793 			return mbc_buf_index - i;
794 
795 	return 0;
796 }
797 
798 /*
799  * Append a character to the line buffer.
800  * Expand tabs into spaces, handle underlining, boldfacing, etc.
801  * Returns 0 if ok, 1 if couldn't fit in buffer.
802  */
803 	public int
804 pappend(c, pos)
805 	unsigned char c;
806 	POSITION pos;
807 {
808 	int r;
809 
810 	if (pendc)
811 	{
812 		if (c == '\r' && pendc == '\r')
813 			return (0);
814 		if (do_append(pendc, NULL, pendpos))
815 			/*
816 			 * Oops.  We've probably lost the char which
817 			 * was in pendc, since caller won't back up.
818 			 */
819 			return (1);
820 		pendc = '\0';
821 	}
822 
823 	if (c == '\r' && bs_mode == BS_SPECIAL)
824 	{
825 		if (mbc_buf_len > 0)  /* utf_mode must be on. */
826 		{
827 			/* Flush incomplete (truncated) sequence. */
828 			r = flush_mbc_buf(mbc_pos);
829 			mbc_buf_index = r + 1;
830 			mbc_buf_len = 0;
831 			if (r)
832 				return (mbc_buf_index);
833 		}
834 
835 		/*
836 		 * Don't put the CR into the buffer until we see
837 		 * the next char.  If the next char is a newline,
838 		 * discard the CR.
839 		 */
840 		pendc = c;
841 		pendpos = pos;
842 		return (0);
843 	}
844 
845 	if (!utf_mode)
846 	{
847 		r = do_append(c, NULL, pos);
848 	} else
849 	{
850 		/* Perform strict validation in all possible cases. */
851 		if (mbc_buf_len == 0)
852 		{
853 		retry:
854 			mbc_buf_index = 1;
855 			*mbc_buf = c;
856 			if (IS_ASCII_OCTET(c))
857 				r = do_append(c, NULL, pos);
858 			else if (IS_UTF8_LEAD(c))
859 			{
860 				mbc_buf_len = utf_len(c);
861 				mbc_pos = pos;
862 				return (0);
863 			} else
864 				/* UTF8_INVALID or stray UTF8_TRAIL */
865 				r = flush_mbc_buf(pos);
866 		} else if (IS_UTF8_TRAIL(c))
867 		{
868 			mbc_buf[mbc_buf_index++] = c;
869 			if (mbc_buf_index < mbc_buf_len)
870 				return (0);
871 			if (is_utf8_well_formed(mbc_buf, mbc_buf_index))
872 				r = do_append(get_wchar(mbc_buf), mbc_buf, mbc_pos);
873 			else
874 				/* Complete, but not shortest form, sequence. */
875 				mbc_buf_index = r = flush_mbc_buf(mbc_pos);
876 			mbc_buf_len = 0;
877 		} else
878 		{
879 			/* Flush incomplete (truncated) sequence.  */
880 			r = flush_mbc_buf(mbc_pos);
881 			mbc_buf_index = r + 1;
882 			mbc_buf_len = 0;
883 			/* Handle new char.  */
884 			if (!r)
885 				goto retry;
886  		}
887 	}
888 
889 	/*
890 	 * If we need to shift the line, do it.
891 	 * But wait until we get to at least the middle of the screen,
892 	 * so shifting it doesn't affect the chars we're currently
893 	 * pappending.  (Bold & underline can get messed up otherwise.)
894 	 */
895 	if (cshift < hshift && column > sc_width / 2)
896 	{
897 		linebuf[curr] = '\0';
898 		pshift(hshift - cshift);
899 	}
900 	if (r)
901 	{
902 		/* How many chars should caller back up? */
903 		r = (!utf_mode) ? 1 : mbc_buf_index;
904 	}
905 	return (r);
906 }
907 
908 	static int
909 do_append(ch, rep, pos)
910 	LWCHAR ch;
911 	char *rep;
912 	POSITION pos;
913 {
914 	int a;
915 	LWCHAR prev_ch;
916 
917 	a = AT_NORMAL;
918 
919 	if (ch == '\b')
920 	{
921 		if (bs_mode == BS_CONTROL)
922 			goto do_control_char;
923 
924 		/*
925 		 * A better test is needed here so we don't
926 		 * backspace over part of the printed
927 		 * representation of a binary character.
928 		 */
929 		if (   curr <= lmargin
930 		    || column <= lmargin
931 		    || (attr[curr - 1] & (AT_ANSI|AT_BINARY)))
932 			STORE_PRCHAR('\b', pos);
933 		else if (bs_mode == BS_NORMAL)
934 			STORE_CHAR(ch, AT_NORMAL, NULL, pos);
935 		else if (bs_mode == BS_SPECIAL)
936 			overstrike = backc();
937 
938 		return 0;
939 	}
940 
941 	if (overstrike > 0)
942 	{
943 		/*
944 		 * Overstrike the character at the current position
945 		 * in the line buffer.  This will cause either
946 		 * underline (if a "_" is overstruck),
947 		 * bold (if an identical character is overstruck),
948 		 * or just deletion of the character in the buffer.
949 		 */
950 		overstrike = utf_mode ? -1 : 0;
951 		if (utf_mode)
952 		{
953 			/* To be correct, this must be a base character.  */
954 			prev_ch = get_wchar(linebuf + curr);
955 		} else
956 		{
957 			prev_ch = (unsigned char) linebuf[curr];
958 		}
959 		a = attr[curr];
960 		if (ch == prev_ch)
961 		{
962 			/*
963 			 * Overstriking a char with itself means make it bold.
964 			 * But overstriking an underscore with itself is
965 			 * ambiguous.  It could mean make it bold, or
966 			 * it could mean make it underlined.
967 			 * Use the previous overstrike to resolve it.
968 			 */
969 			if (ch == '_')
970 			{
971 				if ((a & (AT_BOLD|AT_UNDERLINE)) != AT_NORMAL)
972 					a |= (AT_BOLD|AT_UNDERLINE);
973 				else if (last_overstrike != AT_NORMAL)
974 					a |= last_overstrike;
975 				else
976 					a |= AT_BOLD;
977 			} else
978 				a |= AT_BOLD;
979 		} else if (ch == '_')
980 		{
981 			a |= AT_UNDERLINE;
982 			ch = prev_ch;
983 			rep = linebuf + curr;
984 		} else if (prev_ch == '_')
985 		{
986 			a |= AT_UNDERLINE;
987 		}
988 		/* Else we replace prev_ch, but we keep its attributes.  */
989 	} else if (overstrike < 0)
990 	{
991 		if (   is_composing_char(ch)
992 		    || is_combining_char(get_wchar(linebuf + curr), ch))
993 			/* Continuation of the same overstrike.  */
994 			a = last_overstrike;
995 		else
996 			overstrike = 0;
997 	}
998 
999 	if (ch == '\t')
1000 	{
1001 		/*
1002 		 * Expand a tab into spaces.
1003 		 */
1004 		switch (bs_mode)
1005 		{
1006 		case BS_CONTROL:
1007 			goto do_control_char;
1008 		case BS_NORMAL:
1009 		case BS_SPECIAL:
1010 			STORE_TAB(a, pos);
1011 			break;
1012 		}
1013 	} else if ((!utf_mode || is_ascii_char(ch)) && control_char((char)ch))
1014 	{
1015 	do_control_char:
1016 		if (ctldisp == OPT_ON || (ctldisp == OPT_ONPLUS && IS_CSI_START(ch)))
1017 		{
1018 			/*
1019 			 * Output as a normal character.
1020 			 */
1021 			STORE_CHAR(ch, AT_NORMAL, rep, pos);
1022 		} else
1023 		{
1024 			STORE_PRCHAR((char) ch, pos);
1025 		}
1026 	} else if (utf_mode && ctldisp != OPT_ON && is_ubin_char(ch))
1027 	{
1028 		char *s;
1029 
1030 		s = prutfchar(ch);
1031 
1032 		if (column + (int) strlen(s) - 1 +
1033 		    pwidth(' ', binattr, 0) + attr_ewidth(binattr) > sc_width)
1034 			return (1);
1035 
1036 		for ( ;  *s != 0;  s++)
1037 			STORE_CHAR(*s, AT_BINARY, NULL, pos);
1038  	} else
1039 	{
1040 		STORE_CHAR(ch, a, rep, pos);
1041 	}
1042  	return (0);
1043 }
1044 
1045 /*
1046  *
1047  */
1048 	public int
1049 pflushmbc()
1050 {
1051 	int r = 0;
1052 
1053 	if (mbc_buf_len > 0)
1054 	{
1055 		/* Flush incomplete (truncated) sequence.  */
1056 		r = flush_mbc_buf(mbc_pos);
1057 		mbc_buf_len = 0;
1058 	}
1059 	return r;
1060 }
1061 
1062 /*
1063  * Switch to normal attribute at end of line.
1064  */
1065 	static void
1066 add_attr_normal()
1067 {
1068 	char *p = "\033[m";
1069 
1070 	if (ctldisp != OPT_ONPLUS || !is_ansi_end('m'))
1071 		return;
1072 	for ( ;  *p != '\0';  p++)
1073 		add_linebuf(*p, AT_ANSI, 0);
1074 }
1075 
1076 /*
1077  * Terminate the line in the line buffer.
1078  */
1079 	public void
1080 pdone(endline, chopped, forw)
1081 	int endline;
1082 	int chopped;
1083 	int forw;
1084 {
1085 	(void) pflushmbc();
1086 
1087 	if (pendc && (pendc != '\r' || !endline))
1088 		/*
1089 		 * If we had a pending character, put it in the buffer.
1090 		 * But discard a pending CR if we are at end of line
1091 		 * (that is, discard the CR in a CR/LF sequence).
1092 		 */
1093 		(void) do_append(pendc, NULL, pendpos);
1094 
1095 	/*
1096 	 * Make sure we've shifted the line, if we need to.
1097 	 */
1098 	if (cshift < hshift)
1099 		pshift(hshift - cshift);
1100 
1101 	if (chopped && rscroll_char)
1102 	{
1103 		/*
1104 		 * Display the right scrolling char.
1105 		 * If we've already filled the rightmost screen char
1106 		 * (in the buffer), overwrite it.
1107 		 */
1108 		if (column >= sc_width)
1109 		{
1110 			/* We've already written in the rightmost char. */
1111 			column = right_column;
1112 			curr = right_curr;
1113 		}
1114 		add_attr_normal();
1115 		while (column < sc_width-1)
1116 		{
1117 			/*
1118 			 * Space to last (rightmost) char on screen.
1119 			 * This may be necessary if the char we overwrote
1120 			 * was double-width.
1121 			 */
1122 			add_linebuf(' ', AT_NORMAL, 1);
1123 		}
1124 		/* Print rscroll char. It must be single-width. */
1125 		add_linebuf(rscroll_char, rscroll_attr, 1);
1126 	} else
1127 	{
1128 		add_attr_normal();
1129 	}
1130 
1131 	/*
1132 	 * Add a newline if necessary,
1133 	 * and append a '\0' to the end of the line.
1134 	 * We output a newline if we're not at the right edge of the screen,
1135 	 * or if the terminal doesn't auto wrap,
1136 	 * or if this is really the end of the line AND the terminal ignores
1137 	 * a newline at the right edge.
1138 	 * (In the last case we don't want to output a newline if the terminal
1139 	 * doesn't ignore it since that would produce an extra blank line.
1140 	 * But we do want to output a newline if the terminal ignores it in case
1141 	 * the next line is blank.  In that case the single newline output for
1142 	 * that blank line would be ignored!)
1143 	 */
1144 	if (column < sc_width || !auto_wrap || (endline && ignaw) || ctldisp == OPT_ON)
1145 	{
1146 		add_linebuf('\n', AT_NORMAL, 0);
1147 	}
1148 	else if (ignaw && column >= sc_width && forw)
1149 	{
1150 		/*
1151 		 * Terminals with "ignaw" don't wrap until they *really* need
1152 		 * to, i.e. when the character *after* the last one to fit on a
1153 		 * line is output. But they are too hard to deal with when they
1154 		 * get in the state where a full screen width of characters
1155 		 * have been output but the cursor is sitting on the right edge
1156 		 * instead of at the start of the next line.
1157 		 * So we nudge them into wrapping by outputting a space
1158 		 * character plus a backspace.  But do this only if moving
1159 		 * forward; if we're moving backward and drawing this line at
1160 		 * the top of the screen, the space would overwrite the first
1161 		 * char on the next line.  We don't need to do this "nudge"
1162 		 * at the top of the screen anyway.
1163 		 */
1164 		add_linebuf(' ', AT_NORMAL, 1);
1165 		add_linebuf('\b', AT_NORMAL, -1);
1166 	}
1167 	set_linebuf(curr, '\0', AT_NORMAL);
1168 }
1169 
1170 /*
1171  *
1172  */
1173 	public void
1174 set_status_col(c)
1175 	char c;
1176 {
1177 	set_linebuf(0, c, AT_NORMAL|AT_HILITE);
1178 }
1179 
1180 /*
1181  * Get a character from the current line.
1182  * Return the character as the function return value,
1183  * and the character attribute in *ap.
1184  */
1185 	public int
1186 gline(i, ap)
1187 	int i;
1188 	int *ap;
1189 {
1190 	if (is_null_line)
1191 	{
1192 		/*
1193 		 * If there is no current line, we pretend the line is
1194 		 * either "~" or "", depending on the "twiddle" flag.
1195 		 */
1196 		if (twiddle)
1197 		{
1198 			if (i == 0)
1199 			{
1200 				*ap = AT_BOLD;
1201 				return '~';
1202 			}
1203 			--i;
1204 		}
1205 		/* Make sure we're back to AT_NORMAL before the '\n'.  */
1206 		*ap = AT_NORMAL;
1207 		return i ? '\0' : '\n';
1208 	}
1209 
1210 	*ap = attr[i];
1211 	return (linebuf[i] & 0xFF);
1212 }
1213 
1214 /*
1215  * Indicate that there is no current line.
1216  */
1217 	public void
1218 null_line()
1219 {
1220 	is_null_line = 1;
1221 	cshift = 0;
1222 }
1223 
1224 /*
1225  * Analogous to forw_line(), but deals with "raw lines":
1226  * lines which are not split for screen width.
1227  * {{ This is supposed to be more efficient than forw_line(). }}
1228  */
1229 	public POSITION
1230 forw_raw_line(curr_pos, linep, line_lenp)
1231 	POSITION curr_pos;
1232 	char **linep;
1233 	int *line_lenp;
1234 {
1235 	int n;
1236 	int c;
1237 	POSITION new_pos;
1238 
1239 	if (curr_pos == NULL_POSITION || ch_seek(curr_pos) ||
1240 		(c = ch_forw_get()) == EOI)
1241 		return (NULL_POSITION);
1242 
1243 	n = 0;
1244 	for (;;)
1245 	{
1246 		if (c == '\n' || c == EOI || ABORT_SIGS())
1247 		{
1248 			new_pos = ch_tell();
1249 			break;
1250 		}
1251 		if (n >= size_linebuf-1)
1252 		{
1253 			if (expand_linebuf())
1254 			{
1255 				/*
1256 				 * Overflowed the input buffer.
1257 				 * Pretend the line ended here.
1258 				 */
1259 				new_pos = ch_tell() - 1;
1260 				break;
1261 			}
1262 		}
1263 		linebuf[n++] = c;
1264 		c = ch_forw_get();
1265 	}
1266 	linebuf[n] = '\0';
1267 	if (linep != NULL)
1268 		*linep = linebuf;
1269 	if (line_lenp != NULL)
1270 		*line_lenp = n;
1271 	return (new_pos);
1272 }
1273 
1274 /*
1275  * Analogous to back_line(), but deals with "raw lines".
1276  * {{ This is supposed to be more efficient than back_line(). }}
1277  */
1278 	public POSITION
1279 back_raw_line(curr_pos, linep, line_lenp)
1280 	POSITION curr_pos;
1281 	char **linep;
1282 	int *line_lenp;
1283 {
1284 	int n;
1285 	int c;
1286 	POSITION new_pos;
1287 
1288 	if (curr_pos == NULL_POSITION || curr_pos <= ch_zero() ||
1289 		ch_seek(curr_pos-1))
1290 		return (NULL_POSITION);
1291 
1292 	n = size_linebuf;
1293 	linebuf[--n] = '\0';
1294 	for (;;)
1295 	{
1296 		c = ch_back_get();
1297 		if (c == '\n' || ABORT_SIGS())
1298 		{
1299 			/*
1300 			 * This is the newline ending the previous line.
1301 			 * We have hit the beginning of the line.
1302 			 */
1303 			new_pos = ch_tell() + 1;
1304 			break;
1305 		}
1306 		if (c == EOI)
1307 		{
1308 			/*
1309 			 * We have hit the beginning of the file.
1310 			 * This must be the first line in the file.
1311 			 * This must, of course, be the beginning of the line.
1312 			 */
1313 			new_pos = ch_zero();
1314 			break;
1315 		}
1316 		if (n <= 0)
1317 		{
1318 			int old_size_linebuf = size_linebuf;
1319 			char *fm;
1320 			char *to;
1321 			if (expand_linebuf())
1322 			{
1323 				/*
1324 				 * Overflowed the input buffer.
1325 				 * Pretend the line ended here.
1326 				 */
1327 				new_pos = ch_tell() + 1;
1328 				break;
1329 			}
1330 			/*
1331 			 * Shift the data to the end of the new linebuf.
1332 			 */
1333 			for (fm = linebuf + old_size_linebuf - 1,
1334 			      to = linebuf + size_linebuf - 1;
1335 			     fm >= linebuf;  fm--, to--)
1336 				*to = *fm;
1337 			n = size_linebuf - old_size_linebuf;
1338 		}
1339 		linebuf[--n] = c;
1340 	}
1341 	if (linep != NULL)
1342 		*linep = &linebuf[n];
1343 	if (line_lenp != NULL)
1344 		*line_lenp = size_linebuf - 1 - n;
1345 	return (new_pos);
1346 }
1347 
1348 /*
1349  * Find the shift necessary to show the end of the longest displayed line.
1350  */
1351 	public int
1352 rrshift()
1353 {
1354 	POSITION pos;
1355 	int save_width;
1356 	int line;
1357 	int longest = 0;
1358 
1359 	save_width = sc_width;
1360 	sc_width = INT_MAX;
1361 	hshift = 0;
1362 	pos = position(TOP);
1363 	for (line = 0; line < sc_height && pos != NULL_POSITION; line++)
1364 	{
1365 		pos = forw_line(pos);
1366 		if (column > longest)
1367 			longest = column;
1368 	}
1369 	sc_width = save_width;
1370 	if (longest < sc_width)
1371 		return 0;
1372 	return longest - sc_width;
1373 }
1374