xref: /freebsd/contrib/less/line.c (revision d93a896ef95946b0bf1219866fcb324b78543444)
1 /*
2  * Copyright (C) 1984-2017  Mark Nudelman
3  *
4  * You may distribute under the terms of either the GNU General Public
5  * License or the Less License, as specified in the README file.
6  *
7  * For more information, see the README file.
8  */
9 
10 
11 /*
12  * Routines to manipulate the "line buffer".
13  * The line buffer holds a line of output as it is being built
14  * in preparation for output to the screen.
15  */
16 
17 #include "less.h"
18 #include "charset.h"
19 #include "position.h"
20 
static char *linebuf = NULL;	/* Buffer which holds the current output line */
static char *attr = NULL;	/* Extension of linebuf to hold attributes */
public int size_linebuf = 0;	/* Size of line buffer (and attr buffer) */

static int cshift;		/* Current left-shift of output line buffer */
public int hshift;		/* Desired left-shift of output line buffer */
public int tabstops[TABSTOP_MAX] = { 0 }; /* Custom tabstops */
public int ntabstops = 1;	/* Number of tabstops */
public int tabdefault = 8;	/* Default repeated tabstops */
public POSITION highest_hilite;	/* Pos of last hilite in file found so far */

static int curr;		/* Index into linebuf */
static int column;		/* Printable length, accounting for
				   backspaces, etc. */
static int overstrike;		/* Next char should overstrike previous char */
/* Last nonzero bold/underline attribute seen by store_char(). */
static int last_overstrike = AT_NORMAL;
static int is_null_line;	/* There is no current line */
static int lmargin;		/* Left margin */
/* Character held back by pappend() until the next one is seen (a CR). */
static LWCHAR pendc;
/* File position that was passed to pappend() along with pendc. */
static POSITION pendpos;
/* Chars which end an ANSI escape sequence (LESSANSIENDCHARS, or "m"). */
static char *end_ansi_chars;
/* Chars which may appear inside an ANSI escape sequence (LESSANSIMIDCHARS). */
static char *mid_ansi_chars;

/* Forward declarations of functions defined later in this file. */
static int attr_swidth();
static int attr_ewidth();
static int do_append();

/* Variables defined elsewhere in the program. */
extern int sigs;
extern int bs_mode;
extern int linenums;
extern int ctldisp;
extern int twiddle;
extern int binattr;
extern int status_col;
extern int auto_wrap, ignaw;
extern int bo_s_width, bo_e_width;
extern int ul_s_width, ul_e_width;
extern int bl_s_width, bl_e_width;
extern int so_s_width, so_e_width;
extern int sc_width, sc_height;
extern int utf_mode;
extern POSITION start_attnpos;
extern POSITION end_attnpos;

/*
 * State used by pappend() to assemble a multibyte (UTF-8) character
 * from the individual bytes it is handed.
 */
static char mbc_buf[MAX_UTF_CHAR_LEN];	/* Bytes collected so far */
static int mbc_buf_len = 0;	/* Expected length of the sequence (from
				   utf_len of the lead byte); 0 if no
				   sequence is in progress */
static int mbc_buf_index = 0;	/* Number of bytes stored in mbc_buf */
static POSITION mbc_pos;	/* File position of the lead byte */
69 
70 /*
71  * Initialize from environment variables.
72  */
73 	public void
74 init_line()
75 {
76 	end_ansi_chars = lgetenv("LESSANSIENDCHARS");
77 	if (end_ansi_chars == NULL || *end_ansi_chars == '\0')
78 		end_ansi_chars = "m";
79 
80 	mid_ansi_chars = lgetenv("LESSANSIMIDCHARS");
81 	if (mid_ansi_chars == NULL || *mid_ansi_chars == '\0')
82 		mid_ansi_chars = "0123456789:;[?!\"'#%()*+ ";
83 
84 	linebuf = (char *) ecalloc(LINEBUF_SIZE, sizeof(char));
85 	attr = (char *) ecalloc(LINEBUF_SIZE, sizeof(char));
86 	size_linebuf = LINEBUF_SIZE;
87 }
88 
89 /*
90  * Expand the line buffer.
91  */
92 	static int
93 expand_linebuf()
94 {
95 	/* Double the size of the line buffer. */
96 	int new_size = size_linebuf * 2;
97 
98 	/* Just realloc to expand the buffer, if we can. */
99 #if HAVE_REALLOC
100 	char *new_buf = (char *) realloc(linebuf, new_size);
101 	char *new_attr = (char *) realloc(attr, new_size);
102 #else
103 	char *new_buf = (char *) calloc(new_size, sizeof(char));
104 	char *new_attr = (char *) calloc(new_size, sizeof(char));
105 #endif
106 	if (new_buf == NULL || new_attr == NULL)
107 	{
108 		if (new_attr != NULL)
109 			free(new_attr);
110 		if (new_buf != NULL)
111 			free(new_buf);
112 		return 1;
113 	}
114 #if HAVE_REALLOC
115 	/*
116 	 * We realloc'd the buffers; they already have the old contents.
117 	 */
118 	#if 0
119 	memset(new_buf + size_linebuf, 0, new_size - size_linebuf);
120 	memset(new_attr + size_linebuf, 0, new_size - size_linebuf);
121 	#endif
122 #else
123 	/*
124 	 * We just calloc'd the buffers; copy the old contents.
125 	 */
126 	memcpy(new_buf, linebuf, size_linebuf * sizeof(char));
127 	memcpy(new_attr, attr, size_linebuf * sizeof(char));
128 	free(attr);
129 	free(linebuf);
130 #endif
131 	linebuf = new_buf;
132 	attr = new_attr;
133 	size_linebuf = new_size;
134 	return 0;
135 }
136 
137 /*
138  * Is a character ASCII?
139  */
140 	public int
141 is_ascii_char(ch)
142 	LWCHAR ch;
143 {
144 	return (ch <= 0x7F);
145 }
146 
147 /*
148  * Rewind the line buffer.
149  */
150 	public void
151 prewind()
152 {
153 	curr = 0;
154 	column = 0;
155 	cshift = 0;
156 	overstrike = 0;
157 	last_overstrike = AT_NORMAL;
158 	mbc_buf_len = 0;
159 	is_null_line = 0;
160 	pendc = '\0';
161 	lmargin = 0;
162 	if (status_col)
163 		lmargin += 1;
164 }
165 
166 /*
167  * Insert the line number (of the given position) into the line buffer.
168  */
169 	public void
170 plinenum(pos)
171 	POSITION pos;
172 {
173 	LINENUM linenum = 0;
174 	int i;
175 
176 	if (linenums == OPT_ONPLUS)
177 	{
178 		/*
179 		 * Get the line number and put it in the current line.
180 		 * {{ Note: since find_linenum calls forw_raw_line,
181 		 *    it may seek in the input file, requiring the caller
182 		 *    of plinenum to re-seek if necessary. }}
183 		 * {{ Since forw_raw_line modifies linebuf, we must
184 		 *    do this first, before storing anything in linebuf. }}
185 		 */
186 		linenum = find_linenum(pos);
187 	}
188 
189 	/*
190 	 * Display a status column if the -J option is set.
191 	 */
192 	if (status_col)
193 	{
194 		linebuf[curr] = ' ';
195 		if (start_attnpos != NULL_POSITION &&
196 		    pos >= start_attnpos && pos < end_attnpos)
197 			attr[curr] = AT_NORMAL|AT_HILITE;
198 		else
199 			attr[curr] = AT_NORMAL;
200 		curr++;
201 		column++;
202 	}
203 	/*
204 	 * Display the line number at the start of each line
205 	 * if the -N option is set.
206 	 */
207 	if (linenums == OPT_ONPLUS)
208 	{
209 		char buf[INT_STRLEN_BOUND(pos) + 2];
210 		int n;
211 
212 		linenumtoa(linenum, buf);
213 		n = (int) strlen(buf);
214 		if (n < MIN_LINENUM_WIDTH)
215 			n = MIN_LINENUM_WIDTH;
216 		sprintf(linebuf+curr, "%*s ", n, buf);
217 		n++;  /* One space after the line number. */
218 		for (i = 0; i < n; i++)
219 			attr[curr+i] = AT_BOLD;
220 		curr += n;
221 		column += n;
222 		lmargin += n;
223 	}
224 
225 	/*
226 	 * Append enough spaces to bring us to the lmargin.
227 	 */
228 	while (column < lmargin)
229 	{
230 		linebuf[curr] = ' ';
231 		attr[curr++] = AT_NORMAL;
232 		column++;
233 	}
234 }
235 
/*
 * Shift the input line left.
 * This means discarding N printable chars at the start of the buffer.
 *
 * shift is the number of columns to discard; it is clamped so that
 * nothing to the left of lmargin is removed.  Everything before
 * lmargin (status column, line number) stays in place.
 * On return curr, column and cshift have been adjusted by the
 * number of columns actually removed.
 */
	static void
pshift(shift)
	int shift;
{
	LWCHAR prev_ch = 0;
	unsigned char c;
	int shifted = 0;	/* Columns discarded so far */
	int to;			/* Where the next kept byte goes */
	int from;		/* Next byte to examine */
	int len;		/* Byte length of the char at "from" */
	int width;		/* Column width of the char at "from" */
	int prev_attr;
	int next_attr;

	/* Never shift away more than exists to the right of the margin. */
	if (shift > column - lmargin)
		shift = column - lmargin;
	if (shift > curr - lmargin)
		shift = curr - lmargin;

	to = from = lmargin;
	/*
	 * We keep on going when shifted == shift
	 * to get all combining chars.
	 */
	while (shifted <= shift && from < curr)
	{
		c = linebuf[from];
		if (ctldisp == OPT_ONPLUS && IS_CSI_START(c))
		{
			/*
			 * Keep cumulative effect.
			 * The escape sequence is copied rather than
			 * discarded, up to and including the first char
			 * which is not an ANSI "middle" char.
			 */
			linebuf[to] = c;
			attr[to++] = attr[from++];
			while (from < curr && linebuf[from])
			{
				linebuf[to] = linebuf[from];
				attr[to++] = attr[from];
				if (!is_ansi_middle(linebuf[from++]))
					break;
			}
			continue;
		}

		width = 0;

		if (!IS_ASCII_OCTET(c) && utf_mode)
		{
			/* Assumes well-formedness validation already done.  */
			LWCHAR ch;

			len = utf_len(c);
			if (from + len > curr)
				break;
			ch = get_wchar(linebuf + from);
			/* Composing/combining chars occupy no column. */
			if (!is_composing_char(ch) && !is_combining_char(prev_ch, ch))
				width = is_wide_char(ch) ? 2 : 1;
			prev_ch = ch;
		} else
		{
			len = 1;
			if (c == '\b')
				/* XXX - Incorrect if several '\b' in a row.  */
				width = (utf_mode && is_wide_char(prev_ch)) ? -2 : -1;
			else if (!control_char(c))
				width = 1;
			prev_ch = 0;
		}

		/*
		 * A two-column char with only one column left to shift:
		 * replace it with a single blank.
		 */
		if (width == 2 && shift - shifted == 1) {
			/* Should never happen when called by pshift_all().  */
			attr[to] = attr[from];
			/*
			 * Assume a wide_char will never be the first half of a
			 * combining_char pair, so reset prev_ch in case we're
			 * followed by a '\b'.
			 */
			prev_ch = linebuf[to++] = ' ';
			from += len;
			shifted++;
			continue;
		}

		/* Adjust width for magic cookies. */
		prev_attr = (to > 0) ? attr[to-1] : AT_NORMAL;
		next_attr = (from + len < curr) ? attr[from + len] : prev_attr;
		if (!is_at_equiv(attr[from], prev_attr) &&
			!is_at_equiv(attr[from], next_attr))
		{
			width += attr_swidth(attr[from]);
			if (from + len < curr)
				width += attr_ewidth(attr[from]);
			if (is_at_equiv(prev_attr, next_attr))
			{
				width += attr_ewidth(prev_attr);
				if (from + len < curr)
					width += attr_swidth(next_attr);
			}
		}

		/* Stop once this char no longer fits in the remaining shift. */
		if (shift - shifted < width)
			break;
		from += len;
		shifted += width;
		if (shifted < 0)
			shifted = 0;
	}
	/* Move the part of the line which is kept down to the margin. */
	while (from < curr)
	{
		linebuf[to] = linebuf[from];
		attr[to++] = attr[from++];
	}
	curr = to;
	column -= shifted;
	cshift += shifted;
}
354 
355 /*
356  *
357  */
358 	public void
359 pshift_all()
360 {
361 	pshift(column);
362 }
363 
364 /*
365  * Return the printing width of the start (enter) sequence
366  * for a given character attribute.
367  */
368 	static int
369 attr_swidth(a)
370 	int a;
371 {
372 	int w = 0;
373 
374 	a = apply_at_specials(a);
375 
376 	if (a & AT_UNDERLINE)
377 		w += ul_s_width;
378 	if (a & AT_BOLD)
379 		w += bo_s_width;
380 	if (a & AT_BLINK)
381 		w += bl_s_width;
382 	if (a & AT_STANDOUT)
383 		w += so_s_width;
384 
385 	return w;
386 }
387 
388 /*
389  * Return the printing width of the end (exit) sequence
390  * for a given character attribute.
391  */
392 	static int
393 attr_ewidth(a)
394 	int a;
395 {
396 	int w = 0;
397 
398 	a = apply_at_specials(a);
399 
400 	if (a & AT_UNDERLINE)
401 		w += ul_e_width;
402 	if (a & AT_BOLD)
403 		w += bo_e_width;
404 	if (a & AT_BLINK)
405 		w += bl_e_width;
406 	if (a & AT_STANDOUT)
407 		w += so_e_width;
408 
409 	return w;
410 }
411 
412 /*
413  * Return the printing width of a given character and attribute,
414  * if the character were added to the current position in the line buffer.
415  * Adding a character with a given attribute may cause an enter or exit
416  * attribute sequence to be inserted, so this must be taken into account.
417  */
418 	static int
419 pwidth(ch, a, prev_ch)
420 	LWCHAR ch;
421 	int a;
422 	LWCHAR prev_ch;
423 {
424 	int w;
425 
426 	if (ch == '\b')
427 		/*
428 		 * Backspace moves backwards one or two positions.
429 		 * XXX - Incorrect if several '\b' in a row.
430 		 */
431 		return (utf_mode && is_wide_char(prev_ch)) ? -2 : -1;
432 
433 	if (!utf_mode || is_ascii_char(ch))
434 	{
435 		if (control_char((char)ch))
436 		{
437 			/*
438 			 * Control characters do unpredictable things,
439 			 * so we don't even try to guess; say it doesn't move.
440 			 * This can only happen if the -r flag is in effect.
441 			 */
442 			return (0);
443 		}
444 	} else
445 	{
446 		if (is_composing_char(ch) || is_combining_char(prev_ch, ch))
447 		{
448 			/*
449 			 * Composing and combining chars take up no space.
450 			 *
451 			 * Some terminals, upon failure to compose a
452 			 * composing character with the character(s) that
453 			 * precede(s) it will actually take up one column
454 			 * for the composing character; there isn't much
455 			 * we could do short of testing the (complex)
456 			 * composition process ourselves and printing
457 			 * a binary representation when it fails.
458 			 */
459 			return (0);
460 		}
461 	}
462 
463 	/*
464 	 * Other characters take one or two columns,
465 	 * plus the width of any attribute enter/exit sequence.
466 	 */
467 	w = 1;
468 	if (is_wide_char(ch))
469 		w++;
470 	if (curr > 0 && !is_at_equiv(attr[curr-1], a))
471 		w += attr_ewidth(attr[curr-1]);
472 	if ((apply_at_specials(a) != AT_NORMAL) &&
473 	    (curr == 0 || !is_at_equiv(attr[curr-1], a)))
474 		w += attr_swidth(a);
475 	return (w);
476 }
477 
/*
 * Delete to the previous base character in the line buffer.
 * Return 1 if one is found.
 *
 * Characters are removed from the end of the buffer one at a time,
 * reducing curr and column, until one with a positive width has been
 * removed (return 1).  The scan gives up (return 0) on reaching the
 * left margin, or a char marked AT_ANSI or AT_BINARY.
 */
	static int
backc()
{
	LWCHAR prev_ch;
	char *p = linebuf + curr;
	/* ch is the last char in the buffer; p now points at its start. */
	LWCHAR ch = step_char(&p, -1, linebuf + lmargin);
	int width;

	/* This assumes that there is no '\b' in linebuf.  */
	while (   curr > lmargin
	       && column > lmargin
	       && (!(attr[curr - 1] & (AT_ANSI|AT_BINARY))))
	{
		/* Drop ch from the buffer. */
		curr = (int) (p - linebuf);
		/* Fetch the char before it; pwidth needs it as context. */
		prev_ch = step_char(&p, -1, linebuf + lmargin);
		width = pwidth(ch, attr[curr], prev_ch);
		column -= width;
		if (width > 0)
			return 1;
		/* Zero-width char removed; keep going backwards. */
		ch = prev_ch;
	}

	return 0;
}
506 
507 /*
508  * Are we currently within a recognized ANSI escape sequence?
509  */
510 	static int
511 in_ansi_esc_seq()
512 {
513 	char *p;
514 
515 	/*
516 	 * Search backwards for either an ESC (which means we ARE in a seq);
517 	 * or an end char (which means we're NOT in a seq).
518 	 */
519 	for (p = &linebuf[curr];  p > linebuf; )
520 	{
521 		LWCHAR ch = step_char(&p, -1, linebuf);
522 		if (IS_CSI_START(ch))
523 			return (1);
524 		if (!is_ansi_middle(ch))
525 			return (0);
526 	}
527 	return (0);
528 }
529 
530 /*
531  * Is a character the end of an ANSI escape sequence?
532  */
533 	public int
534 is_ansi_end(ch)
535 	LWCHAR ch;
536 {
537 	if (!is_ascii_char(ch))
538 		return (0);
539 	return (strchr(end_ansi_chars, (char) ch) != NULL);
540 }
541 
542 /*
543  *
544  */
545 	public int
546 is_ansi_middle(ch)
547 	LWCHAR ch;
548 {
549 	if (!is_ascii_char(ch))
550 		return (0);
551 	if (is_ansi_end(ch))
552 		return (0);
553 	return (strchr(mid_ansi_chars, (char) ch) != NULL);
554 }
555 
/*
 * Append a character and attribute to the line buffer.
 *
 * STORE_CHAR is a convenience wrapper for use inside functions which
 * return int: it makes the calling function return 1 if store_char fails.
 */
#define	STORE_CHAR(ch,a,rep,pos) \
	do { \
		if (store_char((ch),(a),(rep),(pos))) return (1); \
	} while (0)

/*
 * ch	the character to store.
 * a	the attribute to store with it.
 * rep	the bytes which represent ch in the buffer; its length is taken
 *	from utf_len of its first byte.  If NULL, the single byte
 *	(char) ch is stored.
 * pos	file position of the character, used for search highlighting.
 *
 * Returns 1 if the character does not fit on the screen or the buffer
 * cannot be expanded; otherwise 0 (including the case where an
 * unrecognized escape sequence was removed instead of storing ch).
 */
	static int
store_char(ch, a, rep, pos)
	LWCHAR ch;
	int a;
	char *rep;
	POSITION pos;
{
	int w;
	int replen;
	char cs;

	/* Remember the most recent bold/underline attribute. */
	w = (a & (AT_UNDERLINE|AT_BOLD));	/* Pre-use w.  */
	if (w != AT_NORMAL)
		last_overstrike = w;

#if HILITE_SEARCH
	{
		int matches;
		if (is_hilited(pos, pos+1, 0, &matches))
		{
			/*
			 * This character should be highlighted.
			 * Override the attribute passed in.
			 */
			if (a != AT_ANSI)
			{
				if (highest_hilite != NULL_POSITION &&
				    pos > highest_hilite)
				    	highest_hilite = pos;
				a |= AT_HILITE;
			}
		}
	}
#endif

	if (ctldisp == OPT_ONPLUS && in_ansi_esc_seq())
	{
		if (!is_ansi_end(ch) && !is_ansi_middle(ch)) {
			/* Remove whole unrecognized sequence.  */
			char *p = &linebuf[curr];
			LWCHAR bch;
			do {
				bch = step_char(&p, -1, linebuf);
			} while (p > linebuf && !IS_CSI_START(bch));
			curr = (int) (p - linebuf);
			return 0;
		}
		/* Chars inside an escape sequence occupy no columns. */
		a = AT_ANSI;	/* Will force re-AT_'ing around it.  */
		w = 0;
	}
	else if (ctldisp == OPT_ONPLUS && IS_CSI_START(ch))
	{
		/* Start of a new escape sequence. */
		a = AT_ANSI;	/* Will force re-AT_'ing around it.  */
		w = 0;
	}
	else
	{
		/* Ordinary char: its width depends on the preceding char. */
		char *p = &linebuf[curr];
		LWCHAR prev_ch = step_char(&p, -1, linebuf);
		w = pwidth(ch, a, prev_ch);
	}

	if (ctldisp != OPT_ON && column + w + attr_ewidth(a) > sc_width)
		/*
		 * Won't fit on screen.
		 */
		return (1);

	if (rep == NULL)
	{
		cs = (char) ch;
		rep = &cs;
		replen = 1;
	} else
	{
		replen = utf_len(rep[0]);
	}
	/*
	 * NOTE(review): the 6 bytes of slack are presumably reserved for
	 * what pdone() appends without a size check -- confirm.
	 */
	if (curr + replen >= size_linebuf-6)
	{
		/*
		 * Won't fit in line buffer.
		 * Try to expand it.
		 */
		if (expand_linebuf())
			return (1);
	}

	/* Every byte of the representation gets the same attribute. */
	while (replen-- > 0)
	{
		linebuf[curr] = *rep++;
		attr[curr] = a;
		curr++;
	}
	column += w;
	return (0);
}
660 
/*
 * Append a tab to the line buffer.
 * Store spaces to represent the tab.
 *
 * STORE_TAB makes the calling function return 1 if store_tab fails.
 */
#define	STORE_TAB(a,pos) \
	do { if (store_tab((a),(pos))) return (1); } while (0)

/*
 * attr	the attribute to give the stored spaces.  (Note that this
 *	parameter hides the file-scope attr buffer within this function.)
 * pos	file position of the tab character.
 *
 * Returns 1 if the expanded tab does not fit, otherwise 0.
 */
	static int
store_tab(attr, pos)
	int attr;
	POSITION pos;
{
	/* Column within the text, ignoring left margin and current shift. */
	int to_tab = column + cshift - lmargin;
	int i;

	/* From here on, to_tab becomes the number of spaces to store. */
	if (ntabstops < 2 || to_tab >= tabstops[ntabstops-1])
		/* Past the last custom tabstop: stops repeat every tabdefault. */
		to_tab = tabdefault -
		     ((to_tab - tabstops[ntabstops-1]) % tabdefault);
	else
	{
		/* Find the last custom tabstop at or before this column. */
		for (i = ntabstops - 2;  i >= 0;  i--)
			if (to_tab >= tabstops[i])
				break;
		to_tab = tabstops[i+1] - to_tab;
	}

	/* Store nothing unless all of the spaces will fit. */
	if (column + to_tab - 1 + pwidth(' ', attr, 0) + attr_ewidth(attr) > sc_width)
		return 1;

	do {
		STORE_CHAR(' ', attr, " ", pos);
	} while (--to_tab > 0);
	return 0;
}
695 
696 #define STORE_PRCHAR(c, pos) \
697 	do { if (store_prchar((c), (pos))) return 1; } while (0)
698 
699 	static int
700 store_prchar(c, pos)
701 	LWCHAR c;
702 	POSITION pos;
703 {
704 	char *s;
705 
706 	/*
707 	 * Convert to printable representation.
708 	 */
709 	s = prchar(c);
710 
711 	/*
712 	 * Make sure we can get the entire representation
713 	 * of the character on this line.
714 	 */
715 	if (column + (int) strlen(s) - 1 +
716             pwidth(' ', binattr, 0) + attr_ewidth(binattr) > sc_width)
717 		return 1;
718 
719 	for ( ;  *s != 0;  s++)
720 		STORE_CHAR(*s, AT_BINARY, NULL, pos);
721 
722 	return 0;
723 }
724 
725 	static int
726 flush_mbc_buf(pos)
727 	POSITION pos;
728 {
729 	int i;
730 
731 	for (i = 0; i < mbc_buf_index; i++)
732 		if (store_prchar(mbc_buf[i], pos))
733 			return mbc_buf_index - i;
734 
735 	return 0;
736 }
737 
/*
 * Append a character to the line buffer.
 * Expand tabs into spaces, handle underlining, boldfacing, etc.
 * Returns 0 if ok, 1 if couldn't fit in buffer.
 *
 * c is the next input byte and pos its file position.
 * In UTF-8 mode bytes are collected in mbc_buf until a whole character
 * is available, and a nonzero return value is the number of bytes
 * the caller should back up (see the end of the function).
 */
	public int
pappend(c, pos)
	unsigned char c;
	POSITION pos;
{
	int r;

	/* First deal with a char held back by an earlier call. */
	if (pendc)
	{
		/* Collapse consecutive CRs into one pending CR. */
		if (c == '\r' && pendc == '\r')
			return (0);
		if (do_append(pendc, NULL, pendpos))
			/*
			 * Oops.  We've probably lost the char which
			 * was in pendc, since caller won't back up.
			 */
			return (1);
		pendc = '\0';
	}

	if (c == '\r' && bs_mode == BS_SPECIAL)
	{
		if (mbc_buf_len > 0)  /* utf_mode must be on. */
		{
			/* Flush incomplete (truncated) sequence. */
			r = flush_mbc_buf(mbc_pos);
			mbc_buf_index = r + 1;
			mbc_buf_len = 0;
			if (r)
				return (mbc_buf_index);
		}

		/*
		 * Don't put the CR into the buffer until we see
		 * the next char.  If the next char is a newline,
		 * discard the CR.
		 */
		pendc = c;
		pendpos = pos;
		return (0);
	}

	if (!utf_mode)
	{
		/* Every byte is a complete character. */
		r = do_append(c, NULL, pos);
	} else
	{
		/* Perform strict validation in all possible cases. */
		if (mbc_buf_len == 0)
		{
			/* Not inside a multibyte sequence. */
		retry:
			mbc_buf_index = 1;
			*mbc_buf = c;
			if (IS_ASCII_OCTET(c))
				r = do_append(c, NULL, pos);
			else if (IS_UTF8_LEAD(c))
			{
				/* Start collecting a new sequence. */
				mbc_buf_len = utf_len(c);
				mbc_pos = pos;
				return (0);
			} else
				/* UTF8_INVALID or stray UTF8_TRAIL */
				r = flush_mbc_buf(pos);
		} else if (IS_UTF8_TRAIL(c))
		{
			/* Another byte of the sequence in progress. */
			mbc_buf[mbc_buf_index++] = c;
			if (mbc_buf_index < mbc_buf_len)
				return (0);
			if (is_utf8_well_formed(mbc_buf, mbc_buf_index))
				r = do_append(get_wchar(mbc_buf), mbc_buf, mbc_pos);
			else
				/* Complete, but not shortest form, sequence. */
				mbc_buf_index = r = flush_mbc_buf(mbc_pos);
			mbc_buf_len = 0;
		} else
		{
			/* Flush incomplete (truncated) sequence.  */
			r = flush_mbc_buf(mbc_pos);
			mbc_buf_index = r + 1;
			mbc_buf_len = 0;
			/* Handle new char.  */
			if (!r)
				goto retry;
 		}
	}

	/*
	 * If we need to shift the line, do it.
	 * But wait until we get to at least the middle of the screen,
	 * so shifting it doesn't affect the chars we're currently
	 * pappending.  (Bold & underline can get messed up otherwise.)
	 */
	if (cshift < hshift && column > sc_width / 2)
	{
		linebuf[curr] = '\0';
		pshift(hshift - cshift);
	}
	if (r)
	{
		/* How many chars should caller back up? */
		r = (!utf_mode) ? 1 : mbc_buf_index;
	}
	return (r);
}
847 
/*
 * Append one complete character to the line buffer, interpreting
 * backspaces (according to bs_mode), overstrikes, tabs, control
 * characters and binary UTF-8 characters.
 *
 * ch	the character.
 * rep	the bytes representing ch, or NULL; passed on to store_char.
 * pos	file position of the character.
 *
 * Returns 0 if ok, 1 if the character could not be stored
 * (the STORE_* macros return 1 from this function on failure).
 */
	static int
do_append(ch, rep, pos)
	LWCHAR ch;
	char *rep;
	POSITION pos;
{
	int a;
	LWCHAR prev_ch;

	a = AT_NORMAL;

	if (ch == '\b')
	{
		if (bs_mode == BS_CONTROL)
			goto do_control_char;

		/*
		 * A better test is needed here so we don't
		 * backspace over part of the printed
		 * representation of a binary character.
		 */
		if (   curr <= lmargin
		    || column <= lmargin
		    || (attr[curr - 1] & (AT_ANSI|AT_BINARY)))
			/* Nothing we can back up over: show the '\b' itself. */
			STORE_PRCHAR('\b', pos);
		else if (bs_mode == BS_NORMAL)
			STORE_CHAR(ch, AT_NORMAL, NULL, pos);
		else if (bs_mode == BS_SPECIAL)
			/* Remove the previous char; the next one overstrikes it. */
			overstrike = backc();

		return 0;
	}

	if (overstrike > 0)
	{
		/*
		 * Overstrike the character at the current position
		 * in the line buffer.  This will cause either
		 * underline (if a "_" is overstruck),
		 * bold (if an identical character is overstruck),
		 * or just deletion of the character in the buffer.
		 */
		/* In UTF-8 mode, -1 lets following combining chars continue it. */
		overstrike = utf_mode ? -1 : 0;
		if (utf_mode)
		{
			/* To be correct, this must be a base character.  */
			prev_ch = get_wchar(linebuf + curr);
		} else
		{
			prev_ch = (unsigned char) linebuf[curr];
		}
		a = attr[curr];
		if (ch == prev_ch)
		{
			/*
			 * Overstriking a char with itself means make it bold.
			 * But overstriking an underscore with itself is
			 * ambiguous.  It could mean make it bold, or
			 * it could mean make it underlined.
			 * Use the previous overstrike to resolve it.
			 */
			if (ch == '_')
			{
				if ((a & (AT_BOLD|AT_UNDERLINE)) != AT_NORMAL)
					a |= (AT_BOLD|AT_UNDERLINE);
				else if (last_overstrike != AT_NORMAL)
					a |= last_overstrike;
				else
					a |= AT_BOLD;
			} else
				a |= AT_BOLD;
		} else if (ch == '_')
		{
			/* Underscore over a char: keep the char, underlined. */
			a |= AT_UNDERLINE;
			ch = prev_ch;
			rep = linebuf + curr;
		} else if (prev_ch == '_')
		{
			/* Char over an underscore: store the new char, underlined. */
			a |= AT_UNDERLINE;
		}
		/* Else we replace prev_ch, but we keep its attributes.  */
	} else if (overstrike < 0)
	{
		if (   is_composing_char(ch)
		    || is_combining_char(get_wchar(linebuf + curr), ch))
			/* Continuation of the same overstrike.  */
			a = last_overstrike;
		else
			overstrike = 0;
	}

	if (ch == '\t')
	{
		/*
		 * Expand a tab into spaces.
		 */
		switch (bs_mode)
		{
		case BS_CONTROL:
			goto do_control_char;
		case BS_NORMAL:
		case BS_SPECIAL:
			STORE_TAB(a, pos);
			break;
		}
	} else if ((!utf_mode || is_ascii_char(ch)) && control_char((char)ch))
	{
	do_control_char:
		if (ctldisp == OPT_ON || (ctldisp == OPT_ONPLUS && IS_CSI_START(ch)))
		{
			/*
			 * Output as a normal character.
			 */
			STORE_CHAR(ch, AT_NORMAL, rep, pos);
		} else
		{
			/* Output in printable form. */
			STORE_PRCHAR((char) ch, pos);
		}
	} else if (utf_mode && ctldisp != OPT_ON && is_ubin_char(ch))
	{
		char *s;

		/* Binary UTF-8 char: store its printable form (prutfchar). */
		s = prutfchar(ch);

		/* Store nothing unless the whole representation fits. */
		if (column + (int) strlen(s) - 1 +
		    pwidth(' ', binattr, 0) + attr_ewidth(binattr) > sc_width)
			return (1);

		for ( ;  *s != 0;  s++)
			STORE_CHAR(*s, AT_BINARY, NULL, pos);
 	} else
	{
		STORE_CHAR(ch, a, rep, pos);
	}
 	return (0);
}
984 
985 /*
986  *
987  */
988 	public int
989 pflushmbc()
990 {
991 	int r = 0;
992 
993 	if (mbc_buf_len > 0)
994 	{
995 		/* Flush incomplete (truncated) sequence.  */
996 		r = flush_mbc_buf(mbc_pos);
997 		mbc_buf_len = 0;
998 	}
999 	return r;
1000 }
1001 
/*
 * Terminate the line in the line buffer.
 *
 * endline is nonzero if this is really the end of the input line.
 * forw is nonzero if we are moving forward through the file.
 *
 * Flushes any partial multibyte sequence and pending character,
 * completes any outstanding left-shift, and then appends the line
 * terminator.  Note that the bytes appended here are stored without
 * checking them against size_linebuf.
 */
	public void
pdone(endline, forw)
	int endline;
	int forw;
{
	(void) pflushmbc();

	if (pendc && (pendc != '\r' || !endline))
		/*
		 * If we had a pending character, put it in the buffer.
		 * But discard a pending CR if we are at end of line
		 * (that is, discard the CR in a CR/LF sequence).
		 */
		(void) do_append(pendc, NULL, pendpos);

	/*
	 * Make sure we've shifted the line, if we need to.
	 */
	if (cshift < hshift)
		pshift(hshift - cshift);

	/* Only done if 'm' is configured as an ANSI end char. */
	if (ctldisp == OPT_ONPLUS && is_ansi_end('m'))
	{
		/* Switch to normal attribute at end of line. */
		char *p = "\033[m";
		for ( ;  *p != '\0';  p++)
		{
			linebuf[curr] = *p;
			attr[curr++] = AT_ANSI;
		}
	}

	/*
	 * Add a newline if necessary,
	 * and append a '\0' to the end of the line.
	 * We output a newline if we're not at the right edge of the screen,
	 * or if the terminal doesn't auto wrap,
	 * or if this is really the end of the line AND the terminal ignores
	 * a newline at the right edge.
	 * (In the last case we don't want to output a newline if the terminal
	 * doesn't ignore it since that would produce an extra blank line.
	 * But we do want to output a newline if the terminal ignores it in case
	 * the next line is blank.  In that case the single newline output for
	 * that blank line would be ignored!)
	 */
	if (column < sc_width || !auto_wrap || (endline && ignaw) || ctldisp == OPT_ON)
	{
		linebuf[curr] = '\n';
		attr[curr] = AT_NORMAL;
		curr++;
	}
	else if (ignaw && column >= sc_width && forw)
	{
		/*
		 * Terminals with "ignaw" don't wrap until they *really* need
		 * to, i.e. when the character *after* the last one to fit on a
		 * line is output. But they are too hard to deal with when they
		 * get in the state where a full screen width of characters
		 * have been output but the cursor is sitting on the right edge
		 * instead of at the start of the next line.
		 * So we nudge them into wrapping by outputting a space
		 * character plus a backspace.  But do this only if moving
		 * forward; if we're moving backward and drawing this line at
		 * the top of the screen, the space would overwrite the first
		 * char on the next line.  We don't need to do this "nudge"
		 * at the top of the screen anyway.
		 */
		linebuf[curr] = ' ';
		attr[curr++] = AT_NORMAL;
		linebuf[curr] = '\b';
		attr[curr++] = AT_NORMAL;
	}
	/* The terminating NUL is stored but not counted in curr. */
	linebuf[curr] = '\0';
	attr[curr] = AT_NORMAL;
}
1080 
1081 /*
1082  *
1083  */
1084 	public void
1085 set_status_col(c)
1086 	char c;
1087 {
1088 	linebuf[0] = c;
1089 	attr[0] = AT_NORMAL|AT_HILITE;
1090 }
1091 
1092 /*
1093  * Get a character from the current line.
1094  * Return the character as the function return value,
1095  * and the character attribute in *ap.
1096  */
1097 	public int
1098 gline(i, ap)
1099 	int i;
1100 	int *ap;
1101 {
1102 	if (is_null_line)
1103 	{
1104 		/*
1105 		 * If there is no current line, we pretend the line is
1106 		 * either "~" or "", depending on the "twiddle" flag.
1107 		 */
1108 		if (twiddle)
1109 		{
1110 			if (i == 0)
1111 			{
1112 				*ap = AT_BOLD;
1113 				return '~';
1114 			}
1115 			--i;
1116 		}
1117 		/* Make sure we're back to AT_NORMAL before the '\n'.  */
1118 		*ap = AT_NORMAL;
1119 		return i ? '\0' : '\n';
1120 	}
1121 
1122 	*ap = attr[i];
1123 	return (linebuf[i] & 0xFF);
1124 }
1125 
1126 /*
1127  * Indicate that there is no current line.
1128  */
1129 	public void
1130 null_line()
1131 {
1132 	is_null_line = 1;
1133 	cshift = 0;
1134 }
1135 
1136 /*
1137  * Analogous to forw_line(), but deals with "raw lines":
1138  * lines which are not split for screen width.
1139  * {{ This is supposed to be more efficient than forw_line(). }}
1140  */
1141 	public POSITION
1142 forw_raw_line(curr_pos, linep, line_lenp)
1143 	POSITION curr_pos;
1144 	char **linep;
1145 	int *line_lenp;
1146 {
1147 	int n;
1148 	int c;
1149 	POSITION new_pos;
1150 
1151 	if (curr_pos == NULL_POSITION || ch_seek(curr_pos) ||
1152 		(c = ch_forw_get()) == EOI)
1153 		return (NULL_POSITION);
1154 
1155 	n = 0;
1156 	for (;;)
1157 	{
1158 		if (c == '\n' || c == EOI || ABORT_SIGS())
1159 		{
1160 			new_pos = ch_tell();
1161 			break;
1162 		}
1163 		if (n >= size_linebuf-1)
1164 		{
1165 			if (expand_linebuf())
1166 			{
1167 				/*
1168 				 * Overflowed the input buffer.
1169 				 * Pretend the line ended here.
1170 				 */
1171 				new_pos = ch_tell() - 1;
1172 				break;
1173 			}
1174 		}
1175 		linebuf[n++] = c;
1176 		c = ch_forw_get();
1177 	}
1178 	linebuf[n] = '\0';
1179 	if (linep != NULL)
1180 		*linep = linebuf;
1181 	if (line_lenp != NULL)
1182 		*line_lenp = n;
1183 	return (new_pos);
1184 }
1185 
1186 /*
1187  * Analogous to back_line(), but deals with "raw lines".
1188  * {{ This is supposed to be more efficient than back_line(). }}
1189  */
1190 	public POSITION
1191 back_raw_line(curr_pos, linep, line_lenp)
1192 	POSITION curr_pos;
1193 	char **linep;
1194 	int *line_lenp;
1195 {
1196 	int n;
1197 	int c;
1198 	POSITION new_pos;
1199 
1200 	if (curr_pos == NULL_POSITION || curr_pos <= ch_zero() ||
1201 		ch_seek(curr_pos-1))
1202 		return (NULL_POSITION);
1203 
1204 	n = size_linebuf;
1205 	linebuf[--n] = '\0';
1206 	for (;;)
1207 	{
1208 		c = ch_back_get();
1209 		if (c == '\n' || ABORT_SIGS())
1210 		{
1211 			/*
1212 			 * This is the newline ending the previous line.
1213 			 * We have hit the beginning of the line.
1214 			 */
1215 			new_pos = ch_tell() + 1;
1216 			break;
1217 		}
1218 		if (c == EOI)
1219 		{
1220 			/*
1221 			 * We have hit the beginning of the file.
1222 			 * This must be the first line in the file.
1223 			 * This must, of course, be the beginning of the line.
1224 			 */
1225 			new_pos = ch_zero();
1226 			break;
1227 		}
1228 		if (n <= 0)
1229 		{
1230 			int old_size_linebuf = size_linebuf;
1231 			char *fm;
1232 			char *to;
1233 			if (expand_linebuf())
1234 			{
1235 				/*
1236 				 * Overflowed the input buffer.
1237 				 * Pretend the line ended here.
1238 				 */
1239 				new_pos = ch_tell() + 1;
1240 				break;
1241 			}
1242 			/*
1243 			 * Shift the data to the end of the new linebuf.
1244 			 */
1245 			for (fm = linebuf + old_size_linebuf - 1,
1246 			      to = linebuf + size_linebuf - 1;
1247 			     fm >= linebuf;  fm--, to--)
1248 				*to = *fm;
1249 			n = size_linebuf - old_size_linebuf;
1250 		}
1251 		linebuf[--n] = c;
1252 	}
1253 	if (linep != NULL)
1254 		*linep = &linebuf[n];
1255 	if (line_lenp != NULL)
1256 		*line_lenp = size_linebuf - 1 - n;
1257 	return (new_pos);
1258 }
1259 
1260 /*
1261  * Find the shift necessary to show the end of the longest displayed line.
1262  */
1263 	public int
1264 rrshift()
1265 {
1266 	POSITION pos;
1267 	int save_width;
1268 	int line;
1269 	int longest = 0;
1270 
1271 	save_width = sc_width;
1272 	sc_width = INT_MAX;
1273 	hshift = 0;
1274 	pos = position(TOP);
1275 	for (line = 0; line < sc_height && pos != NULL_POSITION; line++)
1276 	{
1277 		pos = forw_line(pos);
1278 		if (column > longest)
1279 			longest = column;
1280 	}
1281 	sc_width = save_width;
1282 	if (longest < sc_width)
1283 		return 0;
1284 	return longest - sc_width;
1285 }
1286