xref: /freebsd/contrib/less/line.c (revision 361e428888e630eb708c72cf31579a25ba5d4f03)
1 /*
2  * Copyright (C) 1984-2015  Mark Nudelman
3  *
4  * You may distribute under the terms of either the GNU General Public
5  * License or the Less License, as specified in the README file.
6  *
7  * For more information, see the README file.
8  */
9 
10 
11 /*
12  * Routines to manipulate the "line buffer".
13  * The line buffer holds a line of output as it is being built
14  * in preparation for output to the screen.
15  */
16 
17 #include "less.h"
18 #include "charset.h"
19 
20 static char *linebuf = NULL;	/* Buffer which holds the current output line */
21 static char *attr = NULL;	/* Extension of linebuf to hold attributes */
22 public int size_linebuf = 0;	/* Size of line buffer (and attr buffer) */
23 
24 static int cshift;		/* Current left-shift of output line buffer */
25 public int hshift;		/* Desired left-shift of output line buffer */
26 public int tabstops[TABSTOP_MAX] = { 0 }; /* Custom tabstops */
27 public int ntabstops = 1;	/* Number of tabstops */
28 public int tabdefault = 8;	/* Default repeated tabstops */
29 public POSITION highest_hilite;	/* Pos of last hilite in file found so far */
30 
31 static int curr;		/* Index into linebuf */
32 static int column;		/* Printable length, accounting for
33 				   backspaces, etc. */
34 static int overstrike;		/* Next char should overstrike previous char */
35 static int last_overstrike = AT_NORMAL;
36 static int is_null_line;	/* There is no current line */
37 static int lmargin;		/* Left margin */
38 static LWCHAR pendc;
39 static POSITION pendpos;
40 static char *end_ansi_chars;
41 static char *mid_ansi_chars;
42 
43 static int attr_swidth();
44 static int attr_ewidth();
45 static int do_append();
46 
47 extern int sigs;
48 extern int bs_mode;
49 extern int linenums;
50 extern int ctldisp;
51 extern int twiddle;
52 extern int binattr;
53 extern int status_col;
54 extern int auto_wrap, ignaw;
55 extern int bo_s_width, bo_e_width;
56 extern int ul_s_width, ul_e_width;
57 extern int bl_s_width, bl_e_width;
58 extern int so_s_width, so_e_width;
59 extern int sc_width, sc_height;
60 extern int utf_mode;
61 extern POSITION start_attnpos;
62 extern POSITION end_attnpos;
63 
64 static char mbc_buf[MAX_UTF_CHAR_LEN];
65 static int mbc_buf_len = 0;
66 static int mbc_buf_index = 0;
67 static POSITION mbc_pos;
68 
69 /*
70  * Initialize from environment variables.
71  */
72 	public void
73 init_line()
74 {
75 	end_ansi_chars = lgetenv("LESSANSIENDCHARS");
76 	if (end_ansi_chars == NULL || *end_ansi_chars == '\0')
77 		end_ansi_chars = "m";
78 
79 	mid_ansi_chars = lgetenv("LESSANSIMIDCHARS");
80 	if (mid_ansi_chars == NULL || *mid_ansi_chars == '\0')
81 		mid_ansi_chars = "0123456789:;[?!\"'#%()*+ ";
82 
83 	linebuf = (char *) ecalloc(LINEBUF_SIZE, sizeof(char));
84 	attr = (char *) ecalloc(LINEBUF_SIZE, sizeof(char));
85 	size_linebuf = LINEBUF_SIZE;
86 }
87 
88 /*
89  * Expand the line buffer.
90  */
91 	static int
92 expand_linebuf()
93 {
94 	/* Double the size of the line buffer. */
95 	int new_size = size_linebuf * 2;
96 
97 	/* Just realloc to expand the buffer, if we can. */
98 #if HAVE_REALLOC
99 	char *new_buf = (char *) realloc(linebuf, new_size);
100 	char *new_attr = (char *) realloc(attr, new_size);
101 #else
102 	char *new_buf = (char *) calloc(new_size, sizeof(char));
103 	char *new_attr = (char *) calloc(new_size, sizeof(char));
104 #endif
105 	if (new_buf == NULL || new_attr == NULL)
106 	{
107 		if (new_attr != NULL)
108 			free(new_attr);
109 		if (new_buf != NULL)
110 			free(new_buf);
111 		return 1;
112 	}
113 #if HAVE_REALLOC
114 	/*
115 	 * We realloc'd the buffers; they already have the old contents.
116 	 */
117 	#if 0
118 	memset(new_buf + size_linebuf, 0, new_size - size_linebuf);
119 	memset(new_attr + size_linebuf, 0, new_size - size_linebuf);
120 	#endif
121 #else
122 	/*
123 	 * We just calloc'd the buffers; copy the old contents.
124 	 */
125 	memcpy(new_buf, linebuf, size_linebuf * sizeof(char));
126 	memcpy(new_attr, attr, size_linebuf * sizeof(char));
127 	free(attr);
128 	free(linebuf);
129 #endif
130 	linebuf = new_buf;
131 	attr = new_attr;
132 	size_linebuf = new_size;
133 	return 0;
134 }
135 
136 /*
137  * Is a character ASCII?
138  */
139 	public int
140 is_ascii_char(ch)
141 	LWCHAR ch;
142 {
143 	return (ch <= 0x7F);
144 }
145 
146 /*
147  * Rewind the line buffer.
148  */
149 	public void
150 prewind()
151 {
152 	curr = 0;
153 	column = 0;
154 	cshift = 0;
155 	overstrike = 0;
156 	last_overstrike = AT_NORMAL;
157 	mbc_buf_len = 0;
158 	is_null_line = 0;
159 	pendc = '\0';
160 	lmargin = 0;
161 	if (status_col)
162 		lmargin += 1;
163 }
164 
165 /*
166  * Insert the line number (of the given position) into the line buffer.
167  */
168 	public void
169 plinenum(pos)
170 	POSITION pos;
171 {
172 	register LINENUM linenum = 0;
173 	register int i;
174 
175 	if (linenums == OPT_ONPLUS)
176 	{
177 		/*
178 		 * Get the line number and put it in the current line.
179 		 * {{ Note: since find_linenum calls forw_raw_line,
180 		 *    it may seek in the input file, requiring the caller
181 		 *    of plinenum to re-seek if necessary. }}
182 		 * {{ Since forw_raw_line modifies linebuf, we must
183 		 *    do this first, before storing anything in linebuf. }}
184 		 */
185 		linenum = find_linenum(pos);
186 	}
187 
188 	/*
189 	 * Display a status column if the -J option is set.
190 	 */
191 	if (status_col)
192 	{
193 		linebuf[curr] = ' ';
194 		if (start_attnpos != NULL_POSITION &&
195 		    pos >= start_attnpos && pos < end_attnpos)
196 			attr[curr] = AT_NORMAL|AT_HILITE;
197 		else
198 			attr[curr] = AT_NORMAL;
199 		curr++;
200 		column++;
201 	}
202 	/*
203 	 * Display the line number at the start of each line
204 	 * if the -N option is set.
205 	 */
206 	if (linenums == OPT_ONPLUS)
207 	{
208 		char buf[INT_STRLEN_BOUND(pos) + 2];
209 		int n;
210 
211 		linenumtoa(linenum, buf);
212 		n = (int) strlen(buf);
213 		if (n < MIN_LINENUM_WIDTH)
214 			n = MIN_LINENUM_WIDTH;
215 		sprintf(linebuf+curr, "%*s ", n, buf);
216 		n++;  /* One space after the line number. */
217 		for (i = 0; i < n; i++)
218 			attr[curr+i] = AT_NORMAL;
219 		curr += n;
220 		column += n;
221 		lmargin += n;
222 	}
223 
224 	/*
225 	 * Append enough spaces to bring us to the lmargin.
226 	 */
227 	while (column < lmargin)
228 	{
229 		linebuf[curr] = ' ';
230 		attr[curr++] = AT_NORMAL;
231 		column++;
232 	}
233 }
234 
235 /*
236  * Shift the input line left.
237  * This means discarding N printable chars at the start of the buffer.
238  */
239 	static void
240 pshift(shift)
241 	int shift;
242 {
243 	LWCHAR prev_ch = 0;
244 	unsigned char c;
245 	int shifted = 0;
246 	int to;
247 	int from;
248 	int len;
249 	int width;
250 	int prev_attr;
251 	int next_attr;
252 
253 	if (shift > column - lmargin)
254 		shift = column - lmargin;
255 	if (shift > curr - lmargin)
256 		shift = curr - lmargin;
257 
258 	to = from = lmargin;
259 	/*
260 	 * We keep on going when shifted == shift
261 	 * to get all combining chars.
262 	 */
263 	while (shifted <= shift && from < curr)
264 	{
265 		c = linebuf[from];
266 		if (ctldisp == OPT_ONPLUS && IS_CSI_START(c))
267 		{
268 			/* Keep cumulative effect.  */
269 			linebuf[to] = c;
270 			attr[to++] = attr[from++];
271 			while (from < curr && linebuf[from])
272 			{
273 				linebuf[to] = linebuf[from];
274 				attr[to++] = attr[from];
275 				if (!is_ansi_middle(linebuf[from++]))
276 					break;
277 			}
278 			continue;
279 		}
280 
281 		width = 0;
282 
283 		if (!IS_ASCII_OCTET(c) && utf_mode)
284 		{
285 			/* Assumes well-formedness validation already done.  */
286 			LWCHAR ch;
287 
288 			len = utf_len(c);
289 			if (from + len > curr)
290 				break;
291 			ch = get_wchar(linebuf + from);
292 			if (!is_composing_char(ch) && !is_combining_char(prev_ch, ch))
293 				width = is_wide_char(ch) ? 2 : 1;
294 			prev_ch = ch;
295 		} else
296 		{
297 			len = 1;
298 			if (c == '\b')
299 				/* XXX - Incorrect if several '\b' in a row.  */
300 				width = (utf_mode && is_wide_char(prev_ch)) ? -2 : -1;
301 			else if (!control_char(c))
302 				width = 1;
303 			prev_ch = 0;
304 		}
305 
306 		if (width == 2 && shift - shifted == 1) {
307 			/* Should never happen when called by pshift_all().  */
308 			attr[to] = attr[from];
309 			/*
310 			 * Assume a wide_char will never be the first half of a
311 			 * combining_char pair, so reset prev_ch in case we're
312 			 * followed by a '\b'.
313 			 */
314 			prev_ch = linebuf[to++] = ' ';
315 			from += len;
316 			shifted++;
317 			continue;
318 		}
319 
320 		/* Adjust width for magic cookies. */
321 		prev_attr = (to > 0) ? attr[to-1] : AT_NORMAL;
322 		next_attr = (from + len < curr) ? attr[from + len] : prev_attr;
323 		if (!is_at_equiv(attr[from], prev_attr) &&
324 			!is_at_equiv(attr[from], next_attr))
325 		{
326 			width += attr_swidth(attr[from]);
327 			if (from + len < curr)
328 				width += attr_ewidth(attr[from]);
329 			if (is_at_equiv(prev_attr, next_attr))
330 			{
331 				width += attr_ewidth(prev_attr);
332 				if (from + len < curr)
333 					width += attr_swidth(next_attr);
334 			}
335 		}
336 
337 		if (shift - shifted < width)
338 			break;
339 		from += len;
340 		shifted += width;
341 		if (shifted < 0)
342 			shifted = 0;
343 	}
344 	while (from < curr)
345 	{
346 		linebuf[to] = linebuf[from];
347 		attr[to++] = attr[from++];
348 	}
349 	curr = to;
350 	column -= shifted;
351 	cshift += shifted;
352 }
353 
354 /*
355  *
356  */
357 	public void
358 pshift_all()
359 {
360 	pshift(column);
361 }
362 
363 /*
364  * Return the printing width of the start (enter) sequence
365  * for a given character attribute.
366  */
367 	static int
368 attr_swidth(a)
369 	int a;
370 {
371 	int w = 0;
372 
373 	a = apply_at_specials(a);
374 
375 	if (a & AT_UNDERLINE)
376 		w += ul_s_width;
377 	if (a & AT_BOLD)
378 		w += bo_s_width;
379 	if (a & AT_BLINK)
380 		w += bl_s_width;
381 	if (a & AT_STANDOUT)
382 		w += so_s_width;
383 
384 	return w;
385 }
386 
387 /*
388  * Return the printing width of the end (exit) sequence
389  * for a given character attribute.
390  */
391 	static int
392 attr_ewidth(a)
393 	int a;
394 {
395 	int w = 0;
396 
397 	a = apply_at_specials(a);
398 
399 	if (a & AT_UNDERLINE)
400 		w += ul_e_width;
401 	if (a & AT_BOLD)
402 		w += bo_e_width;
403 	if (a & AT_BLINK)
404 		w += bl_e_width;
405 	if (a & AT_STANDOUT)
406 		w += so_e_width;
407 
408 	return w;
409 }
410 
411 /*
412  * Return the printing width of a given character and attribute,
413  * if the character were added to the current position in the line buffer.
414  * Adding a character with a given attribute may cause an enter or exit
415  * attribute sequence to be inserted, so this must be taken into account.
416  */
417 	static int
418 pwidth(ch, a, prev_ch)
419 	LWCHAR ch;
420 	int a;
421 	LWCHAR prev_ch;
422 {
423 	int w;
424 
425 	if (ch == '\b')
426 		/*
427 		 * Backspace moves backwards one or two positions.
428 		 * XXX - Incorrect if several '\b' in a row.
429 		 */
430 		return (utf_mode && is_wide_char(prev_ch)) ? -2 : -1;
431 
432 	if (!utf_mode || is_ascii_char(ch))
433 	{
434 		if (control_char((char)ch))
435 		{
436 			/*
437 			 * Control characters do unpredictable things,
438 			 * so we don't even try to guess; say it doesn't move.
439 			 * This can only happen if the -r flag is in effect.
440 			 */
441 			return (0);
442 		}
443 	} else
444 	{
445 		if (is_composing_char(ch) || is_combining_char(prev_ch, ch))
446 		{
447 			/*
448 			 * Composing and combining chars take up no space.
449 			 *
450 			 * Some terminals, upon failure to compose a
451 			 * composing character with the character(s) that
452 			 * precede(s) it will actually take up one column
453 			 * for the composing character; there isn't much
454 			 * we could do short of testing the (complex)
455 			 * composition process ourselves and printing
456 			 * a binary representation when it fails.
457 			 */
458 			return (0);
459 		}
460 	}
461 
462 	/*
463 	 * Other characters take one or two columns,
464 	 * plus the width of any attribute enter/exit sequence.
465 	 */
466 	w = 1;
467 	if (is_wide_char(ch))
468 		w++;
469 	if (curr > 0 && !is_at_equiv(attr[curr-1], a))
470 		w += attr_ewidth(attr[curr-1]);
471 	if ((apply_at_specials(a) != AT_NORMAL) &&
472 	    (curr == 0 || !is_at_equiv(attr[curr-1], a)))
473 		w += attr_swidth(a);
474 	return (w);
475 }
476 
477 /*
478  * Delete to the previous base character in the line buffer.
479  * Return 1 if one is found.
480  */
481 	static int
482 backc()
483 {
484 	LWCHAR prev_ch;
485 	char *p = linebuf + curr;
486 	LWCHAR ch = step_char(&p, -1, linebuf + lmargin);
487 	int width;
488 
489 	/* This assumes that there is no '\b' in linebuf.  */
490 	while (   curr > lmargin
491 	       && column > lmargin
492 	       && (!(attr[curr - 1] & (AT_ANSI|AT_BINARY))))
493 	{
494 		curr = (int) (p - linebuf);
495 		prev_ch = step_char(&p, -1, linebuf + lmargin);
496 		width = pwidth(ch, attr[curr], prev_ch);
497 		column -= width;
498 		if (width > 0)
499 			return 1;
500 		ch = prev_ch;
501 	}
502 
503 	return 0;
504 }
505 
506 /*
507  * Are we currently within a recognized ANSI escape sequence?
508  */
509 	static int
510 in_ansi_esc_seq()
511 {
512 	char *p;
513 
514 	/*
515 	 * Search backwards for either an ESC (which means we ARE in a seq);
516 	 * or an end char (which means we're NOT in a seq).
517 	 */
518 	for (p = &linebuf[curr];  p > linebuf; )
519 	{
520 		LWCHAR ch = step_char(&p, -1, linebuf);
521 		if (IS_CSI_START(ch))
522 			return (1);
523 		if (!is_ansi_middle(ch))
524 			return (0);
525 	}
526 	return (0);
527 }
528 
529 /*
530  * Is a character the end of an ANSI escape sequence?
531  */
532 	public int
533 is_ansi_end(ch)
534 	LWCHAR ch;
535 {
536 	if (!is_ascii_char(ch))
537 		return (0);
538 	return (strchr(end_ansi_chars, (char) ch) != NULL);
539 }
540 
541 /*
542  *
543  */
544 	public int
545 is_ansi_middle(ch)
546 	LWCHAR ch;
547 {
548 	if (!is_ascii_char(ch))
549 		return (0);
550 	if (is_ansi_end(ch))
551 		return (0);
552 	return (strchr(mid_ansi_chars, (char) ch) != NULL);
553 }
554 
555 /*
556  * Append a character and attribute to the line buffer.
557  */
558 #define	STORE_CHAR(ch,a,rep,pos) \
559 	do { \
560 		if (store_char((ch),(a),(rep),(pos))) return (1); \
561 	} while (0)
562 
563 	static int
564 store_char(ch, a, rep, pos)
565 	LWCHAR ch;
566 	int a;
567 	char *rep;
568 	POSITION pos;
569 {
570 	int w;
571 	int replen;
572 	char cs;
573 
574 	w = (a & (AT_UNDERLINE|AT_BOLD));	/* Pre-use w.  */
575 	if (w != AT_NORMAL)
576 		last_overstrike = w;
577 
578 #if HILITE_SEARCH
579 	{
580 		int matches;
581 		if (is_hilited(pos, pos+1, 0, &matches))
582 		{
583 			/*
584 			 * This character should be highlighted.
585 			 * Override the attribute passed in.
586 			 */
587 			if (a != AT_ANSI)
588 			{
589 				if (highest_hilite != NULL_POSITION &&
590 				    pos > highest_hilite)
591 				    	highest_hilite = pos;
592 				a |= AT_HILITE;
593 			}
594 		}
595 	}
596 #endif
597 
598 	if (ctldisp == OPT_ONPLUS && in_ansi_esc_seq())
599 	{
600 		if (!is_ansi_end(ch) && !is_ansi_middle(ch)) {
601 			/* Remove whole unrecognized sequence.  */
602 			char *p = &linebuf[curr];
603 			LWCHAR bch;
604 			do {
605 				bch = step_char(&p, -1, linebuf);
606 			} while (p > linebuf && !IS_CSI_START(bch));
607 			curr = (int) (p - linebuf);
608 			return 0;
609 		}
610 		a = AT_ANSI;	/* Will force re-AT_'ing around it.  */
611 		w = 0;
612 	}
613 	else if (ctldisp == OPT_ONPLUS && IS_CSI_START(ch))
614 	{
615 		a = AT_ANSI;	/* Will force re-AT_'ing around it.  */
616 		w = 0;
617 	}
618 	else
619 	{
620 		char *p = &linebuf[curr];
621 		LWCHAR prev_ch = step_char(&p, -1, linebuf);
622 		w = pwidth(ch, a, prev_ch);
623 	}
624 
625 	if (ctldisp != OPT_ON && column + w + attr_ewidth(a) > sc_width)
626 		/*
627 		 * Won't fit on screen.
628 		 */
629 		return (1);
630 
631 	if (rep == NULL)
632 	{
633 		cs = (char) ch;
634 		rep = &cs;
635 		replen = 1;
636 	} else
637 	{
638 		replen = utf_len(rep[0]);
639 	}
640 	if (curr + replen >= size_linebuf-6)
641 	{
642 		/*
643 		 * Won't fit in line buffer.
644 		 * Try to expand it.
645 		 */
646 		if (expand_linebuf())
647 			return (1);
648 	}
649 
650 	while (replen-- > 0)
651 	{
652 		linebuf[curr] = *rep++;
653 		attr[curr] = a;
654 		curr++;
655 	}
656 	column += w;
657 	return (0);
658 }
659 
660 /*
661  * Append a tab to the line buffer.
662  * Store spaces to represent the tab.
663  */
664 #define	STORE_TAB(a,pos) \
665 	do { if (store_tab((a),(pos))) return (1); } while (0)
666 
667 	static int
668 store_tab(attr, pos)
669 	int attr;
670 	POSITION pos;
671 {
672 	int to_tab = column + cshift - lmargin;
673 	int i;
674 
675 	if (ntabstops < 2 || to_tab >= tabstops[ntabstops-1])
676 		to_tab = tabdefault -
677 		     ((to_tab - tabstops[ntabstops-1]) % tabdefault);
678 	else
679 	{
680 		for (i = ntabstops - 2;  i >= 0;  i--)
681 			if (to_tab >= tabstops[i])
682 				break;
683 		to_tab = tabstops[i+1] - to_tab;
684 	}
685 
686 	if (column + to_tab - 1 + pwidth(' ', attr, 0) + attr_ewidth(attr) > sc_width)
687 		return 1;
688 
689 	do {
690 		STORE_CHAR(' ', attr, " ", pos);
691 	} while (--to_tab > 0);
692 	return 0;
693 }
694 
695 #define STORE_PRCHAR(c, pos) \
696 	do { if (store_prchar((c), (pos))) return 1; } while (0)
697 
698 	static int
699 store_prchar(c, pos)
700 	LWCHAR c;
701 	POSITION pos;
702 {
703 	char *s;
704 
705 	/*
706 	 * Convert to printable representation.
707 	 */
708 	s = prchar(c);
709 
710 	/*
711 	 * Make sure we can get the entire representation
712 	 * of the character on this line.
713 	 */
714 	if (column + (int) strlen(s) - 1 +
715             pwidth(' ', binattr, 0) + attr_ewidth(binattr) > sc_width)
716 		return 1;
717 
718 	for ( ;  *s != 0;  s++)
719 		STORE_CHAR(*s, AT_BINARY, NULL, pos);
720 
721 	return 0;
722 }
723 
724 	static int
725 flush_mbc_buf(pos)
726 	POSITION pos;
727 {
728 	int i;
729 
730 	for (i = 0; i < mbc_buf_index; i++)
731 		if (store_prchar(mbc_buf[i], pos))
732 			return mbc_buf_index - i;
733 
734 	return 0;
735 }
736 
737 /*
738  * Append a character to the line buffer.
739  * Expand tabs into spaces, handle underlining, boldfacing, etc.
740  * Returns 0 if ok, 1 if couldn't fit in buffer.
741  */
742 	public int
743 pappend(c, pos)
744 	unsigned char c;
745 	POSITION pos;
746 {
747 	int r;
748 
749 	if (pendc)
750 	{
751 		if (c == '\r' && pendc == '\r')
752 			return (0);
753 		if (do_append(pendc, NULL, pendpos))
754 			/*
755 			 * Oops.  We've probably lost the char which
756 			 * was in pendc, since caller won't back up.
757 			 */
758 			return (1);
759 		pendc = '\0';
760 	}
761 
762 	if (c == '\r' && bs_mode == BS_SPECIAL)
763 	{
764 		if (mbc_buf_len > 0)  /* utf_mode must be on. */
765 		{
766 			/* Flush incomplete (truncated) sequence. */
767 			r = flush_mbc_buf(mbc_pos);
768 			mbc_buf_index = r + 1;
769 			mbc_buf_len = 0;
770 			if (r)
771 				return (mbc_buf_index);
772 		}
773 
774 		/*
775 		 * Don't put the CR into the buffer until we see
776 		 * the next char.  If the next char is a newline,
777 		 * discard the CR.
778 		 */
779 		pendc = c;
780 		pendpos = pos;
781 		return (0);
782 	}
783 
784 	if (!utf_mode)
785 	{
786 		r = do_append(c, NULL, pos);
787 	} else
788 	{
789 		/* Perform strict validation in all possible cases. */
790 		if (mbc_buf_len == 0)
791 		{
792 		retry:
793 			mbc_buf_index = 1;
794 			*mbc_buf = c;
795 			if (IS_ASCII_OCTET(c))
796 				r = do_append(c, NULL, pos);
797 			else if (IS_UTF8_LEAD(c))
798 			{
799 				mbc_buf_len = utf_len(c);
800 				mbc_pos = pos;
801 				return (0);
802 			} else
803 				/* UTF8_INVALID or stray UTF8_TRAIL */
804 				r = flush_mbc_buf(pos);
805 		} else if (IS_UTF8_TRAIL(c))
806 		{
807 			mbc_buf[mbc_buf_index++] = c;
808 			if (mbc_buf_index < mbc_buf_len)
809 				return (0);
810 			if (is_utf8_well_formed(mbc_buf, mbc_buf_index))
811 				r = do_append(get_wchar(mbc_buf), mbc_buf, mbc_pos);
812 			else
813 				/* Complete, but not shortest form, sequence. */
814 				mbc_buf_index = r = flush_mbc_buf(mbc_pos);
815 			mbc_buf_len = 0;
816 		} else
817 		{
818 			/* Flush incomplete (truncated) sequence.  */
819 			r = flush_mbc_buf(mbc_pos);
820 			mbc_buf_index = r + 1;
821 			mbc_buf_len = 0;
822 			/* Handle new char.  */
823 			if (!r)
824 				goto retry;
825  		}
826 	}
827 
828 	/*
829 	 * If we need to shift the line, do it.
830 	 * But wait until we get to at least the middle of the screen,
831 	 * so shifting it doesn't affect the chars we're currently
832 	 * pappending.  (Bold & underline can get messed up otherwise.)
833 	 */
834 	if (cshift < hshift && column > sc_width / 2)
835 	{
836 		linebuf[curr] = '\0';
837 		pshift(hshift - cshift);
838 	}
839 	if (r)
840 	{
841 		/* How many chars should caller back up? */
842 		r = (!utf_mode) ? 1 : mbc_buf_index;
843 	}
844 	return (r);
845 }
846 
847 	static int
848 do_append(ch, rep, pos)
849 	LWCHAR ch;
850 	char *rep;
851 	POSITION pos;
852 {
853 	register int a;
854 	LWCHAR prev_ch;
855 
856 	a = AT_NORMAL;
857 
858 	if (ch == '\b')
859 	{
860 		if (bs_mode == BS_CONTROL)
861 			goto do_control_char;
862 
863 		/*
864 		 * A better test is needed here so we don't
865 		 * backspace over part of the printed
866 		 * representation of a binary character.
867 		 */
868 		if (   curr <= lmargin
869 		    || column <= lmargin
870 		    || (attr[curr - 1] & (AT_ANSI|AT_BINARY)))
871 			STORE_PRCHAR('\b', pos);
872 		else if (bs_mode == BS_NORMAL)
873 			STORE_CHAR(ch, AT_NORMAL, NULL, pos);
874 		else if (bs_mode == BS_SPECIAL)
875 			overstrike = backc();
876 
877 		return 0;
878 	}
879 
880 	if (overstrike > 0)
881 	{
882 		/*
883 		 * Overstrike the character at the current position
884 		 * in the line buffer.  This will cause either
885 		 * underline (if a "_" is overstruck),
886 		 * bold (if an identical character is overstruck),
887 		 * or just deletion of the character in the buffer.
888 		 */
889 		overstrike = utf_mode ? -1 : 0;
890 		if (utf_mode)
891 		{
892 			/* To be correct, this must be a base character.  */
893 			prev_ch = get_wchar(linebuf + curr);
894 		} else
895 		{
896 			prev_ch = (unsigned char) linebuf[curr];
897 		}
898 		a = attr[curr];
899 		if (ch == prev_ch)
900 		{
901 			/*
902 			 * Overstriking a char with itself means make it bold.
903 			 * But overstriking an underscore with itself is
904 			 * ambiguous.  It could mean make it bold, or
905 			 * it could mean make it underlined.
906 			 * Use the previous overstrike to resolve it.
907 			 */
908 			if (ch == '_')
909 			{
910 				if ((a & (AT_BOLD|AT_UNDERLINE)) != AT_NORMAL)
911 					a |= (AT_BOLD|AT_UNDERLINE);
912 				else if (last_overstrike != AT_NORMAL)
913 					a |= last_overstrike;
914 				else
915 					a |= AT_BOLD;
916 			} else
917 				a |= AT_BOLD;
918 		} else if (ch == '_')
919 		{
920 			a |= AT_UNDERLINE;
921 			ch = prev_ch;
922 			rep = linebuf + curr;
923 		} else if (prev_ch == '_')
924 		{
925 			a |= AT_UNDERLINE;
926 		}
927 		/* Else we replace prev_ch, but we keep its attributes.  */
928 	} else if (overstrike < 0)
929 	{
930 		if (   is_composing_char(ch)
931 		    || is_combining_char(get_wchar(linebuf + curr), ch))
932 			/* Continuation of the same overstrike.  */
933 			a = last_overstrike;
934 		else
935 			overstrike = 0;
936 	}
937 
938 	if (ch == '\t')
939 	{
940 		/*
941 		 * Expand a tab into spaces.
942 		 */
943 		switch (bs_mode)
944 		{
945 		case BS_CONTROL:
946 			goto do_control_char;
947 		case BS_NORMAL:
948 		case BS_SPECIAL:
949 			STORE_TAB(a, pos);
950 			break;
951 		}
952 	} else if ((!utf_mode || is_ascii_char(ch)) && control_char((char)ch))
953 	{
954 	do_control_char:
955 		if (ctldisp == OPT_ON || (ctldisp == OPT_ONPLUS && IS_CSI_START(ch)))
956 		{
957 			/*
958 			 * Output as a normal character.
959 			 */
960 			STORE_CHAR(ch, AT_NORMAL, rep, pos);
961 		} else
962 		{
963 			STORE_PRCHAR((char) ch, pos);
964 		}
965 	} else if (utf_mode && ctldisp != OPT_ON && is_ubin_char(ch))
966 	{
967 		char *s;
968 
969 		s = prutfchar(ch);
970 
971 		if (column + (int) strlen(s) - 1 +
972 		    pwidth(' ', binattr, 0) + attr_ewidth(binattr) > sc_width)
973 			return (1);
974 
975 		for ( ;  *s != 0;  s++)
976 			STORE_CHAR(*s, AT_BINARY, NULL, pos);
977  	} else
978 	{
979 		STORE_CHAR(ch, a, rep, pos);
980 	}
981  	return (0);
982 }
983 
984 /*
985  *
986  */
987 	public int
988 pflushmbc()
989 {
990 	int r = 0;
991 
992 	if (mbc_buf_len > 0)
993 	{
994 		/* Flush incomplete (truncated) sequence.  */
995 		r = flush_mbc_buf(mbc_pos);
996 		mbc_buf_len = 0;
997 	}
998 	return r;
999 }
1000 
1001 /*
1002  * Terminate the line in the line buffer.
1003  */
1004 	public void
1005 pdone(endline, forw)
1006 	int endline;
1007 	int forw;
1008 {
1009 	(void) pflushmbc();
1010 
1011 	if (pendc && (pendc != '\r' || !endline))
1012 		/*
1013 		 * If we had a pending character, put it in the buffer.
1014 		 * But discard a pending CR if we are at end of line
1015 		 * (that is, discard the CR in a CR/LF sequence).
1016 		 */
1017 		(void) do_append(pendc, NULL, pendpos);
1018 
1019 	/*
1020 	 * Make sure we've shifted the line, if we need to.
1021 	 */
1022 	if (cshift < hshift)
1023 		pshift(hshift - cshift);
1024 
1025 	if (ctldisp == OPT_ONPLUS && is_ansi_end('m'))
1026 	{
1027 		/* Switch to normal attribute at end of line. */
1028 		char *p = "\033[m";
1029 		for ( ;  *p != '\0';  p++)
1030 		{
1031 			linebuf[curr] = *p;
1032 			attr[curr++] = AT_ANSI;
1033 		}
1034 	}
1035 
1036 	/*
1037 	 * Add a newline if necessary,
1038 	 * and append a '\0' to the end of the line.
1039 	 * We output a newline if we're not at the right edge of the screen,
1040 	 * or if the terminal doesn't auto wrap,
1041 	 * or if this is really the end of the line AND the terminal ignores
1042 	 * a newline at the right edge.
1043 	 * (In the last case we don't want to output a newline if the terminal
1044 	 * doesn't ignore it since that would produce an extra blank line.
1045 	 * But we do want to output a newline if the terminal ignores it in case
1046 	 * the next line is blank.  In that case the single newline output for
1047 	 * that blank line would be ignored!)
1048 	 */
1049 	if (column < sc_width || !auto_wrap || (endline && ignaw) || ctldisp == OPT_ON)
1050 	{
1051 		linebuf[curr] = '\n';
1052 		attr[curr] = AT_NORMAL;
1053 		curr++;
1054 	}
1055 	else if (ignaw && column >= sc_width && forw)
1056 	{
1057 		/*
1058 		 * Terminals with "ignaw" don't wrap until they *really* need
1059 		 * to, i.e. when the character *after* the last one to fit on a
1060 		 * line is output. But they are too hard to deal with when they
1061 		 * get in the state where a full screen width of characters
1062 		 * have been output but the cursor is sitting on the right edge
1063 		 * instead of at the start of the next line.
1064 		 * So we nudge them into wrapping by outputting a space
1065 		 * character plus a backspace.  But do this only if moving
1066 		 * forward; if we're moving backward and drawing this line at
1067 		 * the top of the screen, the space would overwrite the first
1068 		 * char on the next line.  We don't need to do this "nudge"
1069 		 * at the top of the screen anyway.
1070 		 */
1071 		linebuf[curr] = ' ';
1072 		attr[curr++] = AT_NORMAL;
1073 		linebuf[curr] = '\b';
1074 		attr[curr++] = AT_NORMAL;
1075 	}
1076 	linebuf[curr] = '\0';
1077 	attr[curr] = AT_NORMAL;
1078 }
1079 
1080 /*
1081  *
1082  */
1083 	public void
1084 set_status_col(c)
1085 	char c;
1086 {
1087 	linebuf[0] = c;
1088 	attr[0] = AT_NORMAL|AT_HILITE;
1089 }
1090 
1091 /*
1092  * Get a character from the current line.
1093  * Return the character as the function return value,
1094  * and the character attribute in *ap.
1095  */
1096 	public int
1097 gline(i, ap)
1098 	register int i;
1099 	register int *ap;
1100 {
1101 	if (is_null_line)
1102 	{
1103 		/*
1104 		 * If there is no current line, we pretend the line is
1105 		 * either "~" or "", depending on the "twiddle" flag.
1106 		 */
1107 		if (twiddle)
1108 		{
1109 			if (i == 0)
1110 			{
1111 				*ap = AT_BOLD;
1112 				return '~';
1113 			}
1114 			--i;
1115 		}
1116 		/* Make sure we're back to AT_NORMAL before the '\n'.  */
1117 		*ap = AT_NORMAL;
1118 		return i ? '\0' : '\n';
1119 	}
1120 
1121 	*ap = attr[i];
1122 	return (linebuf[i] & 0xFF);
1123 }
1124 
1125 /*
1126  * Indicate that there is no current line.
1127  */
1128 	public void
1129 null_line()
1130 {
1131 	is_null_line = 1;
1132 	cshift = 0;
1133 }
1134 
1135 /*
1136  * Analogous to forw_line(), but deals with "raw lines":
1137  * lines which are not split for screen width.
1138  * {{ This is supposed to be more efficient than forw_line(). }}
1139  */
1140 	public POSITION
1141 forw_raw_line(curr_pos, linep, line_lenp)
1142 	POSITION curr_pos;
1143 	char **linep;
1144 	int *line_lenp;
1145 {
1146 	register int n;
1147 	register int c;
1148 	POSITION new_pos;
1149 
1150 	if (curr_pos == NULL_POSITION || ch_seek(curr_pos) ||
1151 		(c = ch_forw_get()) == EOI)
1152 		return (NULL_POSITION);
1153 
1154 	n = 0;
1155 	for (;;)
1156 	{
1157 		if (c == '\n' || c == EOI || ABORT_SIGS())
1158 		{
1159 			new_pos = ch_tell();
1160 			break;
1161 		}
1162 		if (n >= size_linebuf-1)
1163 		{
1164 			if (expand_linebuf())
1165 			{
1166 				/*
1167 				 * Overflowed the input buffer.
1168 				 * Pretend the line ended here.
1169 				 */
1170 				new_pos = ch_tell() - 1;
1171 				break;
1172 			}
1173 		}
1174 		linebuf[n++] = c;
1175 		c = ch_forw_get();
1176 	}
1177 	linebuf[n] = '\0';
1178 	if (linep != NULL)
1179 		*linep = linebuf;
1180 	if (line_lenp != NULL)
1181 		*line_lenp = n;
1182 	return (new_pos);
1183 }
1184 
1185 /*
1186  * Analogous to back_line(), but deals with "raw lines".
1187  * {{ This is supposed to be more efficient than back_line(). }}
1188  */
1189 	public POSITION
1190 back_raw_line(curr_pos, linep, line_lenp)
1191 	POSITION curr_pos;
1192 	char **linep;
1193 	int *line_lenp;
1194 {
1195 	register int n;
1196 	register int c;
1197 	POSITION new_pos;
1198 
1199 	if (curr_pos == NULL_POSITION || curr_pos <= ch_zero() ||
1200 		ch_seek(curr_pos-1))
1201 		return (NULL_POSITION);
1202 
1203 	n = size_linebuf;
1204 	linebuf[--n] = '\0';
1205 	for (;;)
1206 	{
1207 		c = ch_back_get();
1208 		if (c == '\n' || ABORT_SIGS())
1209 		{
1210 			/*
1211 			 * This is the newline ending the previous line.
1212 			 * We have hit the beginning of the line.
1213 			 */
1214 			new_pos = ch_tell() + 1;
1215 			break;
1216 		}
1217 		if (c == EOI)
1218 		{
1219 			/*
1220 			 * We have hit the beginning of the file.
1221 			 * This must be the first line in the file.
1222 			 * This must, of course, be the beginning of the line.
1223 			 */
1224 			new_pos = ch_zero();
1225 			break;
1226 		}
1227 		if (n <= 0)
1228 		{
1229 			int old_size_linebuf = size_linebuf;
1230 			char *fm;
1231 			char *to;
1232 			if (expand_linebuf())
1233 			{
1234 				/*
1235 				 * Overflowed the input buffer.
1236 				 * Pretend the line ended here.
1237 				 */
1238 				new_pos = ch_tell() + 1;
1239 				break;
1240 			}
1241 			/*
1242 			 * Shift the data to the end of the new linebuf.
1243 			 */
1244 			for (fm = linebuf + old_size_linebuf - 1,
1245 			      to = linebuf + size_linebuf - 1;
1246 			     fm >= linebuf;  fm--, to--)
1247 				*to = *fm;
1248 			n = size_linebuf - old_size_linebuf;
1249 		}
1250 		linebuf[--n] = c;
1251 	}
1252 	if (linep != NULL)
1253 		*linep = &linebuf[n];
1254 	if (line_lenp != NULL)
1255 		*line_lenp = size_linebuf - 1 - n;
1256 	return (new_pos);
1257 }
1258