xref: /freebsd/contrib/less/line.c (revision f0a75d274af375d15b97b830966b99a02b7db911)
1 /* $FreeBSD$ */
2 /*
3  * Copyright (C) 1984-2005  Mark Nudelman
4  *
5  * You may distribute under the terms of either the GNU General Public
6  * License or the Less License, as specified in the README file.
7  *
8  * For more information about less, or for information on how to
9  * contact the author, see the README file.
10  */
11 
12 
13 /*
14  * Routines to manipulate the "line buffer".
15  * The line buffer holds a line of output as it is being built
16  * in preparation for output to the screen.
17  */
18 
19 #include "less.h"
20 #include "charset.h"
21 
/*
 * State of the line currently being built.
 * linebuf and attr are parallel arrays: attr[i] holds the attribute
 * of the byte stored in linebuf[i].
 */
static char *linebuf = NULL;	/* Buffer which holds the current output line */
static char *attr = NULL;	/* Extension of linebuf to hold attributes */
public int size_linebuf = 0;	/* Size of line buffer (and attr buffer) */

static int cshift;		/* Current left-shift of output line buffer */
public int hshift;		/* Desired left-shift of output line buffer */
public int tabstops[TABSTOP_MAX] = { 0 }; /* Custom tabstops */
public int ntabstops = 1;	/* Number of tabstops */
public int tabdefault = 8;	/* Default repeated tabstops */

static int curr;		/* Index into linebuf */
static int column;		/* Printable length, accounting for
				   backspaces, etc. */
static int overstrike;		/* Next char should overstrike previous char */
/* Bold/underline bits of the last stored attribute that had any set */
static int last_overstrike = AT_NORMAL;
static int is_null_line;	/* There is no current line */
static int lmargin;		/* Left margin */
static int line_matches;	/* Number of search matches in this line */
/* Char held back by pappend() until the next char is seen ('\0' if none) */
static char pendc;
/* File position that belongs to pendc */
static POSITION pendpos;
/* Chars which end an ANSI escape sequence (set up in init_line) */
static char *end_ansi_chars;
/* Chars which may appear inside an ANSI escape sequence (set up in init_line) */
static char *mid_ansi_chars;

/* Forward declarations of static functions defined later in this file. */
static int attr_swidth();
static int attr_ewidth();
static int do_append();

/* Variables defined in other source files. */
extern int sigs;
extern int bs_mode;
extern int linenums;
extern int ctldisp;
extern int twiddle;
extern int binattr;
extern int status_col;
extern int auto_wrap, ignaw;
extern int bo_s_width, bo_e_width;
extern int ul_s_width, ul_e_width;
extern int bl_s_width, bl_e_width;
extern int so_s_width, so_e_width;
extern int sc_width, sc_height;
extern int utf_mode;
extern POSITION start_attnpos;
extern POSITION end_attnpos;

/*
 * Multibyte (UTF-8) sequence being assembled by pappend().
 */
/* Bytes of the sequence collected so far */
static char mbc_buf[MAX_UTF_CHAR_LEN];
/* Expected length of the sequence; 0 when no sequence is in progress */
static int mbc_buf_len = 0;
/* Number of bytes currently held in mbc_buf */
static int mbc_buf_index = 0;
/* File position of the first byte of the sequence */
static POSITION mbc_pos;
70 
71 /*
72  * Initialize from environment variables.
73  */
74 	public void
75 init_line()
76 {
77 	end_ansi_chars = lgetenv("LESSANSIENDCHARS");
78 	if (end_ansi_chars == NULL || *end_ansi_chars == '\0')
79 		end_ansi_chars = "m";
80 
81 	mid_ansi_chars = lgetenv("LESSANSIMIDCHARS");
82 	if (mid_ansi_chars == NULL || *mid_ansi_chars == '\0')
83 		mid_ansi_chars = "0123456789;[?!\"'#%()*+ ";
84 
85 	linebuf = (char *) ecalloc(LINEBUF_SIZE, sizeof(char));
86 	attr = (char *) ecalloc(LINEBUF_SIZE, sizeof(char));
87 	size_linebuf = LINEBUF_SIZE;
88 }
89 
90 /*
91  * Expand the line buffer.
92  */
93 	static int
94 expand_linebuf()
95 {
96 	/* Double the size of the line buffer. */
97 	int new_size = size_linebuf * 2;
98 
99 	/* Just realloc to expand the buffer, if we can. */
100 #if HAVE_REALLOC
101 	char *new_buf = (char *) realloc(linebuf, new_size);
102 	char *new_attr = (char *) realloc(attr, new_size);
103 #else
104 	char *new_buf = (char *) calloc(new_size, sizeof(char));
105 	char *new_attr = (char *) calloc(new_size, sizeof(char));
106 #endif
107 	if (new_buf == NULL || new_attr == NULL)
108 	{
109 		if (new_attr != NULL)
110 			free(new_attr);
111 		if (new_buf != NULL)
112 			free(new_buf);
113 		return 1;
114 	}
115 #if HAVE_REALLOC
116 	/*
117 	 * We realloc'd the buffers; they already have the old contents.
118 	 */
119 	#if 0
120 	memset(new_buf + size_linebuf, 0, new_size - size_linebuf);
121 	memset(new_attr + size_linebuf, 0, new_size - size_linebuf);
122 	#endif
123 #else
124 	/*
125 	 * We just calloc'd the buffers; copy the old contents.
126 	 */
127 	memcpy(new_buf, linebuf, size_linebuf * sizeof(char));
128 	memcpy(new_attr, attr, size_linebuf * sizeof(char));
129 	free(attr);
130 	free(linebuf);
131 #endif
132 	linebuf = new_buf;
133 	attr = new_attr;
134 	size_linebuf = new_size;
135 	return 0;
136 }
137 
138 /*
139  * Is a character ASCII?
140  */
141 	public int
142 is_ascii_char(ch)
143 	LWCHAR ch;
144 {
145 	return (ch <= 0x7F);
146 }
147 
148 /*
149  * Rewind the line buffer.
150  */
151 	public void
152 prewind()
153 {
154 	curr = 0;
155 	column = 0;
156 	cshift = 0;
157 	overstrike = 0;
158 	last_overstrike = AT_NORMAL;
159 	mbc_buf_len = 0;
160 	is_null_line = 0;
161 	pendc = '\0';
162 	lmargin = 0;
163 	if (status_col)
164 		lmargin += 1;
165 #if HILITE_SEARCH
166 	line_matches = 0;
167 #endif
168 }
169 
170 /*
171  * Insert the line number (of the given position) into the line buffer.
172  */
173 	public void
174 plinenum(pos)
175 	POSITION pos;
176 {
177 	register LINENUM linenum = 0;
178 	register int i;
179 
180 	if (linenums == OPT_ONPLUS)
181 	{
182 		/*
183 		 * Get the line number and put it in the current line.
184 		 * {{ Note: since find_linenum calls forw_raw_line,
185 		 *    it may seek in the input file, requiring the caller
186 		 *    of plinenum to re-seek if necessary. }}
187 		 * {{ Since forw_raw_line modifies linebuf, we must
188 		 *    do this first, before storing anything in linebuf. }}
189 		 */
190 		linenum = find_linenum(pos);
191 	}
192 
193 	/*
194 	 * Display a status column if the -J option is set.
195 	 */
196 	if (status_col)
197 	{
198 		linebuf[curr] = ' ';
199 		if (start_attnpos != NULL_POSITION &&
200 		    pos >= start_attnpos && pos < end_attnpos)
201 			attr[curr] = AT_NORMAL|AT_HILITE;
202 		else
203 			attr[curr] = AT_NORMAL;
204 		curr++;
205 		column++;
206 	}
207 	/*
208 	 * Display the line number at the start of each line
209 	 * if the -N option is set.
210 	 */
211 	if (linenums == OPT_ONPLUS)
212 	{
213 		char buf[INT_STRLEN_BOUND(pos) + 2];
214 		int n;
215 
216 		linenumtoa(linenum, buf);
217 		n = strlen(buf);
218 		if (n < MIN_LINENUM_WIDTH)
219 			n = MIN_LINENUM_WIDTH;
220 		sprintf(linebuf+curr, "%*s ", n, buf);
221 		n++;  /* One space after the line number. */
222 		for (i = 0; i < n; i++)
223 			attr[curr+i] = AT_NORMAL;
224 		curr += n;
225 		column += n;
226 		lmargin += n;
227 	}
228 
229 	/*
230 	 * Append enough spaces to bring us to the lmargin.
231 	 */
232 	while (column < lmargin)
233 	{
234 		linebuf[curr] = ' ';
235 		attr[curr++] = AT_NORMAL;
236 		column++;
237 	}
238 }
239 
/*
 * Shift the input line left.
 * This means discarding N printable chars at the start of the buffer.
 *
 * shift is the number of columns to discard; it is clamped so that
 * nothing at or before the left margin (lmargin) is removed.
 * The bytes that survive are moved down to start at lmargin.
 * On return, curr is the new end of the buffer, column has been
 * reduced by the number of columns actually removed and cshift
 * has been increased by the same amount.
 */
	static void
pshift(shift)
	int shift;
{
	LWCHAR prev_ch = 0;	/* Previous UTF-8 char seen, or 0 */
	unsigned char c;
	int shifted = 0;	/* Columns discarded so far */
	int to;			/* Write index into linebuf/attr */
	int from;		/* Read index into linebuf/attr */
	int len;		/* Byte length of the char at from */
	int width;		/* Column width of the char at from */
	int prev_attr;
	int next_attr;

	/* Don't shift by more than what lies beyond the left margin. */
	if (shift > column - lmargin)
		shift = column - lmargin;
	if (shift > curr - lmargin)
		shift = curr - lmargin;

	to = from = lmargin;
	/*
	 * We keep on going when shifted == shift
	 * to get all combining chars.
	 */
	while (shifted <= shift && from < curr)
	{
		c = linebuf[from];
		if (c == ESC && ctldisp == OPT_ONPLUS)
		{
			/* Keep cumulative effect.  */
			/*
			 * Escape sequences are not discarded: copy the ESC
			 * and the bytes after it, stopping after the first
			 * byte that is not an ANSI "middle" char (or at a
			 * NUL / the end of the buffer).
			 */
			linebuf[to] = c;
			attr[to++] = attr[from++];
			while (from < curr && linebuf[from])
			{
				linebuf[to] = linebuf[from];
				attr[to++] = attr[from];
				if (!is_ansi_middle(linebuf[from++]))
					break;
			}
			continue;
		}

		width = 0;

		if (!IS_ASCII_OCTET(c) && utf_mode)
		{
			/* Assumes well-formedness validation already done.  */
			LWCHAR ch;

			len = utf_len(c);
			if (from + len > curr)
				break;
			ch = get_wchar(linebuf + from);
			/* Composing and combining chars occupy no column. */
			if (!is_composing_char(ch) && !is_combining_char(prev_ch, ch))
				width = is_wide_char(ch) ? 2 : 1;
			prev_ch = ch;
		} else
		{
			len = 1;
			if (c == '\b')
				/* XXX - Incorrect if several '\b' in a row.  */
				width = (utf_mode && is_wide_char(prev_ch)) ? -2 : -1;
			else if (!control_char(c))
				width = 1;
			prev_ch = 0;
		}

		if (width == 2 && shift - shifted == 1) {
			/* Should never happen when called by pshift_all().  */
			/*
			 * Only one column remains to be shifted but the char
			 * is two columns wide: replace it with a single blank.
			 */
			attr[to] = attr[from];
			/*
			 * Assume a wide_char will never be the first half of a
			 * combining_char pair, so reset prev_ch in case we're
			 * followed by a '\b'.
			 */
			prev_ch = linebuf[to++] = ' ';
			from += len;
			shifted++;
			continue;
		}

		/* Adjust width for magic cookies. */
		/*
		 * If this char's attribute matches neither its predecessor's
		 * nor its successor's, add the widths of the attribute
		 * enter/exit sequences that surround it.
		 */
		prev_attr = (to > 0) ? attr[to-1] : AT_NORMAL;
		next_attr = (from + len < curr) ? attr[from + len] : prev_attr;
		if (!is_at_equiv(attr[from], prev_attr) &&
			!is_at_equiv(attr[from], next_attr))
		{
			width += attr_swidth(attr[from]);
			if (from + len < curr)
				width += attr_ewidth(attr[from]);
			if (is_at_equiv(prev_attr, next_attr))
			{
				width += attr_ewidth(prev_attr);
				if (from + len < curr)
					width += attr_swidth(next_attr);
			}
		}

		/* Stop if discarding this char would shift too far. */
		if (shift - shifted < width)
			break;
		from += len;
		shifted += width;
		/* Backspaces have negative width; never go below zero. */
		if (shifted < 0)
			shifted = 0;
	}
	/* Slide the remainder of the line down to the write position. */
	while (from < curr)
	{
		linebuf[to] = linebuf[from];
		attr[to++] = attr[from++];
	}
	curr = to;
	column -= shifted;
	cshift += shifted;
}
358 
/*
 * Shift the line left as far as possible.
 * Requests a shift of `column` columns; pshift() clamps the request
 * to what lies beyond the left margin.
 */
	public void
pshift_all()
{
	pshift(column);
}
367 
368 /*
369  * Return the printing width of the start (enter) sequence
370  * for a given character attribute.
371  */
372 	static int
373 attr_swidth(a)
374 	int a;
375 {
376 	int w = 0;
377 
378 	a = apply_at_specials(a);
379 
380 	if (a & AT_UNDERLINE)
381 		w += ul_s_width;
382 	if (a & AT_BOLD)
383 		w += bo_s_width;
384 	if (a & AT_BLINK)
385 		w += bl_s_width;
386 	if (a & AT_STANDOUT)
387 		w += so_s_width;
388 
389 	return w;
390 }
391 
392 /*
393  * Return the printing width of the end (exit) sequence
394  * for a given character attribute.
395  */
396 	static int
397 attr_ewidth(a)
398 	int a;
399 {
400 	int w = 0;
401 
402 	a = apply_at_specials(a);
403 
404 	if (a & AT_UNDERLINE)
405 		w += ul_e_width;
406 	if (a & AT_BOLD)
407 		w += bo_e_width;
408 	if (a & AT_BLINK)
409 		w += bl_e_width;
410 	if (a & AT_STANDOUT)
411 		w += so_e_width;
412 
413 	return w;
414 }
415 
416 /*
417  * Return the printing width of a given character and attribute,
418  * if the character were added to the current position in the line buffer.
419  * Adding a character with a given attribute may cause an enter or exit
420  * attribute sequence to be inserted, so this must be taken into account.
421  */
422 	static int
423 pwidth(ch, a, prev_ch)
424 	LWCHAR ch;
425 	int a;
426 	LWCHAR prev_ch;
427 {
428 	int w;
429 
430 	if (ch == '\b')
431 		/*
432 		 * Backspace moves backwards one or two positions.
433 		 * XXX - Incorrect if several '\b' in a row.
434 		 */
435 		return (utf_mode && is_wide_char(prev_ch)) ? -2 : -1;
436 
437 	if (!utf_mode || is_ascii_char(ch))
438 	{
439 		if (control_char((char)ch))
440 		{
441 			/*
442 			 * Control characters do unpredictable things,
443 			 * so we don't even try to guess; say it doesn't move.
444 			 * This can only happen if the -r flag is in effect.
445 			 */
446 			return (0);
447 		}
448 	} else
449 	{
450 		if (is_composing_char(ch) || is_combining_char(prev_ch, ch))
451 		{
452 			/*
453 			 * Composing and combining chars take up no space.
454 			 *
455 			 * Some terminals, upon failure to compose a
456 			 * composing character with the character(s) that
457 			 * precede(s) it will actually take up one column
458 			 * for the composing character; there isn't much
459 			 * we could do short of testing the (complex)
460 			 * composition process ourselves and printing
461 			 * a binary representation when it fails.
462 			 */
463 			return (0);
464 		}
465 	}
466 
467 	/*
468 	 * Other characters take one or two columns,
469 	 * plus the width of any attribute enter/exit sequence.
470 	 */
471 	w = 1;
472 	if (is_wide_char(ch))
473 		w++;
474 	if (curr > 0 && !is_at_equiv(attr[curr-1], a))
475 		w += attr_ewidth(attr[curr-1]);
476 	if ((apply_at_specials(a) != AT_NORMAL) &&
477 	    (curr == 0 || !is_at_equiv(attr[curr-1], a)))
478 		w += attr_swidth(a);
479 	return (w);
480 }
481 
482 /*
483  * Delete to the previous base character in the line buffer.
484  * Return 1 if one is found.
485  */
486 	static int
487 backc()
488 {
489 	LWCHAR prev_ch;
490 	char *p = linebuf + curr;
491 	LWCHAR ch = step_char(&p, -1, linebuf + lmargin);
492 	int width;
493 
494 	/* This assumes that there is no '\b' in linebuf.  */
495 	while (   curr > lmargin
496 	       && column > lmargin
497 	       && (!(attr[curr - 1] & (AT_ANSI|AT_BINARY))))
498 	{
499 		curr = p - linebuf;
500 		prev_ch = step_char(&p, -1, linebuf + lmargin);
501 		width = pwidth(ch, attr[curr], prev_ch);
502 		column -= width;
503 		if (width > 0)
504 			return 1;
505 		ch = prev_ch;
506 	}
507 
508 	return 0;
509 }
510 
511 /*
512  * Are we currently within a recognized ANSI escape sequence?
513  */
514 	static int
515 in_ansi_esc_seq()
516 {
517 	char *p;
518 
519 	/*
520 	 * Search backwards for either an ESC (which means we ARE in a seq);
521 	 * or an end char (which means we're NOT in a seq).
522 	 */
523 	for (p = &linebuf[curr];  p > linebuf; )
524 	{
525 		LWCHAR ch = step_char(&p, -1, linebuf);
526 		if (ch == ESC)
527 			return (1);
528 		if (!is_ansi_middle(ch))
529 			return (0);
530 	}
531 	return (0);
532 }
533 
534 /*
535  * Is a character the end of an ANSI escape sequence?
536  */
537 	public int
538 is_ansi_end(ch)
539 	LWCHAR ch;
540 {
541 	if (!is_ascii_char(ch))
542 		return (0);
543 	return (strchr(end_ansi_chars, (char) ch) != NULL);
544 }
545 
546 /*
547  *
548  */
549 	public int
550 is_ansi_middle(ch)
551 	LWCHAR ch;
552 {
553 	if (!is_ascii_char(ch))
554 		return (0);
555 	if (is_ansi_end(ch))
556 		return (0);
557 	return (strchr(mid_ansi_chars, (char) ch) != NULL);
558 }
559 
560 /*
561  * Append a character and attribute to the line buffer.
562  */
563 #define	STORE_CHAR(ch,a,rep,pos) \
564 	do { \
565 		if (store_char((ch),(a),(rep),(pos))) return (1); \
566 	} while (0)
567 
568 	static int
569 store_char(ch, a, rep, pos)
570 	LWCHAR ch;
571 	int a;
572 	char *rep;
573 	POSITION pos;
574 {
575 	int w;
576 	int replen;
577 	char cs;
578 
579 	w = (a & (AT_UNDERLINE|AT_BOLD));	/* Pre-use w.  */
580 	if (w != AT_NORMAL)
581 		last_overstrike = w;
582 
583 #if HILITE_SEARCH
584 	{
585 		int matches;
586 		if (is_hilited(pos, pos+1, 0, &matches))
587 		{
588 			/*
589 			 * This character should be highlighted.
590 			 * Override the attribute passed in.
591 			 */
592 			if (a != AT_ANSI)
593 				a |= AT_HILITE;
594 		}
595 		line_matches += matches;
596 	}
597 #endif
598 
599 	if (ctldisp == OPT_ONPLUS && in_ansi_esc_seq())
600 	{
601 		if (!is_ansi_end(ch) && !is_ansi_middle(ch)) {
602 			/* Remove whole unrecognized sequence.  */
603 			do {
604 				--curr;
605 			} while (linebuf[curr] != ESC);
606 			return 0;
607 		}
608 		a = AT_ANSI;	/* Will force re-AT_'ing around it.  */
609 		w = 0;
610 	}
611 	else if (ctldisp == OPT_ONPLUS && ch == ESC)
612 	{
613 		a = AT_ANSI;	/* Will force re-AT_'ing around it.  */
614 		w = 0;
615 	}
616 	else
617 	{
618 		char *p = &linebuf[curr];
619 		LWCHAR prev_ch = step_char(&p, -1, linebuf);
620 		w = pwidth(ch, a, prev_ch);
621 	}
622 
623 	if (ctldisp != OPT_ON && column + w + attr_ewidth(a) > sc_width)
624 		/*
625 		 * Won't fit on screen.
626 		 */
627 		return (1);
628 
629 	if (rep == NULL)
630 	{
631 		cs = (char) ch;
632 		rep = &cs;
633 		replen = 1;
634 	} else
635 	{
636 		replen = utf_len(rep[0]);
637 	}
638 	if (curr + replen >= size_linebuf-6)
639 	{
640 		/*
641 		 * Won't fit in line buffer.
642 		 * Try to expand it.
643 		 */
644 		if (expand_linebuf())
645 			return (1);
646 	}
647 
648 	while (replen-- > 0)
649 	{
650 		linebuf[curr] = *rep++;
651 		attr[curr] = a;
652 		curr++;
653 	}
654 	column += w;
655 	return (0);
656 }
657 
658 /*
659  * Append a tab to the line buffer.
660  * Store spaces to represent the tab.
661  */
662 #define	STORE_TAB(a,pos) \
663 	do { if (store_tab((a),(pos))) return (1); } while (0)
664 
665 	static int
666 store_tab(attr, pos)
667 	int attr;
668 	POSITION pos;
669 {
670 	int to_tab = column + cshift - lmargin;
671 	int i;
672 
673 	if (ntabstops < 2 || to_tab >= tabstops[ntabstops-1])
674 		to_tab = tabdefault -
675 		     ((to_tab - tabstops[ntabstops-1]) % tabdefault);
676 	else
677 	{
678 		for (i = ntabstops - 2;  i >= 0;  i--)
679 			if (to_tab >= tabstops[i])
680 				break;
681 		to_tab = tabstops[i+1] - to_tab;
682 	}
683 
684 	if (column + to_tab - 1 + pwidth(' ', attr, 0) + attr_ewidth(attr) > sc_width)
685 		return 1;
686 
687 	do {
688 		STORE_CHAR(' ', attr, " ", pos);
689 	} while (--to_tab > 0);
690 	return 0;
691 }
692 
693 #define STORE_PRCHAR(c, pos) \
694 	do { if (store_prchar((c), (pos))) return 1; } while (0)
695 
696 	static int
697 store_prchar(c, pos)
698 	char c;
699 	POSITION pos;
700 {
701 	char *s;
702 
703 	/*
704 	 * Convert to printable representation.
705 	 */
706 	s = prchar(c);
707 
708 	/*
709 	 * Make sure we can get the entire representation
710 	 * of the character on this line.
711 	 */
712 	if (column + (int) strlen(s) - 1 +
713             pwidth(' ', binattr, 0) + attr_ewidth(binattr) > sc_width)
714 		return 1;
715 
716 	for ( ;  *s != 0;  s++)
717 		STORE_CHAR(*s, AT_BINARY, NULL, pos);
718 
719 	return 0;
720 }
721 
722 	static int
723 flush_mbc_buf(pos)
724 	POSITION pos;
725 {
726 	int i;
727 
728 	for (i = 0; i < mbc_buf_index; i++)
729 		if (store_prchar(mbc_buf[i], pos))
730 			return mbc_buf_index - i;
731 
732 	return 0;
733 }
734 
/*
 * Append a character to the line buffer.
 * Expand tabs into spaces, handle underlining, boldfacing, etc.
 * c is the next input byte and pos its file position.
 * Returns 0 if ok.  A non-zero return means the character couldn't
 * be stored; on the final return path the value is the number of
 * chars the caller should back up (1 when not in utf_mode,
 * otherwise mbc_buf_index).
 */
	public int
pappend(c, pos)
	char c;
	POSITION pos;
{
	int r;

	/* First store the char held back by the previous call, if any. */
	if (pendc)
	{
		if (do_append(pendc, NULL, pendpos))
			/*
			 * Oops.  We've probably lost the char which
			 * was in pendc, since caller won't back up.
			 */
			return (1);
		pendc = '\0';
	}

	if (c == '\r' && bs_mode == BS_SPECIAL)
	{
		if (mbc_buf_len > 0)  /* utf_mode must be on. */
		{
			/* Flush incomplete (truncated) sequence. */
			r = flush_mbc_buf(mbc_pos);
			mbc_buf_index = r + 1;
			mbc_buf_len = 0;
			if (r)
				return (mbc_buf_index);
		}

		/*
		 * Don't put the CR into the buffer until we see
		 * the next char.  If the next char is a newline,
		 * discard the CR.
		 */
		pendc = c;
		pendpos = pos;
		return (0);
	}

	if (!utf_mode)
	{
		r = do_append((LWCHAR) c, NULL, pos);
	} else
	{
		/* Perform strict validation in all possible cases. */
		if (mbc_buf_len == 0)
		{
			/* No sequence in progress: c starts a new char. */
		retry:
			mbc_buf_index = 1;
			*mbc_buf = c;
			if (IS_ASCII_OCTET(c))
				r = do_append((LWCHAR) c, NULL, pos);
			else if (IS_UTF8_LEAD(c))
			{
				/* Start collecting a multibyte sequence. */
				mbc_buf_len = utf_len(c);
				mbc_pos = pos;
				return (0);
			} else
				/* UTF8_INVALID or stray UTF8_TRAIL */
				r = flush_mbc_buf(pos);
		} else if (IS_UTF8_TRAIL(c))
		{
			/* Continue the sequence; wait until it is complete. */
			mbc_buf[mbc_buf_index++] = c;
			if (mbc_buf_index < mbc_buf_len)
				return (0);
			if (is_utf8_well_formed(mbc_buf))
				r = do_append(get_wchar(mbc_buf), mbc_buf, mbc_pos);
			else
				/* Complete, but not shortest form, sequence. */
				mbc_buf_index = r = flush_mbc_buf(mbc_pos);
			mbc_buf_len = 0;
		} else
		{
			/* Flush incomplete (truncated) sequence.  */
			r = flush_mbc_buf(mbc_pos);
			mbc_buf_index = r + 1;
			mbc_buf_len = 0;
			/* Handle new char.  */
			if (!r)
				goto retry;
 		}
	}

	/*
	 * If we need to shift the line, do it.
	 * But wait until we get to at least the middle of the screen,
	 * so shifting it doesn't affect the chars we're currently
	 * pappending.  (Bold & underline can get messed up otherwise.)
	 */
	if (cshift < hshift && column > sc_width / 2)
	{
		linebuf[curr] = '\0';
		pshift(hshift - cshift);
	}
	if (r)
	{
		/* How many chars should caller back up? */
		r = (!utf_mode) ? 1 : mbc_buf_index;
	}
	return (r);
}
842 
/*
 * Append one complete character to the line buffer, interpreting
 * backspaces, overstrikes, tabs, control chars and (in utf_mode)
 * binary chars.
 * ch is the character; rep, if not NULL, points to the bytes which
 * represent it (passed through to store_char); pos is its file
 * position.
 * Returns 0 if ok, 1 if the character couldn't be stored.
 * (The STORE_* macros used below return 1 from this function
 * on failure.)
 */
	static int
do_append(ch, rep, pos)
	LWCHAR ch;
	char *rep;
	POSITION pos;
{
	register int a;
	LWCHAR prev_ch;

	a = AT_NORMAL;

	if (ch == '\b')
	{
		if (bs_mode == BS_CONTROL)
			goto do_control_char;

		/*
		 * A better test is needed here so we don't
		 * backspace over part of the printed
		 * representation of a binary character.
		 */
		if (   curr <= lmargin
		    || column <= lmargin
		    || (attr[curr - 1] & (AT_ANSI|AT_BINARY)))
			/* Nothing we can back up over: show the '\b'. */
			STORE_PRCHAR('\b', pos);
		else if (bs_mode == BS_NORMAL)
			STORE_CHAR(ch, AT_NORMAL, NULL, pos);
		else if (bs_mode == BS_SPECIAL)
			/* Back up; the next char overstrikes if this succeeded. */
			overstrike = backc();

		return 0;
	}

	if (overstrike > 0)
	{
		/*
		 * Overstrike the character at the current position
		 * in the line buffer.  This will cause either
		 * underline (if a "_" is overstruck),
		 * bold (if an identical character is overstruck),
		 * or just deletion of the character in the buffer.
		 */
		overstrike = utf_mode ? -1 : 0;
		/* To be correct, this must be a base character.  */
		prev_ch = get_wchar(linebuf + curr);
		a = attr[curr];
		if (ch == prev_ch)
		{
			/*
			 * Overstriking a char with itself means make it bold.
			 * But overstriking an underscore with itself is
			 * ambiguous.  It could mean make it bold, or
			 * it could mean make it underlined.
			 * Use the previous overstrike to resolve it.
			 */
			if (ch == '_')
			{
				if ((a & (AT_BOLD|AT_UNDERLINE)) != AT_NORMAL)
					a |= (AT_BOLD|AT_UNDERLINE);
				else if (last_overstrike != AT_NORMAL)
					a |= last_overstrike;
				else
					a |= AT_BOLD;
			} else
				a |= AT_BOLD;
		} else if (ch == '_')
		{
			/* "_" over a char: keep the char, underlined. */
			a |= AT_UNDERLINE;
			ch = prev_ch;
			rep = linebuf + curr;
		} else if (prev_ch == '_')
		{
			/* A char over "_": store the new char, underlined. */
			a |= AT_UNDERLINE;
		}
		/* Else we replace prev_ch, but we keep its attributes.  */
	} else if (overstrike < 0)
	{
		/*
		 * Negative overstrike is set above only in utf_mode,
		 * after an overstrike has been handled.
		 */
		if (   is_composing_char(ch)
		    || is_combining_char(get_wchar(linebuf + curr), ch))
			/* Continuation of the same overstrike.  */
			a = last_overstrike;
		else
			overstrike = 0;
	}

	if (ch == '\t')
	{
		/*
		 * Expand a tab into spaces.
		 */
		switch (bs_mode)
		{
		case BS_CONTROL:
			goto do_control_char;
		case BS_NORMAL:
		case BS_SPECIAL:
			STORE_TAB(a, pos);
			break;
		}
	} else if ((!utf_mode || is_ascii_char(ch)) && control_char((char)ch))
	{
	do_control_char:
		if (ctldisp == OPT_ON || (ctldisp == OPT_ONPLUS && ch == ESC))
		{
			/*
			 * Output as a normal character.
			 */
			STORE_CHAR(ch, AT_NORMAL, rep, pos);
		} else
		{
			/* Output in printable form. */
			STORE_PRCHAR((char) ch, pos);
		}
	} else if (utf_mode && ctldisp != OPT_ON && is_ubin_char(ch))
	{
		char *s;

		/* Output the printable form produced by prutfchar. */
		s = prutfchar(ch);

		/* The whole representation must fit on this line. */
		if (column + (int) strlen(s) - 1 +
		    pwidth(' ', binattr, 0) + attr_ewidth(binattr) > sc_width)
			return (1);

		for ( ;  *s != 0;  s++)
			STORE_CHAR(*s, AT_BINARY, NULL, pos);
 	} else
	{
		STORE_CHAR(ch, a, rep, pos);
	}
 	return (0);
}
973 
974 /*
975  *
976  */
977 	public int
978 pflushmbc()
979 {
980 	int r = 0;
981 
982 	if (mbc_buf_len > 0)
983 	{
984 		/* Flush incomplete (truncated) sequence.  */
985 		r = flush_mbc_buf(mbc_pos);
986 		mbc_buf_len = 0;
987 	}
988 	return r;
989 }
990 
991 /*
992  * Terminate the line in the line buffer.
993  */
994 	public void
995 pdone(endline)
996 	int endline;
997 {
998 	(void) pflushmbc();
999 
1000 	if (pendc && (pendc != '\r' || !endline))
1001 		/*
1002 		 * If we had a pending character, put it in the buffer.
1003 		 * But discard a pending CR if we are at end of line
1004 		 * (that is, discard the CR in a CR/LF sequence).
1005 		 */
1006 		(void) do_append(pendc, NULL, pendpos);
1007 
1008 	/*
1009 	 * Make sure we've shifted the line, if we need to.
1010 	 */
1011 	if (cshift < hshift)
1012 		pshift(hshift - cshift);
1013 
1014 	if (ctldisp == OPT_ONPLUS && is_ansi_end('m'))
1015 	{
1016 		/* Switch to normal attribute at end of line. */
1017 		char *p = "\033[m";
1018 		for ( ;  *p != '\0';  p++)
1019 		{
1020 			linebuf[curr] = *p;
1021 			attr[curr++] = AT_ANSI;
1022 		}
1023 	}
1024 
1025 	/*
1026 	 * Add a newline if necessary,
1027 	 * and append a '\0' to the end of the line.
1028 	 */
1029 	if (column < sc_width || !auto_wrap || ignaw || ctldisp == OPT_ON)
1030 	{
1031 		linebuf[curr] = '\n';
1032 		attr[curr] = AT_NORMAL;
1033 		curr++;
1034 	}
1035 	linebuf[curr] = '\0';
1036 	attr[curr] = AT_NORMAL;
1037 
1038 #if HILITE_SEARCH
1039 	if (status_col && line_matches > 0)
1040 	{
1041 		linebuf[0] = '*';
1042 		attr[0] = AT_NORMAL|AT_HILITE;
1043 	}
1044 #endif
1045 }
1046 
1047 /*
1048  * Get a character from the current line.
1049  * Return the character as the function return value,
1050  * and the character attribute in *ap.
1051  */
1052 	public int
1053 gline(i, ap)
1054 	register int i;
1055 	register int *ap;
1056 {
1057 	if (is_null_line)
1058 	{
1059 		/*
1060 		 * If there is no current line, we pretend the line is
1061 		 * either "~" or "", depending on the "twiddle" flag.
1062 		 */
1063 		if (twiddle)
1064 		{
1065 			if (i == 0)
1066 			{
1067 				*ap = AT_BOLD;
1068 				return '~';
1069 			}
1070 			--i;
1071 		}
1072 		/* Make sure we're back to AT_NORMAL before the '\n'.  */
1073 		*ap = AT_NORMAL;
1074 		return i ? '\0' : '\n';
1075 	}
1076 
1077 	*ap = attr[i];
1078 	return (linebuf[i] & 0xFF);
1079 }
1080 
1081 /*
1082  * Indicate that there is no current line.
1083  */
1084 	public void
1085 null_line()
1086 {
1087 	is_null_line = 1;
1088 	cshift = 0;
1089 }
1090 
1091 /*
1092  * Analogous to forw_line(), but deals with "raw lines":
1093  * lines which are not split for screen width.
1094  * {{ This is supposed to be more efficient than forw_line(). }}
1095  */
1096 	public POSITION
1097 forw_raw_line(curr_pos, linep)
1098 	POSITION curr_pos;
1099 	char **linep;
1100 {
1101 	register int n;
1102 	register int c;
1103 	POSITION new_pos;
1104 
1105 	if (curr_pos == NULL_POSITION || ch_seek(curr_pos) ||
1106 		(c = ch_forw_get()) == EOI)
1107 		return (NULL_POSITION);
1108 
1109 	n = 0;
1110 	for (;;)
1111 	{
1112 		if (c == '\n' || c == EOI || ABORT_SIGS())
1113 		{
1114 			new_pos = ch_tell();
1115 			break;
1116 		}
1117 		if (n >= size_linebuf-1)
1118 		{
1119 			if (expand_linebuf())
1120 			{
1121 				/*
1122 				 * Overflowed the input buffer.
1123 				 * Pretend the line ended here.
1124 				 */
1125 				new_pos = ch_tell() - 1;
1126 				break;
1127 			}
1128 		}
1129 		linebuf[n++] = c;
1130 		c = ch_forw_get();
1131 	}
1132 	linebuf[n] = '\0';
1133 	if (linep != NULL)
1134 		*linep = linebuf;
1135 	return (new_pos);
1136 }
1137 
1138 /*
1139  * Analogous to back_line(), but deals with "raw lines".
1140  * {{ This is supposed to be more efficient than back_line(). }}
1141  */
1142 	public POSITION
1143 back_raw_line(curr_pos, linep)
1144 	POSITION curr_pos;
1145 	char **linep;
1146 {
1147 	register int n;
1148 	register int c;
1149 	POSITION new_pos;
1150 
1151 	if (curr_pos == NULL_POSITION || curr_pos <= ch_zero() ||
1152 		ch_seek(curr_pos-1))
1153 		return (NULL_POSITION);
1154 
1155 	n = size_linebuf;
1156 	linebuf[--n] = '\0';
1157 	for (;;)
1158 	{
1159 		c = ch_back_get();
1160 		if (c == '\n' || ABORT_SIGS())
1161 		{
1162 			/*
1163 			 * This is the newline ending the previous line.
1164 			 * We have hit the beginning of the line.
1165 			 */
1166 			new_pos = ch_tell() + 1;
1167 			break;
1168 		}
1169 		if (c == EOI)
1170 		{
1171 			/*
1172 			 * We have hit the beginning of the file.
1173 			 * This must be the first line in the file.
1174 			 * This must, of course, be the beginning of the line.
1175 			 */
1176 			new_pos = ch_zero();
1177 			break;
1178 		}
1179 		if (n <= 0)
1180 		{
1181 			int old_size_linebuf = size_linebuf;
1182 			char *fm;
1183 			char *to;
1184 			if (expand_linebuf())
1185 			{
1186 				/*
1187 				 * Overflowed the input buffer.
1188 				 * Pretend the line ended here.
1189 				 */
1190 				new_pos = ch_tell() + 1;
1191 				break;
1192 			}
1193 			/*
1194 			 * Shift the data to the end of the new linebuf.
1195 			 */
1196 			for (fm = linebuf + old_size_linebuf - 1,
1197 			      to = linebuf + size_linebuf - 1;
1198 			     fm >= linebuf;  fm--, to--)
1199 				*to = *fm;
1200 			n = size_linebuf - old_size_linebuf;
1201 		}
1202 		linebuf[--n] = c;
1203 	}
1204 	if (linep != NULL)
1205 		*linep = &linebuf[n];
1206 	return (new_pos);
1207 }
1208