xref: /freebsd/contrib/less/cvt.c (revision 81ad626541db97eb356e2c1d4a20eb2a26a766ab)
1 /*
2  * Copyright (C) 1984-2022  Mark Nudelman
3  *
4  * You may distribute under the terms of either the GNU General Public
5  * License or the Less License, as specified in the README file.
6  *
7  * For more information, see the README file.
8  */
9 
10 /*
11  * Routines to convert text in various ways.  Used by search.
12  */
13 
14 #include "less.h"
15 #include "charset.h"
16 
17 extern int utf_mode;
18 
19 /*
20  * Get the length of a buffer needed to convert a string.
21  */
22 	public int
23 cvt_length(len, ops)
24 	int len;
25 	int ops;
26 {
27 	if (utf_mode)
28 		/*
29 		 * Just copying a string in UTF-8 mode can cause it to grow
30 		 * in length.
31 		 * Four output bytes for one input byte is the worst case.
32 		 */
33 		len *= 4;
34 	return (len + 1);
35 }
36 
37 /*
38  * Allocate a chpos array for use by cvt_text.
39  */
40 	public int *
41 cvt_alloc_chpos(len)
42 	int len;
43 {
44 	int i;
45 	int *chpos = (int *) ecalloc(sizeof(int), len);
46 	/* Initialize all entries to an invalid position. */
47 	for (i = 0;  i < len;  i++)
48 		chpos[i] = -1;
49 	return (chpos);
50 }
51 
52 /*
53  * Convert text.  Perform the transformations specified by ops.
54  * Returns converted text in odst.  The original offset of each
55  * odst character (when it was in osrc) is returned in the chpos array.
56  */
57 	public void
58 cvt_text(odst, osrc, chpos, lenp, ops)
59 	char *odst;
60 	char *osrc;
61 	int *chpos;
62 	int *lenp;
63 	int ops;
64 {
65 	char *dst;
66 	char *edst = odst;
67 	char *src;
68 	char *src_end;
69 	LWCHAR ch;
70 
71 	if (lenp != NULL)
72 		src_end = osrc + *lenp;
73 	else
74 		src_end = osrc + strlen(osrc);
75 
76 	for (src = osrc, dst = odst;  src < src_end;  )
77 	{
78 		int src_pos = (int) (src - osrc);
79 		int dst_pos = (int) (dst - odst);
80 		struct ansi_state *pansi;
81 		ch = step_char(&src, +1, src_end);
82 		if ((ops & CVT_BS) && ch == '\b' && dst > odst)
83 		{
84 			/* Delete backspace and preceding char. */
85 			do {
86 				dst--;
87 			} while (dst > odst && utf_mode &&
88 				!IS_ASCII_OCTET(*dst) && !IS_UTF8_LEAD(*dst));
89 		} else if ((ops & CVT_ANSI) && (pansi = ansi_start(ch)) != NULL)
90 		{
91 			/* Skip to end of ANSI escape sequence. */
92 			while (src < src_end)
93 			{
94 				if (ansi_step(pansi, ch) != ANSI_MID)
95 					break;
96 				ch = *src++;
97 			}
98 			ansi_done(pansi);
99 		} else
100 		{
101 			/* Just copy the char to the destination buffer. */
102 			if ((ops & CVT_TO_LC) && IS_UPPER(ch))
103 				ch = TO_LOWER(ch);
104 			put_wchar(&dst, ch);
105 			/* Record the original position of the char. */
106 			if (chpos != NULL)
107 				chpos[dst_pos] = src_pos;
108 		}
109 		if (dst > edst)
110 			edst = dst;
111 	}
112 	if ((ops & CVT_CRLF) && edst > odst && edst[-1] == '\r')
113 		edst--;
114 	*edst = '\0';
115 	if (lenp != NULL)
116 		*lenp = (int) (edst - odst);
117 	/* FIXME: why was this here?  if (chpos != NULL) chpos[dst - odst] = src - osrc; */
118 }
119