xref: /freebsd/contrib/less/cvt.c (revision c77c488926555ca344ae3a417544cf7a720e1de1)
/*
 * Copyright (C) 1984-2024  Mark Nudelman
 *
 * You may distribute under the terms of either the GNU General Public
 * License or the Less License, as specified in the README file.
 *
 * For more information, see the README file.
 */
9f0be0a1fSXin LI 
/*
 * Routines to convert text in various ways.  Used by search.
 */
13f0be0a1fSXin LI 
14f0be0a1fSXin LI #include "less.h"
15f0be0a1fSXin LI #include "charset.h"
16f0be0a1fSXin LI 
17f0be0a1fSXin LI extern int utf_mode;
18f0be0a1fSXin LI 
19f0be0a1fSXin LI /*
20f0be0a1fSXin LI  * Get the length of a buffer needed to convert a string.
21f0be0a1fSXin LI  */
22*c77c4889SXin LI public size_t cvt_length(size_t len, int ops)
23f0be0a1fSXin LI {
24*c77c4889SXin LI 	(void) ops;
25f0be0a1fSXin LI 	if (utf_mode)
26f0be0a1fSXin LI 		/*
27f0be0a1fSXin LI 		 * Just copying a string in UTF-8 mode can cause it to grow
28f0be0a1fSXin LI 		 * in length.
29f0be0a1fSXin LI 		 * Four output bytes for one input byte is the worst case.
30f0be0a1fSXin LI 		 */
31f0be0a1fSXin LI 		len *= 4;
32f0be0a1fSXin LI 	return (len + 1);
33f0be0a1fSXin LI }
34f0be0a1fSXin LI 
35f0be0a1fSXin LI /*
36f0be0a1fSXin LI  * Allocate a chpos array for use by cvt_text.
37f0be0a1fSXin LI  */
38*c77c4889SXin LI public int * cvt_alloc_chpos(size_t len)
39f0be0a1fSXin LI {
40*c77c4889SXin LI 	size_t i;
41*c77c4889SXin LI 	int *chpos = (int *) ecalloc(len, sizeof(int));
42f0be0a1fSXin LI 	/* Initialize all entries to an invalid position. */
43f0be0a1fSXin LI 	for (i = 0;  i < len;  i++)
44f0be0a1fSXin LI 		chpos[i] = -1;
45f0be0a1fSXin LI 	return (chpos);
46f0be0a1fSXin LI }
47f0be0a1fSXin LI 
48f0be0a1fSXin LI /*
49f0be0a1fSXin LI  * Convert text.  Perform the transformations specified by ops.
50f0be0a1fSXin LI  * Returns converted text in odst.  The original offset of each
51f0be0a1fSXin LI  * odst character (when it was in osrc) is returned in the chpos array.
52f0be0a1fSXin LI  */
53*c77c4889SXin LI public void cvt_text(mutable char *odst, constant char *osrc, mutable int *chpos, mutable size_t *lenp, int ops)
54f0be0a1fSXin LI {
55f0be0a1fSXin LI 	char *dst;
5696e55cc7SXin LI 	char *edst = odst;
57*c77c4889SXin LI 	constant char *src;
58*c77c4889SXin LI 	constant char *src_end;
59f0be0a1fSXin LI 	LWCHAR ch;
60f0be0a1fSXin LI 
61f0be0a1fSXin LI 	if (lenp != NULL)
62f0be0a1fSXin LI 		src_end = osrc + *lenp;
63f0be0a1fSXin LI 	else
64f0be0a1fSXin LI 		src_end = osrc + strlen(osrc);
65f0be0a1fSXin LI 
66f0be0a1fSXin LI 	for (src = osrc, dst = odst;  src < src_end;  )
67f0be0a1fSXin LI 	{
68*c77c4889SXin LI 		size_t src_pos = ptr_diff(src, osrc);
69*c77c4889SXin LI 		size_t dst_pos = ptr_diff(dst, odst);
702235c7feSXin LI 		struct ansi_state *pansi;
71*c77c4889SXin LI 		ch = step_charc(&src, +1, src_end);
72f0be0a1fSXin LI 		if ((ops & CVT_BS) && ch == '\b' && dst > odst)
73f0be0a1fSXin LI 		{
74f0be0a1fSXin LI 			/* Delete backspace and preceding char. */
75f0be0a1fSXin LI 			do {
76f0be0a1fSXin LI 				dst--;
77b2ea2440SXin LI 			} while (dst > odst && utf_mode &&
78f0be0a1fSXin LI 				!IS_ASCII_OCTET(*dst) && !IS_UTF8_LEAD(*dst));
792235c7feSXin LI 		} else if ((ops & CVT_ANSI) && (pansi = ansi_start(ch)) != NULL)
80f0be0a1fSXin LI 		{
81f0be0a1fSXin LI 			/* Skip to end of ANSI escape sequence. */
82f0be0a1fSXin LI 			while (src < src_end)
832235c7feSXin LI 			{
842235c7feSXin LI 				if (ansi_step(pansi, ch) != ANSI_MID)
85f0be0a1fSXin LI 					break;
86*c77c4889SXin LI 				ch = (LWCHAR) *src++; /* {{ would step_char work? }} */
872235c7feSXin LI 			}
882235c7feSXin LI 			ansi_done(pansi);
89f0be0a1fSXin LI 		} else
90f0be0a1fSXin LI 		{
91f0be0a1fSXin LI 			/* Just copy the char to the destination buffer. */
92*c77c4889SXin LI 			char *cdst = dst;
93f0be0a1fSXin LI 			if ((ops & CVT_TO_LC) && IS_UPPER(ch))
94f0be0a1fSXin LI 				ch = TO_LOWER(ch);
95f0be0a1fSXin LI 			put_wchar(&dst, ch);
9696e55cc7SXin LI 			/* Record the original position of the char. */
9796e55cc7SXin LI 			if (chpos != NULL)
98*c77c4889SXin LI 			{
99*c77c4889SXin LI 				while (cdst++ < dst)
100*c77c4889SXin LI 					chpos[dst_pos++] = (int) src_pos; /*{{type-issue}}*/
101*c77c4889SXin LI 			}
102f0be0a1fSXin LI 		}
10396e55cc7SXin LI 		if (dst > edst)
10496e55cc7SXin LI 			edst = dst;
105f0be0a1fSXin LI 	}
10696e55cc7SXin LI 	if ((ops & CVT_CRLF) && edst > odst && edst[-1] == '\r')
10796e55cc7SXin LI 		edst--;
10896e55cc7SXin LI 	*edst = '\0';
109f0be0a1fSXin LI 	if (lenp != NULL)
110*c77c4889SXin LI 		*lenp = ptr_diff(edst, odst);
11196e55cc7SXin LI 	/* FIXME: why was this here?  if (chpos != NULL) chpos[dst - odst] = src - osrc; */
112f0be0a1fSXin LI }
113