xref: /freebsd/contrib/less/cvt.c (revision f3087bef11543b42e0d69b708f367097a4118d24)
1 /*
2  * Copyright (C) 1984-2024  Mark Nudelman
3  *
4  * You may distribute under the terms of either the GNU General Public
5  * License or the Less License, as specified in the README file.
6  *
7  * For more information, see the README file.
8  */
9 
10 /*
11  * Routines to convert text in various ways.  Used by search.
12  */
13 
14 #include "less.h"
15 #include "charset.h"
16 
17 extern int utf_mode;
18 
19 /*
20  * Get the length of a buffer needed to convert a string.
21  */
22 public size_t cvt_length(size_t len, int ops)
23 {
24 	(void) ops;
25 	if (utf_mode)
26 		/*
27 		 * Just copying a string in UTF-8 mode can cause it to grow
28 		 * in length.
29 		 * Four output bytes for one input byte is the worst case.
30 		 */
31 		len *= 4;
32 	return (len + 1);
33 }
34 
35 /*
36  * Allocate a chpos array for use by cvt_text.
37  */
38 public int * cvt_alloc_chpos(size_t len)
39 {
40 	size_t i;
41 	int *chpos = (int *) ecalloc(len, sizeof(int));
42 	/* Initialize all entries to an invalid position. */
43 	for (i = 0;  i < len;  i++)
44 		chpos[i] = -1;
45 	return (chpos);
46 }
47 
48 /*
49  * Convert text.  Perform the transformations specified by ops.
50  * Returns converted text in odst.  The original offset of each
51  * odst character (when it was in osrc) is returned in the chpos array.
52  */
53 public void cvt_text(mutable char *odst, constant char *osrc, mutable int *chpos, mutable size_t *lenp, int ops)
54 {
55 	char *dst;
56 	char *edst = odst;
57 	constant char *src;
58 	constant char *src_end;
59 	LWCHAR ch;
60 
61 	if (lenp != NULL)
62 		src_end = osrc + *lenp;
63 	else
64 		src_end = osrc + strlen(osrc);
65 
66 	for (src = osrc, dst = odst;  src < src_end;  )
67 	{
68 		size_t src_pos = ptr_diff(src, osrc);
69 		size_t dst_pos = ptr_diff(dst, odst);
70 		struct ansi_state *pansi;
71 		ch = step_charc(&src, +1, src_end);
72 		if ((ops & CVT_BS) && ch == '\b' && dst > odst)
73 		{
74 			/* Delete backspace and preceding char. */
75 			do {
76 				dst--;
77 			} while (dst > odst && utf_mode &&
78 				!IS_ASCII_OCTET(*dst) && !IS_UTF8_LEAD(*dst));
79 		} else if ((ops & CVT_ANSI) && (pansi = ansi_start(ch)) != NULL)
80 		{
81 			/* Skip to end of ANSI escape sequence. */
82 			while (src < src_end)
83 			{
84 				if (ansi_step(pansi, ch) != ANSI_MID)
85 					break;
86 				ch = (LWCHAR) *src++; /* {{ would step_char work? }} */
87 			}
88 			ansi_done(pansi);
89 		} else
90 		{
91 			/* Just copy the char to the destination buffer. */
92 			char *cdst = dst;
93 			if ((ops & CVT_TO_LC) && IS_UPPER(ch))
94 				ch = TO_LOWER(ch);
95 			put_wchar(&dst, ch);
96 			/* Record the original position of the char. */
97 			if (chpos != NULL)
98 			{
99 				while (cdst++ < dst)
100 					chpos[dst_pos++] = (int) src_pos; /*{{type-issue}}*/
101 			}
102 		}
103 		if (dst > edst)
104 			edst = dst;
105 	}
106 	if ((ops & CVT_CRLF) && edst > odst && edst[-1] == '\r')
107 		edst--;
108 	*edst = '\0';
109 	if (lenp != NULL)
110 		*lenp = ptr_diff(edst, odst);
111 	/* FIXME: why was this here?  if (chpos != NULL) chpos[dst - odst] = src - osrc; */
112 }
113