xref: /freebsd/contrib/less/cvt.c (revision e6bfd18d21b225af6a0ed67ceeaf1293b7b9eba5)
1 /*
2  * Copyright (C) 1984-2023  Mark Nudelman
3  *
4  * You may distribute under the terms of either the GNU General Public
5  * License or the Less License, as specified in the README file.
6  *
7  * For more information, see the README file.
8  */
9 
10 /*
11  * Routines to convert text in various ways.  Used by search.
12  */
13 
14 #include "less.h"
15 #include "charset.h"
16 
17 extern int utf_mode;
18 
19 /*
20  * Get the length of a buffer needed to convert a string.
21  */
22 public int cvt_length(int len, int ops)
23 {
24 	if (utf_mode)
25 		/*
26 		 * Just copying a string in UTF-8 mode can cause it to grow
27 		 * in length.
28 		 * Four output bytes for one input byte is the worst case.
29 		 */
30 		len *= 4;
31 	return (len + 1);
32 }
33 
34 /*
35  * Allocate a chpos array for use by cvt_text.
36  */
37 public int * cvt_alloc_chpos(int len)
38 {
39 	int i;
40 	int *chpos = (int *) ecalloc(sizeof(int), len);
41 	/* Initialize all entries to an invalid position. */
42 	for (i = 0;  i < len;  i++)
43 		chpos[i] = -1;
44 	return (chpos);
45 }
46 
47 /*
48  * Convert text.  Perform the transformations specified by ops.
49  * Returns converted text in odst.  The original offset of each
50  * odst character (when it was in osrc) is returned in the chpos array.
51  */
52 public void cvt_text(char *odst, char *osrc, int *chpos, int *lenp, int ops)
53 {
54 	char *dst;
55 	char *edst = odst;
56 	char *src;
57 	char *src_end;
58 	LWCHAR ch;
59 
60 	if (lenp != NULL)
61 		src_end = osrc + *lenp;
62 	else
63 		src_end = osrc + strlen(osrc);
64 
65 	for (src = osrc, dst = odst;  src < src_end;  )
66 	{
67 		int src_pos = (int) (src - osrc);
68 		int dst_pos = (int) (dst - odst);
69 		struct ansi_state *pansi;
70 		ch = step_char(&src, +1, src_end);
71 		if ((ops & CVT_BS) && ch == '\b' && dst > odst)
72 		{
73 			/* Delete backspace and preceding char. */
74 			do {
75 				dst--;
76 			} while (dst > odst && utf_mode &&
77 				!IS_ASCII_OCTET(*dst) && !IS_UTF8_LEAD(*dst));
78 		} else if ((ops & CVT_ANSI) && (pansi = ansi_start(ch)) != NULL)
79 		{
80 			/* Skip to end of ANSI escape sequence. */
81 			while (src < src_end)
82 			{
83 				if (ansi_step(pansi, ch) != ANSI_MID)
84 					break;
85 				ch = *src++;
86 			}
87 			ansi_done(pansi);
88 		} else
89 		{
90 			/* Just copy the char to the destination buffer. */
91 			if ((ops & CVT_TO_LC) && IS_UPPER(ch))
92 				ch = TO_LOWER(ch);
93 			put_wchar(&dst, ch);
94 			/* Record the original position of the char. */
95 			if (chpos != NULL)
96 				chpos[dst_pos] = src_pos;
97 		}
98 		if (dst > edst)
99 			edst = dst;
100 	}
101 	if ((ops & CVT_CRLF) && edst > odst && edst[-1] == '\r')
102 		edst--;
103 	*edst = '\0';
104 	if (lenp != NULL)
105 		*lenp = (int) (edst - odst);
106 	/* FIXME: why was this here?  if (chpos != NULL) chpos[dst - odst] = src - osrc; */
107 }
108