1 /*
2 * Copyright (C) 1984-2025 Mark Nudelman
3 *
4 * You may distribute under the terms of either the GNU General Public
5 * License or the Less License, as specified in the README file.
6 *
7 * For more information, see the README file.
8 */
9
10 /*
11 * Routines to convert text in various ways. Used by search.
12 */
13
14 #include "less.h"
15 #include "charset.h"
16
17 extern int utf_mode;
18
19 /*
20 * Get the length of a buffer needed to convert a string.
21 */
cvt_length(size_t len,int ops)22 public size_t cvt_length(size_t len, int ops)
23 {
24 (void) ops;
25 if (utf_mode)
26 /*
27 * Just copying a string in UTF-8 mode can cause it to grow
28 * in length.
29 * Four output bytes for one input byte is the worst case.
30 */
31 len *= 4;
32 return (len + 1);
33 }
34
35 /*
36 * Allocate a chpos array for use by cvt_text.
37 */
cvt_alloc_chpos(size_t len)38 public int * cvt_alloc_chpos(size_t len)
39 {
40 size_t i;
41 int *chpos = (int *) ecalloc(len, sizeof(int));
42 /* Initialize all entries to an invalid position. */
43 for (i = 0; i < len; i++)
44 chpos[i] = -1;
45 return (chpos);
46 }
47
48 /*
49 * Convert text. Perform the transformations specified by ops.
50 * Returns converted text in odst. The original offset of each
51 * odst character (when it was in osrc) is returned in the chpos array.
52 */
cvt_text(mutable char * odst,constant char * osrc,mutable int * chpos,mutable size_t * lenp,int ops)53 public void cvt_text(mutable char *odst, constant char *osrc, mutable int *chpos, mutable size_t *lenp, int ops)
54 {
55 char *dst;
56 char *edst = odst;
57 constant char *src;
58 constant char *src_end;
59 LWCHAR ch;
60
61 if (lenp != NULL)
62 src_end = osrc + *lenp;
63 else
64 src_end = osrc + strlen(osrc);
65
66 for (src = osrc, dst = odst; src < src_end; )
67 {
68 size_t src_pos = ptr_diff(src, osrc);
69 size_t dst_pos = ptr_diff(dst, odst);
70 struct ansi_state *pansi;
71 ch = step_charc(&src, +1, src_end);
72 if ((ops & CVT_BS) && ch == '\b' && dst > odst)
73 {
74 /* Delete backspace and preceding char. */
75 do {
76 dst--;
77 } while (dst > odst && utf_mode &&
78 !IS_ASCII_OCTET(*dst) && !IS_UTF8_LEAD(*dst));
79 } else if ((ops & CVT_ANSI) && (pansi = ansi_start(ch)) != NULL)
80 {
81 /* Skip to end of ANSI escape sequence. */
82 while (src < src_end)
83 {
84 if (ansi_step(pansi, ch) != ANSI_MID)
85 break;
86 ch = (LWCHAR) *src++; /* {{ would step_char work? }} */
87 }
88 ansi_done(pansi);
89 } else
90 {
91 /* Just copy the char to the destination buffer. */
92 char *cdst = dst;
93 if ((ops & CVT_TO_LC) && IS_UPPER(ch))
94 ch = TO_LOWER(ch);
95 put_wchar(&dst, ch);
96 /* Record the original position of the char. */
97 if (chpos != NULL)
98 {
99 while (cdst++ < dst)
100 chpos[dst_pos++] = (int) src_pos; /*{{type-issue}}*/
101 }
102 }
103 if (dst > edst)
104 edst = dst;
105 }
106 if ((ops & CVT_CRLF) && edst > odst && edst[-1] == '\r')
107 edst--;
108 *edst = '\0';
109 if (lenp != NULL)
110 *lenp = ptr_diff(edst, odst);
111 /* FIXME: why was this here? if (chpos != NULL) chpos[dst - odst] = src - osrc; */
112 }
113