xref: /freebsd/contrib/less/cvt.c (revision 9a14aa017b21c292740c00ee098195cd46642730)
1 /*
2  * Copyright (C) 1984-2011  Mark Nudelman
3  *
4  * You may distribute under the terms of either the GNU General Public
5  * License or the Less License, as specified in the README file.
6  *
7  * For more information about less, or for information on how to
8  * contact the author, see the README file.
9  */
10 
11 /*
12  * Routines to convert text in various ways.  Used by search.
13  */
14 
15 #include "less.h"
16 #include "charset.h"
17 
18 extern int utf_mode;
19 
20 /*
21  * Get the length of a buffer needed to convert a string.
22  */
23 	public int
24 cvt_length(len, ops)
25 	int len;
26 	int ops;
27 {
28 	if (utf_mode)
29 		/*
30 		 * Just copying a string in UTF-8 mode can cause it to grow
31 		 * in length.
32 		 * Four output bytes for one input byte is the worst case.
33 		 */
34 		len *= 4;
35 	return (len + 1);
36 }
37 
38 /*
39  * Allocate a chpos array for use by cvt_text.
40  */
41 	public int *
42 cvt_alloc_chpos(len)
43 	int len;
44 {
45 	int i;
46 	int *chpos = (int *) ecalloc(sizeof(int), len);
47 	/* Initialize all entries to an invalid position. */
48 	for (i = 0;  i < len;  i++)
49 		chpos[i] = -1;
50 	return (chpos);
51 }
52 
53 /*
54  * Convert text.  Perform the transformations specified by ops.
55  * Returns converted text in odst.  The original offset of each
56  * odst character (when it was in osrc) is returned in the chpos array.
57  */
58 	public void
59 cvt_text(odst, osrc, chpos, lenp, ops)
60 	char *odst;
61 	char *osrc;
62 	int *chpos;
63 	int *lenp;
64 	int ops;
65 {
66 	char *dst;
67 	char *src;
68 	register char *src_end;
69 	LWCHAR ch;
70 
71 	if (lenp != NULL)
72 		src_end = osrc + *lenp;
73 	else
74 		src_end = osrc + strlen(osrc);
75 
76 	for (src = osrc, dst = odst;  src < src_end;  )
77 	{
78 		int src_pos = src - osrc;
79 		int dst_pos = dst - odst;
80 		ch = step_char(&src, +1, src_end);
81 		if ((ops & CVT_BS) && ch == '\b' && dst > odst)
82 		{
83 			/* Delete backspace and preceding char. */
84 			do {
85 				dst--;
86 			} while (dst > odst &&
87 				!IS_ASCII_OCTET(*dst) && !IS_UTF8_LEAD(*dst));
88 		} else if ((ops & CVT_ANSI) && IS_CSI_START(ch))
89 		{
90 			/* Skip to end of ANSI escape sequence. */
91 			src++;  /* skip the CSI start char */
92 			while (src < src_end)
93 				if (!is_ansi_middle(*src++))
94 					break;
95 		} else
96 		{
97 			/* Just copy the char to the destination buffer. */
98 			if ((ops & CVT_TO_LC) && IS_UPPER(ch))
99 				ch = TO_LOWER(ch);
100 			put_wchar(&dst, ch);
101 			/*
102 			 * Record the original position of the char.
103 			 * But if we've already recorded a position
104 			 * for this char (due to a backspace), leave
105 			 * it alone; if multiple source chars map to
106 			 * one destination char, we want the position
107 			 * of the first one.
108 			 */
109 			if (chpos != NULL && chpos[dst_pos] < 0)
110 				chpos[dst_pos] = src_pos;
111 		}
112 	}
113 	if ((ops & CVT_CRLF) && dst > odst && dst[-1] == '\r')
114 		dst--;
115 	*dst = '\0';
116 	if (lenp != NULL)
117 		*lenp = dst - odst;
118 	if (chpos != NULL)
119 		chpos[dst - odst] = src - osrc;
120 }
121