xref: /freebsd/contrib/sendmail/libsm/lowercase.c (revision 8aac90f18aef7c9eea906c3ff9a001ca7b94f375)
1 /*
2  * Copyright (c) 2020 Proofpoint, Inc. and its suppliers.
3  *	All rights reserved.
4  *
5  * By using this file, you agree to the terms and conditions set
6  * forth in the LICENSE file which can be found at the top level of
7  * the sendmail distribution.
8  *
9  */
10 
11 #include <sm/gen.h>
12 #include <sm/sendmail.h>
13 
14 #include <ctype.h>
15 #include <sm/string.h>
16 #include <sm/heap.h>
17 #if USE_EAI
18 # include <sm/limits.h>
19 # include <unicode/ucasemap.h>
20 # include <unicode/ustring.h>
21 # include <unicode/uchar.h>
22 # include <sm/ixlen.h>
23 
/*
**  ASCIISTR -- test whether a string consists only of printable ASCII
**
**	Parameters:
**		str -- NUL-terminated string to examine (may be NULL)
**
**	Returns:
**		true iff every character before the terminating NUL has
**		a value in the range 32..126; a NULL pointer and the
**		empty string are both treated as printable ASCII.
*/

bool
asciistr(str)
	const char *str;
{
	const unsigned char *cp;

	if (str == NULL)
		return true;

	/* walk the bytes as unsigned so values >= 128 are not negative */
	for (cp = (const unsigned char *) str; *cp != '\0'; cp++)
	{
		if (*cp < 32 || *cp >= 127)
			return false;
	}
	return true;
}
46 
/*
**  ASCIINSTR -- test whether the start of a string is printable ASCII
**
**	Parameters:
**		str -- string to examine (may be NULL)
**		len -- maximum number of characters to examine;
**			must be less than INT_MAX
**
**	Returns:
**		true iff every character examined has a value in the
**		range 32..126.  Examination stops after len characters
**		or at the first NUL, whichever comes first.
**		A NULL pointer is treated as printable ASCII.
*/

bool
asciinstr(str, len)
	const char *str;
	size_t len;
{
	size_t i;
	unsigned char c;

	if (str == NULL)
		return true;
	SM_REQUIRE(len < INT_MAX);

	for (i = 0; i < len; i++)
	{
		c = (unsigned char) str[i];

		/* reached the end of the string before len characters */
		if (c == '\0')
			return true;
		if (c < 32 || c >= 127)
			return false;
	}

	/* all len characters were printable */
	return true;
}
78 #endif /* USE_EAI */
79 
/*
**  MAKELOWER -- convert a string to lower case
**
**	Parameters:
**		p -- string to convert (may be NULL);
**			it is modified in place if possible. [A]
**
**	Returns:
**		the lower cased string.
**		For NULL and for plain strings this is p itself.
**		If USE_EAI is set and p is not printable ASCII the
**		result of sm_lowercase() is returned instead and
**		p is left untouched.
**
**	Side Effects:
**		Upper case ASCII letters in p are replaced by their
**		lower case counterparts if possible.
*/

char *
makelower(p)
	char *p;
{
	char *cp;

	if (p == NULL)
		return NULL;
#if USE_EAI
	if (!asciistr(p))
		return (char *)sm_lowercase(p);
#endif

	cp = p;
	while (*cp != '\0')
	{
		/* isascii() first: isupper() is only safe for ASCII values */
		if (isascii(*cp) && isupper(*cp))
			*cp = tolower(*cp);
		cp++;
	}
	return p;
}
112 
113 #if USE_EAI
114 /*
115 **  SM_LOWERCASE -- lower case a UTF-8 string
116 **	Note: this should ONLY be applied to a UTF-8 string,
117 **	i.e., the caller should check first if it isn't an ASCII string.
118 **
119 **	Parameters:
120 **		str -- original string
121 **
122 **	Returns:
123 **		lower case version of string [S]
124 **
125 **	How to return an error description due to failed unicode calls?
126 **	However, is that even relevant?
127 */
128 
129 char *
130 sm_lowercase(str)
131 	const char *str;
132 {
133 	int olen, ilen;
134 	UErrorCode error;
135 	ssize_t req;
136 	int n;
137 	static UCaseMap *csm = NULL;
138 	static char *out = NULL;
139 	static int outlen = 0;
140 
141 # if SM_CHECK_REQUIRE
142 	if (sm_debug_active(&SmExpensiveRequire, 3))
143 		SM_REQUIRE(!asciistr(str));
144 # endif
145 	/* an empty string is always ASCII */
146 	SM_REQUIRE(NULL != str && '\0' != *str);
147 
148 	if (NULL == csm)
149 	{
150 		error = U_ZERO_ERROR;
151 		csm = ucasemap_open("en_US", U_FOLD_CASE_DEFAULT, &error);
152 		if (U_SUCCESS(error) == 0)
153 		{
154 			/* syserr("ucasemap_open error: %s", u_errorName(error)); */
155 			return NULL;
156 		}
157 	}
158 
159 	ilen = strlen(str);
160 	olen = ilen + 1;
161 	if (olen > outlen)
162 	{
163 		outlen = olen;
164 		out = sm_realloc_x(out, outlen);
165 	}
166 
167 	for (n = 0; n < 3; n++)
168 	{
169 		error = U_ZERO_ERROR;
170 		req = ucasemap_utf8FoldCase(csm, out, olen, str, ilen, &error);
171 		if (U_SUCCESS(error))
172 		{
173 			if (req >= olen)
174 			{
175 				outlen = req + 1;
176 				out = sm_realloc_x(out, outlen);
177 				out[req] = '\0';
178 			}
179 			break;
180 		}
181 		else if (error == U_BUFFER_OVERFLOW_ERROR)
182 		{
183 			outlen = req + 1;
184 			out = sm_realloc_x(out, outlen);
185 			olen = outlen;
186 		}
187 		else
188 		{
189 			/* syserr("conversion error for \"%s\": %s", str, u_errorName(error)); */
190 			return NULL;
191 		}
192 	}
193 	return out;
194 }
195 #endif /* USE_EAI */
196