1 /*
2 * Copyright (c) 2020 Proofpoint, Inc. and its suppliers.
3 * All rights reserved.
4 *
5 * By using this file, you agree to the terms and conditions set
6 * forth in the LICENSE file which can be found at the top level of
7 * the sendmail distribution.
8 *
9 */
10
11 #include <sm/gen.h>
12 #include <sm/sendmail.h>
13
14 #include <ctype.h>
15 #include <sm/string.h>
16 #include <sm/heap.h>
17 #if USE_EAI
18 # include <sm/limits.h>
19 # include <unicode/ucasemap.h>
20 # include <unicode/ustring.h>
21 # include <unicode/uchar.h>
22 # include <sm/ixlen.h>
23
/*
**  ASCIISTR -- test whether a string consists solely of printable ASCII
**
**	A byte counts as printable ASCII when its value lies in the
**	range 32..126; control characters, DEL (127) and bytes with
**	the high bit set do not qualify.
**
**	Parameters:
**		str -- NUL-terminated string to examine (may be NULL)
**
**	Returns:
**		true iff str is NULL, empty, or made up entirely of
**		printable ASCII bytes.
*/

bool
asciistr(str)
	const char *str;
{
	const unsigned char *cur;

	if (NULL == str)
		return true;

	/* walk the bytes as unsigned values so that 8-bit data compares sanely */
	for (cur = (const unsigned char *) str; *cur != '\0'; cur++)
	{
		if (*cur < 32 || *cur >= 127)
			return false;
	}
	return true;
}
46
/*
**  ASCIINSTR -- test whether the start of a string is printable ASCII
**
**	At most len bytes of str are examined; the scan also stops
**	at a terminating NUL if one is found earlier.
**
**	Parameters:
**		str -- string to examine (may be NULL)
**		len -- maximum number of bytes to examine; must be
**			less than INT_MAX (enforced via SM_REQUIRE)
**
**	Returns:
**		true iff str is NULL or every byte examined lies in
**		the printable ASCII range 32..126.
*/

bool
asciinstr(str, len)
	const char *str;
	size_t len;
{
	size_t idx;

	if (NULL == str)
		return true;
	SM_REQUIRE(len < INT_MAX);

	for (idx = 0; idx < len; idx++)
	{
		unsigned char ch = (unsigned char) str[idx];

		/* string ended before len bytes: nothing objectionable seen */
		if ('\0' == ch)
			break;
		if (ch < 32 || ch >= 127)
			return false;
	}
	return true;
}
78 #endif /* USE_EAI */
79
/*
**  MAKELOWER -- convert a string to lower case
**
**	Parameters:
**		p -- string to convert (rewritten in place if possible). [A]
**
**	Returns:
**		p itself after in-place conversion (this includes a NULL p),
**		or, in USE_EAI builds when asciistr() rejects the string,
**		whatever sm_lowercase() returns for it.  That result is not
**		p, and can be NULL if sm_lowercase() fails.
**
**	Side Effects:
**		Upper case ASCII letters in p are replaced by their lower
**		case counterparts, unless the conversion is handed to
**		sm_lowercase(), in which case p itself is left untouched.
*/

char *
makelower(p)
	char *p;
{
	char *cur;

	if (NULL == p)
		return NULL;
#if USE_EAI
	/* anything that is not plain printable ASCII goes to sm_lowercase() */
	if (!asciistr(p))
		return (char *) sm_lowercase(p);
#endif

	for (cur = p; *cur != '\0'; cur++)
	{
		unsigned char ch = (unsigned char) *cur;

		/* bytes with the high bit set are not ASCII: leave them alone */
		if (ch < 0x80 && isupper(ch))
			*cur = (char) tolower(ch);
	}
	return p;
}
112
113 #if USE_EAI
114 /*
115 ** SM_LOWERCASE -- lower case a UTF-8 string
116 ** Note: this should ONLY be applied to a UTF-8 string,
117 ** i.e., the caller should check first if it isn't an ASCII string.
118 **
119 ** Parameters:
120 ** str -- original string
121 **
122 ** Returns:
123 ** lower case version of string [S]
124 **
125 ** How to return an error description due to failed unicode calls?
126 ** However, is that even relevant?
127 */
128
129 char *
sm_lowercase(str)130 sm_lowercase(str)
131 const char *str;
132 {
133 int olen, ilen;
134 UErrorCode error;
135 ssize_t req;
136 int n;
137 static UCaseMap *csm = NULL;
138 static char *out = NULL;
139 static int outlen = 0;
140
141 # if SM_CHECK_REQUIRE
142 if (sm_debug_active(&SmExpensiveRequire, 3))
143 SM_REQUIRE(!asciistr(str));
144 # endif
145 /* an empty string is always ASCII */
146 SM_REQUIRE(NULL != str && '\0' != *str);
147
148 if (NULL == csm)
149 {
150 error = U_ZERO_ERROR;
151 csm = ucasemap_open("en_US", U_FOLD_CASE_DEFAULT, &error);
152 if (U_SUCCESS(error) == 0)
153 {
154 /* syserr("ucasemap_open error: %s", u_errorName(error)); */
155 return NULL;
156 }
157 }
158
159 ilen = strlen(str);
160 olen = ilen + 1;
161 if (olen > outlen)
162 {
163 outlen = olen;
164 out = sm_realloc_x(out, outlen);
165 }
166
167 for (n = 0; n < 3; n++)
168 {
169 error = U_ZERO_ERROR;
170 req = ucasemap_utf8FoldCase(csm, out, olen, str, ilen, &error);
171 if (U_SUCCESS(error))
172 {
173 if (req >= olen)
174 {
175 outlen = req + 1;
176 out = sm_realloc_x(out, outlen);
177 out[req] = '\0';
178 }
179 break;
180 }
181 else if (error == U_BUFFER_OVERFLOW_ERROR)
182 {
183 outlen = req + 1;
184 out = sm_realloc_x(out, outlen);
185 olen = outlen;
186 }
187 else
188 {
189 /* syserr("conversion error for \"%s\": %s", str, u_errorName(error)); */
190 return NULL;
191 }
192 }
193 return out;
194 }
195 #endif /* USE_EAI */
196