xref: /freebsd/contrib/sendmail/libsm/lowercase.c (revision 2e3507c25e42292b45a5482e116d278f5515d04d)
1 /*
2  * Copyright (c) 2020 Proofpoint, Inc. and its suppliers.
3  *	All rights reserved.
4  *
5  * By using this file, you agree to the terms and conditions set
6  * forth in the LICENSE file which can be found at the top level of
7  * the sendmail distribution.
8  *
9  */
10 
11 #include <sm/gen.h>
12 #include <sm/sendmail.h>
13 
14 #include <ctype.h>
15 #include <sm/string.h>
16 #include <sm/heap.h>
17 #if USE_EAI
18 # include <sm/ixlen.h>
19 # include <unicode/ucasemap.h>
20 # include <unicode/ustring.h>
21 # include <unicode/uchar.h>
22 
/*
**  ASCIISTR -- test whether a string consists solely of printable ASCII
**
**	Parameters:
**		str -- string to examine (may be NULL)
**
**	Returns:
**		true iff every character of str has a value in the range
**		32..126; a NULL pointer and the empty string both count
**		as printable ASCII.
*/

bool
asciistr(str)
	const char *str;
{
	const unsigned char *s;

	if (str == NULL)
		return true;

	for (s = (const unsigned char *)str; *s != '\0'; s++)
	{
		/* control characters, DEL, and 8-bit values disqualify */
		if (*s < 32 || *s >= 127)
			return false;
	}
	return true;
}
45 #endif /* USE_EAI */
46 
/*
**  MAKELOWER -- convert a string to lower case
**
**	Parameters:
**		p -- string to convert (rewritten in place if possible). [A]
**
**	Returns:
**		lower cased string: p itself when the conversion is done
**		in place (NULL if p is NULL); with USE_EAI and a string
**		that is not printable ASCII, the result of sm_lowercase().
**
**	Side Effects:
**		When converting in place, every ASCII upper case
**		character in p is overwritten with its lower case form.
*/

char *
makelower(p)
	char *p;
{
	char *cur;

	if (p == NULL)
		return NULL;

#if USE_EAI
	/* not printable ASCII: hand off to sm_lowercase(), p is untouched */
	if (!asciistr(p))
		return (char *)sm_lowercase(p);
#endif

	for (cur = p; *cur != '\0'; cur++)
	{
		unsigned char ch = (unsigned char)*cur;

		/* 7-bit check (same test as isascii()) guards the ctype calls */
		if (ch < 0x80 && isupper(ch))
			*cur = tolower(ch);
	}
	return p;
}
79 
80 #if USE_EAI
81 /*
82 **  SM_LOWERCASE -- lower case a UTF-8 string
83 **	Note: this should ONLY be applied to a UTF-8 string,
84 **	i.e., the caller should check first if it isn't an ASCII string.
85 **
86 **	Parameters:
87 **		str -- original string
88 **
89 **	Returns:
90 **		lower case version of string [S]
91 **
92 **	How to return an error description due to failed unicode calls?
93 **	However, is that even relevant?
94 */
95 
96 char *
97 sm_lowercase(str)
98 	const char *str;
99 {
100 	int olen, ilen;
101 	UErrorCode error;
102 	ssize_t req;
103 	int n;
104 	static UCaseMap *csm = NULL;
105 	static char *out = NULL;
106 	static int outlen = 0;
107 
108 # if SM_CHECK_REQUIRE
109 	if (sm_debug_active(&SmExpensiveRequire, 3))
110 		SM_REQUIRE(!asciistr(str));
111 # endif
112 	/* an empty string is always ASCII */
113 	SM_REQUIRE(NULL != str && '\0' != *str);
114 
115 	if (NULL == csm)
116 	{
117 		error = U_ZERO_ERROR;
118 		csm = ucasemap_open("en_US", U_FOLD_CASE_DEFAULT, &error);
119 		if (U_SUCCESS(error) == 0)
120 		{
121 			/* syserr("ucasemap_open error: %s", u_errorName(error)); */
122 			return NULL;
123 		}
124 	}
125 
126 	ilen = strlen(str);
127 	olen = ilen + 1;
128 	if (olen > outlen)
129 	{
130 		outlen = olen;
131 		out = sm_realloc_x(out, outlen);
132 	}
133 
134 	for (n = 0; n < 3; n++)
135 	{
136 		error = U_ZERO_ERROR;
137 		req = ucasemap_utf8FoldCase(csm, out, olen, str, ilen, &error);
138 		if (U_SUCCESS(error))
139 		{
140 			if (req >= olen)
141 			{
142 				outlen = req + 1;
143 				out = sm_realloc_x(out, outlen);
144 				out[req] = '\0';
145 			}
146 			break;
147 		}
148 		else if (error == U_BUFFER_OVERFLOW_ERROR)
149 		{
150 			outlen = req + 1;
151 			out = sm_realloc_x(out, outlen);
152 			olen = outlen;
153 		}
154 		else
155 		{
156 			/* syserr("conversion error for \"%s\": %s", str, u_errorName(error)); */
157 			return NULL;
158 		}
159 	}
160 	return out;
161 }
162 #endif /* USE_EAI */
163