xref: /freebsd/contrib/sendmail/libsm/lowercase.c (revision 2fb4f839f3fc72ce2bab12f9ba4760f97f73e97f)
1*2fb4f839SGregory Neil Shapiro /*
2*2fb4f839SGregory Neil Shapiro  * Copyright (c) 2020 Proofpoint, Inc. and its suppliers.
3*2fb4f839SGregory Neil Shapiro  *	All rights reserved.
4*2fb4f839SGregory Neil Shapiro  *
5*2fb4f839SGregory Neil Shapiro  * By using this file, you agree to the terms and conditions set
6*2fb4f839SGregory Neil Shapiro  * forth in the LICENSE file which can be found at the top level of
7*2fb4f839SGregory Neil Shapiro  * the sendmail distribution.
8*2fb4f839SGregory Neil Shapiro  *
9*2fb4f839SGregory Neil Shapiro  */
10*2fb4f839SGregory Neil Shapiro 
11*2fb4f839SGregory Neil Shapiro #include <sm/gen.h>
12*2fb4f839SGregory Neil Shapiro #include <sm/sendmail.h>
13*2fb4f839SGregory Neil Shapiro 
14*2fb4f839SGregory Neil Shapiro #include <ctype.h>
15*2fb4f839SGregory Neil Shapiro #include <sm/string.h>
16*2fb4f839SGregory Neil Shapiro #include <sm/heap.h>
17*2fb4f839SGregory Neil Shapiro #if USE_EAI
18*2fb4f839SGregory Neil Shapiro # include <sm/ixlen.h>
19*2fb4f839SGregory Neil Shapiro # include <unicode/ucasemap.h>
20*2fb4f839SGregory Neil Shapiro # include <unicode/ustring.h>
21*2fb4f839SGregory Neil Shapiro # include <unicode/uchar.h>
22*2fb4f839SGregory Neil Shapiro 
/*
**  ASCIISTR -- test whether a string consists solely of printable ASCII
**
**	Parameters:
**		str -- string to examine (may be NULL)
**
**	Returns:
**		true iff str is NULL, or every byte in front of the
**		terminating NUL has a value in the range 32..126.
*/

bool
asciistr(str)
	const char *str;
{
	const unsigned char *cp;

	/* a missing string is treated like an empty one */
	if (str == NULL)
		return true;

	/*
	**  Skip over printable bytes.  NUL is below 32, so the scan
	**  also stops at the end of the string.
	*/

	cp = (const unsigned char *)str;
	while (*cp >= 32 && *cp < 127)
		cp++;

	/* stopped at the terminator: nothing offending was found */
	return *cp == '\0';
}
45*2fb4f839SGregory Neil Shapiro #endif /* USE_EAI */
46*2fb4f839SGregory Neil Shapiro 
/*
**  MAKELOWER -- convert a string to lower case
**
**	Parameters:
**		p -- string to convert (may be NULL). [A]
**
**	Returns:
**		NULL if p is NULL.
**		Otherwise p itself, after its upper case ASCII letters
**		have been replaced in place.
**		With USE_EAI, a string that is not printable ASCII is
**		not touched; the result of sm_lowercase() is returned
**		instead.
**
**	Side Effects:
**		The buffer p points to is overwritten, except in the
**		USE_EAI case described above.
*/

char *
makelower(p)
	char *p;
{
	char *cp;

	if (p == NULL)
		return p;
#if USE_EAI
	/* anything beyond printable ASCII is handed to the UTF-8 code */
	if (!asciistr(p))
		return (char *)sm_lowercase(p);
#endif
	for (cp = p; *cp != '\0'; cp++)
	{
		char ch = *cp;

		/* bytes with bit 7 set are left alone (same test as isascii()) */
		if ((unsigned char)ch > 0x7f)
			continue;
		if (isupper(ch))
			*cp = tolower(ch);
	}
	return p;
}
79*2fb4f839SGregory Neil Shapiro 
80*2fb4f839SGregory Neil Shapiro #if USE_EAI
81*2fb4f839SGregory Neil Shapiro /*
82*2fb4f839SGregory Neil Shapiro **  SM_LOWERCASE -- lower case a UTF-8 string
83*2fb4f839SGregory Neil Shapiro **	Note: this should ONLY be applied to a UTF-8 string,
84*2fb4f839SGregory Neil Shapiro **	i.e., the caller should check first if it isn't an ASCII string.
85*2fb4f839SGregory Neil Shapiro **
86*2fb4f839SGregory Neil Shapiro **	Parameters:
87*2fb4f839SGregory Neil Shapiro **		str -- original string
88*2fb4f839SGregory Neil Shapiro **
89*2fb4f839SGregory Neil Shapiro **	Returns:
90*2fb4f839SGregory Neil Shapiro **		lower case version of string [S]
91*2fb4f839SGregory Neil Shapiro **
92*2fb4f839SGregory Neil Shapiro **	How to return an error description due to failed unicode calls?
93*2fb4f839SGregory Neil Shapiro **	However, is that even relevant?
94*2fb4f839SGregory Neil Shapiro */
95*2fb4f839SGregory Neil Shapiro 
96*2fb4f839SGregory Neil Shapiro char *
97*2fb4f839SGregory Neil Shapiro sm_lowercase(str)
98*2fb4f839SGregory Neil Shapiro 	const char *str;
99*2fb4f839SGregory Neil Shapiro {
100*2fb4f839SGregory Neil Shapiro 	int olen, ilen;
101*2fb4f839SGregory Neil Shapiro 	UErrorCode error;
102*2fb4f839SGregory Neil Shapiro 	ssize_t req;
103*2fb4f839SGregory Neil Shapiro 	int n;
104*2fb4f839SGregory Neil Shapiro 	static UCaseMap *csm = NULL;
105*2fb4f839SGregory Neil Shapiro 	static char *out = NULL;
106*2fb4f839SGregory Neil Shapiro 	static int outlen = 0;
107*2fb4f839SGregory Neil Shapiro 
108*2fb4f839SGregory Neil Shapiro # if SM_CHECK_REQUIRE
109*2fb4f839SGregory Neil Shapiro 	if (sm_debug_active(&SmExpensiveRequire, 3))
110*2fb4f839SGregory Neil Shapiro 		SM_REQUIRE(!asciistr(str));
111*2fb4f839SGregory Neil Shapiro # endif
112*2fb4f839SGregory Neil Shapiro 	/* an empty string is always ASCII */
113*2fb4f839SGregory Neil Shapiro 	SM_REQUIRE(NULL != str && '\0' != *str);
114*2fb4f839SGregory Neil Shapiro 
115*2fb4f839SGregory Neil Shapiro 	if (NULL == csm)
116*2fb4f839SGregory Neil Shapiro 	{
117*2fb4f839SGregory Neil Shapiro 		error = U_ZERO_ERROR;
118*2fb4f839SGregory Neil Shapiro 		csm = ucasemap_open("en_US", U_FOLD_CASE_DEFAULT, &error);
119*2fb4f839SGregory Neil Shapiro 		if (U_SUCCESS(error) == 0)
120*2fb4f839SGregory Neil Shapiro 		{
121*2fb4f839SGregory Neil Shapiro 			/* syserr("ucasemap_open error: %s", u_errorName(error)); */
122*2fb4f839SGregory Neil Shapiro 			return NULL;
123*2fb4f839SGregory Neil Shapiro 		}
124*2fb4f839SGregory Neil Shapiro 	}
125*2fb4f839SGregory Neil Shapiro 
126*2fb4f839SGregory Neil Shapiro 	ilen = strlen(str);
127*2fb4f839SGregory Neil Shapiro 	olen = ilen + 1;
128*2fb4f839SGregory Neil Shapiro 	if (olen > outlen)
129*2fb4f839SGregory Neil Shapiro 	{
130*2fb4f839SGregory Neil Shapiro 		outlen = olen;
131*2fb4f839SGregory Neil Shapiro 		out = sm_realloc_x(out, outlen);
132*2fb4f839SGregory Neil Shapiro 	}
133*2fb4f839SGregory Neil Shapiro 
134*2fb4f839SGregory Neil Shapiro 	for (n = 0; n < 3; n++)
135*2fb4f839SGregory Neil Shapiro 	{
136*2fb4f839SGregory Neil Shapiro 		error = U_ZERO_ERROR;
137*2fb4f839SGregory Neil Shapiro 		req = ucasemap_utf8FoldCase(csm, out, olen, str, ilen, &error);
138*2fb4f839SGregory Neil Shapiro 		if (U_SUCCESS(error))
139*2fb4f839SGregory Neil Shapiro 		{
140*2fb4f839SGregory Neil Shapiro 			if (req >= olen)
141*2fb4f839SGregory Neil Shapiro 			{
142*2fb4f839SGregory Neil Shapiro 				outlen = req + 1;
143*2fb4f839SGregory Neil Shapiro 				out = sm_realloc_x(out, outlen);
144*2fb4f839SGregory Neil Shapiro 				out[req] = '\0';
145*2fb4f839SGregory Neil Shapiro 			}
146*2fb4f839SGregory Neil Shapiro 			break;
147*2fb4f839SGregory Neil Shapiro 		}
148*2fb4f839SGregory Neil Shapiro 		else if (error == U_BUFFER_OVERFLOW_ERROR)
149*2fb4f839SGregory Neil Shapiro 		{
150*2fb4f839SGregory Neil Shapiro 			outlen = req + 1;
151*2fb4f839SGregory Neil Shapiro 			out = sm_realloc_x(out, outlen);
152*2fb4f839SGregory Neil Shapiro 			olen = outlen;
153*2fb4f839SGregory Neil Shapiro 		}
154*2fb4f839SGregory Neil Shapiro 		else
155*2fb4f839SGregory Neil Shapiro 		{
156*2fb4f839SGregory Neil Shapiro 			/* syserr("conversion error for \"%s\": %s", str, u_errorName(error)); */
157*2fb4f839SGregory Neil Shapiro 			return NULL;
158*2fb4f839SGregory Neil Shapiro 		}
159*2fb4f839SGregory Neil Shapiro 	}
160*2fb4f839SGregory Neil Shapiro 	return out;
161*2fb4f839SGregory Neil Shapiro }
162*2fb4f839SGregory Neil Shapiro #endif /* USE_EAI */
163