1 /* 2 * Copyright (c) 2020 Proofpoint, Inc. and its suppliers. 3 * All rights reserved. 4 * 5 * By using this file, you agree to the terms and conditions set 6 * forth in the LICENSE file which can be found at the top level of 7 * the sendmail distribution. 8 * 9 */ 10 11 #include <sm/gen.h> 12 #include <sm/sendmail.h> 13 14 #include <ctype.h> 15 #include <sm/string.h> 16 #include <sm/heap.h> 17 #if USE_EAI 18 # include <sm/limits.h> 19 # include <unicode/ucasemap.h> 20 # include <unicode/ustring.h> 21 # include <unicode/uchar.h> 22 # include <sm/ixlen.h> 23 24 /* 25 ** ASCIISTR -- check whether a string is printable ASCII 26 ** 27 ** Parameters: 28 ** str -- string 29 ** 30 ** Returns: 31 ** TRUE iff printable ASCII 32 */ 33 34 bool 35 asciistr(str) 36 const char *str; 37 { 38 unsigned char ch; 39 40 if (str == NULL) 41 return true; 42 while ((ch = (unsigned char)*str) != '\0' && ch >= 32 && ch < 127) 43 str++; 44 return ch == '\0'; 45 } 46 47 /* 48 ** ASCIINSTR -- check whether a string is printable ASCII up to len 49 ** 50 ** Parameters: 51 ** str -- string 52 ** len -- length to check 53 ** 54 ** Returns: 55 ** TRUE iff printable ASCII 56 */ 57 58 bool 59 asciinstr(str, len) 60 const char *str; 61 size_t len; 62 { 63 unsigned char ch; 64 int n; 65 66 if (str == NULL) 67 return true; 68 SM_REQUIRE(len < INT_MAX); 69 n = 0; 70 while (n < len && (ch = (unsigned char)*str) != '\0' 71 && ch >= 32 && ch < 127) 72 { 73 n++; 74 str++; 75 } 76 return n == len || ch == '\0'; 77 } 78 #endif /* USE_EAI */ 79 80 /* 81 ** MAKELOWER -- Translate a line into lower case 82 ** 83 ** Parameters: 84 ** p -- string to translate (modified in place if possible). [A] 85 ** 86 ** Returns: 87 ** lower cased string 88 ** 89 ** Side Effects: 90 ** String p is translated to lower case if possible. 91 */ 92 93 char * 94 makelower(p) 95 char *p; 96 { 97 char c; 98 char *orig; 99 100 if (p == NULL) 101 return p; 102 orig = p; 103 #if USE_EAI 104 if (!asciistr(p)) 105 return (char *)sm_lowercase(p); 106 #endif 107 for (; (c = *p) != '\0'; p++) 108 if (isascii(c) && isupper(c)) 109 *p = tolower(c); 110 return orig; 111 } 112 113 #if USE_EAI 114 /* 115 ** SM_LOWERCASE -- lower case a UTF-8 string 116 ** Note: this should ONLY be applied to a UTF-8 string, 117 ** i.e., the caller should check first if it isn't an ASCII string. 118 ** 119 ** Parameters: 120 ** str -- original string 121 ** 122 ** Returns: 123 ** lower case version of string [S] 124 ** 125 ** How to return an error description due to failed unicode calls? 126 ** However, is that even relevant? 127 */ 128 129 char * 130 sm_lowercase(str) 131 const char *str; 132 { 133 int olen, ilen; 134 UErrorCode error; 135 ssize_t req; 136 int n; 137 static UCaseMap *csm = NULL; 138 static char *out = NULL; 139 static int outlen = 0; 140 141 # if SM_CHECK_REQUIRE 142 if (sm_debug_active(&SmExpensiveRequire, 3)) 143 SM_REQUIRE(!asciistr(str)); 144 # endif 145 /* an empty string is always ASCII */ 146 SM_REQUIRE(NULL != str && '\0' != *str); 147 148 if (NULL == csm) 149 { 150 error = U_ZERO_ERROR; 151 csm = ucasemap_open("en_US", U_FOLD_CASE_DEFAULT, &error); 152 if (U_SUCCESS(error) == 0) 153 { 154 /* syserr("ucasemap_open error: %s", u_errorName(error)); */ 155 return NULL; 156 } 157 } 158 159 ilen = strlen(str); 160 olen = ilen + 1; 161 if (olen > outlen) 162 { 163 outlen = olen; 164 out = sm_realloc_x(out, outlen); 165 } 166 167 for (n = 0; n < 3; n++) 168 { 169 error = U_ZERO_ERROR; 170 req = ucasemap_utf8FoldCase(csm, out, olen, str, ilen, &error); 171 if (U_SUCCESS(error)) 172 { 173 if (req >= olen) 174 { 175 outlen = req + 1; 176 out = sm_realloc_x(out, outlen); 177 out[req] = '\0'; 178 } 179 break; 180 } 181 else if (error == U_BUFFER_OVERFLOW_ERROR) 182 { 183 outlen = req + 1; 184 out = sm_realloc_x(out, outlen); 185 olen = outlen; 186 } 187 else 188 { 189 /* syserr("conversion error for \"%s\": %s", str, u_errorName(error)); */ 190 return NULL; 191 } 192 } 193 return out; 194 } 195 #endif /* USE_EAI */ 196