1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #ifdef _KERNEL 27 #include <sys/types.h> 28 #include <sys/sunddi.h> 29 #else 30 #include <stdio.h> 31 #include <stdlib.h> 32 #include <string.h> 33 #include <strings.h> 34 #endif 35 #include <sys/u8_textprep.h> 36 #include <smbsrv/alloc.h> 37 #include <smbsrv/string.h> 38 #include <smbsrv/cp_usascii.h> 39 #include <smbsrv/cp_unicode.h> 40 41 #define UNICODE_N_ENTRIES (sizeof (a_unicode) / sizeof (a_unicode[0])) 42 43 /* 44 * Global pointer to the current codepage: defaults to ASCII, 45 * and a flag indicating whether the codepage is Unicode or ASCII. 46 */ 47 static smb_codepage_t *current_codepage = usascii_codepage; 48 static boolean_t is_unicode = B_FALSE; 49 50 static smb_codepage_t *smb_unicode_init(void); 51 52 /* 53 * strsubst 54 * 55 * Scan a string replacing all occurrences of orgchar with newchar. 56 * Returns a pointer to s, or null of s is null. 57 */ 58 char * 59 strsubst(char *s, char orgchar, char newchar) 60 { 61 char *p = s; 62 63 if (p == 0) 64 return (0); 65 66 while (*p) { 67 if (*p == orgchar) 68 *p = newchar; 69 ++p; 70 } 71 72 return (s); 73 } 74 75 /* 76 * strcanon 77 * 78 * Normalize a string by reducing all the repeated characters in 79 * buf as defined by class. For example; 80 * 81 * char *buf = strdup("/d1//d2//d3\\\\d4\\\\f1.txt"); 82 * strcanon(buf, "/\\"); 83 * 84 * Would result in buf containing the following string: 85 * 86 * /d1/d2/d3\d4\f1.txt 87 * 88 * This function modifies the contents of buf in place and returns 89 * a pointer to buf. 90 */ 91 char * 92 strcanon(char *buf, const char *class) 93 { 94 char *p = buf; 95 char *q = buf; 96 char *r; 97 98 while (*p) { 99 *q++ = *p; 100 101 if ((r = strchr(class, *p)) != 0) { 102 while (*p == *r) 103 ++p; 104 } else 105 ++p; 106 } 107 108 *q = '\0'; 109 return (buf); 110 } 111 112 void 113 smb_codepage_init(void) 114 { 115 smb_codepage_t *cp; 116 117 if (is_unicode) 118 return; 119 120 if ((cp = smb_unicode_init()) != NULL) { 121 current_codepage = cp; 122 is_unicode = B_TRUE; 123 } else { 124 current_codepage = usascii_codepage; 125 is_unicode = B_FALSE; 126 } 127 } 128 129 /* 130 * Determine whether or not a character is an uppercase character. 131 * This function operates on the current codepage table. Returns 132 * non-zero if the character is uppercase. Otherwise returns zero. 133 */ 134 int 135 smb_isupper(int c) 136 { 137 uint16_t mask = is_unicode ? 0xffff : 0xff; 138 139 return (current_codepage[c & mask].ctype & CODEPAGE_ISUPPER); 140 } 141 142 /* 143 * Determine whether or not a character is an lowercase character. 144 * This function operates on the current codepage table. Returns 145 * non-zero if the character is lowercase. Otherwise returns zero. 146 */ 147 int 148 smb_islower(int c) 149 { 150 uint16_t mask = is_unicode ? 0xffff : 0xff; 151 152 return (current_codepage[c & mask].ctype & CODEPAGE_ISLOWER); 153 } 154 155 /* 156 * Convert individual characters to their uppercase equivalent value. 157 * If the specified character is lowercase, the uppercase value will 158 * be returned. Otherwise the original value will be returned. 159 */ 160 int 161 smb_toupper(int c) 162 { 163 uint16_t mask = is_unicode ? 0xffff : 0xff; 164 165 return (current_codepage[c & mask].upper); 166 } 167 168 /* 169 * Convert individual characters to their lowercase equivalent value. 170 * If the specified character is uppercase, the lowercase value will 171 * be returned. Otherwise the original value will be returned. 172 */ 173 int 174 smb_tolower(int c) 175 { 176 uint16_t mask = is_unicode ? 0xffff : 0xff; 177 178 return (current_codepage[c & mask].lower); 179 } 180 181 /* 182 * Convert a string to uppercase using the appropriate codepage. The 183 * string is converted in place. A pointer to the string is returned. 184 * There is an assumption here that uppercase and lowercase values 185 * always result encode to the same length. 186 */ 187 char * 188 smb_strupr(char *s) 189 { 190 smb_wchar_t c; 191 char *p = s; 192 193 while (*p) { 194 if (smb_isascii(*p)) { 195 *p = smb_toupper(*p); 196 p++; 197 } else { 198 if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0) 199 return (0); 200 201 if (c == 0) 202 break; 203 204 c = smb_toupper(c); 205 p += smb_wctomb(p, c); 206 } 207 } 208 209 return (s); 210 } 211 212 /* 213 * Convert a string to lowercase using the appropriate codepage. The 214 * string is converted in place. A pointer to the string is returned. 215 * There is an assumption here that uppercase and lowercase values 216 * always result encode to the same length. 217 */ 218 char * 219 smb_strlwr(char *s) 220 { 221 smb_wchar_t c; 222 char *p = s; 223 224 while (*p) { 225 if (smb_isascii(*p)) { 226 *p = smb_tolower(*p); 227 p++; 228 } else { 229 if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0) 230 return (0); 231 232 if (c == 0) 233 break; 234 235 c = smb_tolower(c); 236 p += smb_wctomb(p, c); 237 } 238 } 239 240 return (s); 241 } 242 243 /* 244 * Returns 1 if string contains NO uppercase chars 0 otherwise. However, 245 * -1 is returned if "s" is not a valid multi-byte string. 246 */ 247 int 248 smb_isstrlwr(const char *s) 249 { 250 smb_wchar_t c; 251 int n; 252 const char *p = s; 253 254 while (*p) { 255 if (smb_isascii(*p) && smb_isupper(*p)) 256 return (0); 257 else { 258 if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0) 259 return (-1); 260 261 if (c == 0) 262 break; 263 264 if (smb_isupper(c)) 265 return (0); 266 267 p += n; 268 } 269 } 270 271 return (1); 272 } 273 274 /* 275 * Returns 1 if string contains NO lowercase chars 0 otherwise. However, 276 * -1 is returned if "s" is not a valid multi-byte string. 277 */ 278 int 279 smb_isstrupr(const char *s) 280 { 281 smb_wchar_t c; 282 int n; 283 const char *p = s; 284 285 while (*p) { 286 if (smb_isascii(*p) && smb_islower(*p)) 287 return (0); 288 else { 289 if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0) 290 return (-1); 291 292 if (c == 0) 293 break; 294 295 if (smb_islower(c)) 296 return (0); 297 298 p += n; 299 } 300 } 301 302 return (1); 303 } 304 305 /* 306 * Compare the null-terminated strings s1 and s2 and return an integer 307 * greater than, equal to or less than 0 dependent on whether s1 is 308 * lexicographically greater than, equal to or less than s2 after 309 * translation of each character to lowercase. The original strings 310 * are not modified. 311 * 312 * If n is non-zero, at most n bytes are compared. Otherwise, the strings 313 * are compared until a null terminator is encountered. 314 * 315 * Out: 0 if strings are equal 316 * < 0 if first string < second string 317 * > 0 if first string > second string 318 */ 319 int 320 smb_strcasecmp(const char *s1, const char *s2, size_t n) 321 { 322 int err = 0; 323 int rc; 324 325 rc = u8_strcmp(s1, s2, n, U8_STRCMP_CI_LOWER, U8_UNICODE_LATEST, &err); 326 if (err != 0) 327 return (-1); 328 return (rc); 329 } 330 331 /* 332 * First build a codepage based on cp_unicode.h. Then build the unicode 333 * codepage from this interim codepage by copying the entries over while 334 * fixing them and filling in the gaps. 335 */ 336 static smb_codepage_t * 337 smb_unicode_init(void) 338 { 339 smb_codepage_t *unicode; 340 uint32_t a = 0; 341 uint32_t b = 0; 342 343 unicode = MEM_ZALLOC("unicode", sizeof (smb_codepage_t) << 16); 344 if (unicode == NULL) 345 return (NULL); 346 347 while (b != 0xffff) { 348 /* 349 * If there is a gap in the standard, 350 * fill in the gap with no-case entries. 351 */ 352 if (UNICODE_N_ENTRIES <= a || a_unicode[a].val > b) { 353 unicode[b].ctype = CODEPAGE_ISNONE; 354 unicode[b].upper = (smb_wchar_t)b; 355 unicode[b].lower = (smb_wchar_t)b; 356 b++; 357 continue; 358 } 359 360 /* 361 * Copy the entry and fixup as required. 362 */ 363 switch (a_unicode[a].ctype) { 364 case CODEPAGE_ISNONE: 365 /* 366 * Replace 0xffff in upper/lower fields with its val. 367 */ 368 unicode[b].ctype = CODEPAGE_ISNONE; 369 unicode[b].upper = (smb_wchar_t)b; 370 unicode[b].lower = (smb_wchar_t)b; 371 break; 372 case CODEPAGE_ISUPPER: 373 /* 374 * Some characters may have case yet not have 375 * case conversion. Treat them as no-case. 376 */ 377 if (a_unicode[a].lower == 0xffff) { 378 unicode[b].ctype = CODEPAGE_ISNONE; 379 unicode[b].upper = (smb_wchar_t)b; 380 unicode[b].lower = (smb_wchar_t)b; 381 } else { 382 unicode[b].ctype = CODEPAGE_ISUPPER; 383 unicode[b].upper = (smb_wchar_t)b; 384 unicode[b].lower = a_unicode[a].lower; 385 } 386 break; 387 case CODEPAGE_ISLOWER: 388 /* 389 * Some characters may have case yet not have 390 * case conversion. Treat them as no-case. 391 */ 392 if (a_unicode[a].upper == 0xffff) { 393 unicode[b].ctype = CODEPAGE_ISNONE; 394 unicode[b].upper = (smb_wchar_t)b; 395 unicode[b].lower = (smb_wchar_t)b; 396 } else { 397 unicode[b].ctype = CODEPAGE_ISLOWER; 398 unicode[b].upper = a_unicode[a].upper; 399 unicode[b].lower = (smb_wchar_t)b; 400 } 401 break; 402 default: 403 MEM_FREE("unicode", unicode); 404 return (NULL); 405 } 406 407 a++; 408 b++; 409 }; 410 411 return (unicode); 412 } 413