1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #ifdef _KERNEL 27 #include <sys/types.h> 28 #include <sys/sunddi.h> 29 #else 30 #include <stdio.h> 31 #include <stdlib.h> 32 #include <string.h> 33 #include <strings.h> 34 #endif 35 #include <sys/u8_textprep.h> 36 #include <smbsrv/alloc.h> 37 #include <sys/errno.h> 38 #include <smbsrv/string.h> 39 #include <smbsrv/cp_usascii.h> 40 #include <smbsrv/cp_unicode.h> 41 42 #define UNICODE_N_ENTRIES (sizeof (a_unicode) / sizeof (a_unicode[0])) 43 44 /* 45 * Global pointer to the current codepage: defaults to ASCII, 46 * and a flag indicating whether the codepage is Unicode or ASCII. 47 */ 48 static smb_codepage_t *current_codepage = usascii_codepage; 49 static boolean_t is_unicode = B_FALSE; 50 51 static smb_codepage_t *smb_unicode_init(void); 52 53 /* 54 * strsubst 55 * 56 * Scan a string replacing all occurrences of orgchar with newchar. 57 * Returns a pointer to s, or null of s is null. 58 */ 59 char * 60 strsubst(char *s, char orgchar, char newchar) 61 { 62 char *p = s; 63 64 if (p == 0) 65 return (0); 66 67 while (*p) { 68 if (*p == orgchar) 69 *p = newchar; 70 ++p; 71 } 72 73 return (s); 74 } 75 76 /* 77 * strcanon 78 * 79 * Normalize a string by reducing all the repeated characters in 80 * buf as defined by class. For example; 81 * 82 * char *buf = strdup("/d1//d2//d3\\\\d4\\\\f1.txt"); 83 * strcanon(buf, "/\\"); 84 * 85 * Would result in buf containing the following string: 86 * 87 * /d1/d2/d3\d4\f1.txt 88 * 89 * This function modifies the contents of buf in place and returns 90 * a pointer to buf. 91 */ 92 char * 93 strcanon(char *buf, const char *class) 94 { 95 char *p = buf; 96 char *q = buf; 97 char *r; 98 99 while (*p) { 100 *q++ = *p; 101 102 if ((r = strchr(class, *p)) != 0) { 103 while (*p == *r) 104 ++p; 105 } else 106 ++p; 107 } 108 109 *q = '\0'; 110 return (buf); 111 } 112 113 void 114 smb_codepage_init(void) 115 { 116 smb_codepage_t *cp; 117 118 if (is_unicode) 119 return; 120 121 if ((cp = smb_unicode_init()) != NULL) { 122 current_codepage = cp; 123 is_unicode = B_TRUE; 124 } else { 125 current_codepage = usascii_codepage; 126 is_unicode = B_FALSE; 127 } 128 } 129 130 /* 131 * Determine whether or not a character is an uppercase character. 132 * This function operates on the current codepage table. Returns 133 * non-zero if the character is uppercase. Otherwise returns zero. 134 */ 135 int 136 smb_isupper(int c) 137 { 138 uint16_t mask = is_unicode ? 0xffff : 0xff; 139 140 return (current_codepage[c & mask].ctype & CODEPAGE_ISUPPER); 141 } 142 143 /* 144 * Determine whether or not a character is an lowercase character. 145 * This function operates on the current codepage table. Returns 146 * non-zero if the character is lowercase. Otherwise returns zero. 147 */ 148 int 149 smb_islower(int c) 150 { 151 uint16_t mask = is_unicode ? 0xffff : 0xff; 152 153 return (current_codepage[c & mask].ctype & CODEPAGE_ISLOWER); 154 } 155 156 /* 157 * Convert individual characters to their uppercase equivalent value. 158 * If the specified character is lowercase, the uppercase value will 159 * be returned. Otherwise the original value will be returned. 160 */ 161 int 162 smb_toupper(int c) 163 { 164 uint16_t mask = is_unicode ? 0xffff : 0xff; 165 166 return (current_codepage[c & mask].upper); 167 } 168 169 /* 170 * Convert individual characters to their lowercase equivalent value. 171 * If the specified character is uppercase, the lowercase value will 172 * be returned. Otherwise the original value will be returned. 173 */ 174 int 175 smb_tolower(int c) 176 { 177 uint16_t mask = is_unicode ? 0xffff : 0xff; 178 179 return (current_codepage[c & mask].lower); 180 } 181 182 /* 183 * Convert a string to uppercase using the appropriate codepage. The 184 * string is converted in place. A pointer to the string is returned. 185 * There is an assumption here that uppercase and lowercase values 186 * always result encode to the same length. 187 */ 188 char * 189 smb_strupr(char *s) 190 { 191 smb_wchar_t c; 192 char *p = s; 193 194 while (*p) { 195 if (smb_isascii(*p)) { 196 *p = smb_toupper(*p); 197 p++; 198 } else { 199 if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0) 200 return (0); 201 202 if (c == 0) 203 break; 204 205 c = smb_toupper(c); 206 p += smb_wctomb(p, c); 207 } 208 } 209 210 return (s); 211 } 212 213 /* 214 * Convert a string to lowercase using the appropriate codepage. The 215 * string is converted in place. A pointer to the string is returned. 216 * There is an assumption here that uppercase and lowercase values 217 * always result encode to the same length. 218 */ 219 char * 220 smb_strlwr(char *s) 221 { 222 smb_wchar_t c; 223 char *p = s; 224 225 while (*p) { 226 if (smb_isascii(*p)) { 227 *p = smb_tolower(*p); 228 p++; 229 } else { 230 if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0) 231 return (0); 232 233 if (c == 0) 234 break; 235 236 c = smb_tolower(c); 237 p += smb_wctomb(p, c); 238 } 239 } 240 241 return (s); 242 } 243 244 /* 245 * Returns 1 if string contains NO uppercase chars 0 otherwise. However, 246 * -1 is returned if "s" is not a valid multi-byte string. 247 */ 248 int 249 smb_isstrlwr(const char *s) 250 { 251 smb_wchar_t c; 252 int n; 253 const char *p = s; 254 255 while (*p) { 256 if (smb_isascii(*p) && smb_isupper(*p)) 257 return (0); 258 else { 259 if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0) 260 return (-1); 261 262 if (c == 0) 263 break; 264 265 if (smb_isupper(c)) 266 return (0); 267 268 p += n; 269 } 270 } 271 272 return (1); 273 } 274 275 /* 276 * Returns 1 if string contains NO lowercase chars 0 otherwise. However, 277 * -1 is returned if "s" is not a valid multi-byte string. 278 */ 279 int 280 smb_isstrupr(const char *s) 281 { 282 smb_wchar_t c; 283 int n; 284 const char *p = s; 285 286 while (*p) { 287 if (smb_isascii(*p) && smb_islower(*p)) 288 return (0); 289 else { 290 if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0) 291 return (-1); 292 293 if (c == 0) 294 break; 295 296 if (smb_islower(c)) 297 return (0); 298 299 p += n; 300 } 301 } 302 303 return (1); 304 } 305 306 /* 307 * Compare the null-terminated strings s1 and s2 and return an integer 308 * greater than, equal to or less than 0 dependent on whether s1 is 309 * lexicographically greater than, equal to or less than s2 after 310 * translation of each character to lowercase. The original strings 311 * are not modified. 312 * 313 * If n is non-zero, at most n bytes are compared. Otherwise, the strings 314 * are compared until a null terminator is encountered. 315 * 316 * Out: 0 if strings are equal 317 * < 0 if first string < second string 318 * > 0 if first string > second string 319 */ 320 int 321 smb_strcasecmp(const char *s1, const char *s2, size_t n) 322 { 323 int err = 0; 324 int rc; 325 326 rc = u8_strcmp(s1, s2, n, U8_STRCMP_CI_LOWER, U8_UNICODE_LATEST, &err); 327 if (err != 0) 328 return (-1); 329 return (rc); 330 } 331 332 /* 333 * First build a codepage based on cp_unicode.h. Then build the unicode 334 * codepage from this interim codepage by copying the entries over while 335 * fixing them and filling in the gaps. 336 */ 337 static smb_codepage_t * 338 smb_unicode_init(void) 339 { 340 smb_codepage_t *unicode; 341 uint32_t a = 0; 342 uint32_t b = 0; 343 344 unicode = MEM_ZALLOC("unicode", sizeof (smb_codepage_t) << 16); 345 if (unicode == NULL) 346 return (NULL); 347 348 while (b != 0xffff) { 349 /* 350 * If there is a gap in the standard, 351 * fill in the gap with no-case entries. 352 */ 353 if (UNICODE_N_ENTRIES <= a || a_unicode[a].val > b) { 354 unicode[b].ctype = CODEPAGE_ISNONE; 355 unicode[b].upper = (smb_wchar_t)b; 356 unicode[b].lower = (smb_wchar_t)b; 357 b++; 358 continue; 359 } 360 361 /* 362 * Copy the entry and fixup as required. 363 */ 364 switch (a_unicode[a].ctype) { 365 case CODEPAGE_ISNONE: 366 /* 367 * Replace 0xffff in upper/lower fields with its val. 368 */ 369 unicode[b].ctype = CODEPAGE_ISNONE; 370 unicode[b].upper = (smb_wchar_t)b; 371 unicode[b].lower = (smb_wchar_t)b; 372 break; 373 case CODEPAGE_ISUPPER: 374 /* 375 * Some characters may have case yet not have 376 * case conversion. Treat them as no-case. 377 */ 378 if (a_unicode[a].lower == 0xffff) { 379 unicode[b].ctype = CODEPAGE_ISNONE; 380 unicode[b].upper = (smb_wchar_t)b; 381 unicode[b].lower = (smb_wchar_t)b; 382 } else { 383 unicode[b].ctype = CODEPAGE_ISUPPER; 384 unicode[b].upper = (smb_wchar_t)b; 385 unicode[b].lower = a_unicode[a].lower; 386 } 387 break; 388 case CODEPAGE_ISLOWER: 389 /* 390 * Some characters may have case yet not have 391 * case conversion. Treat them as no-case. 392 */ 393 if (a_unicode[a].upper == 0xffff) { 394 unicode[b].ctype = CODEPAGE_ISNONE; 395 unicode[b].upper = (smb_wchar_t)b; 396 unicode[b].lower = (smb_wchar_t)b; 397 } else { 398 unicode[b].ctype = CODEPAGE_ISLOWER; 399 unicode[b].upper = a_unicode[a].upper; 400 unicode[b].lower = (smb_wchar_t)b; 401 } 402 break; 403 default: 404 MEM_FREE("unicode", unicode); 405 return (NULL); 406 } 407 408 a++; 409 b++; 410 }; 411 412 return (unicode); 413 } 414 415 /* 416 * Parse a UNC path (\\server\share\path) into its components. 417 * Although a standard UNC path starts with two '\', in DFS 418 * all UNC paths start with one '\'. So, this function only 419 * checks for one. 420 * 421 * A valid UNC must at least contain two components i.e. server 422 * and share. The path is parsed to: 423 * 424 * unc_server server or domain name with no leading/trailing '\' 425 * unc_share share name with no leading/trailing '\' 426 * unc_path relative path to the share with no leading/trailing '\' 427 * it is valid for unc_path to be NULL. 428 * 429 * Upon successful return of this function, smb_unc_free() 430 * MUST be called when returned 'unc' is no longer needed. 431 * 432 * Returns 0 on success, otherwise returns an errno code. 433 */ 434 int 435 smb_unc_init(const char *path, smb_unc_t *unc) 436 { 437 char *p; 438 439 if (path == NULL || unc == NULL || (*path != '\\' && *path != '/')) 440 return (EINVAL); 441 442 bzero(unc, sizeof (smb_unc_t)); 443 444 #ifdef _KERNEL 445 unc->unc_buf = smb_mem_strdup(path); 446 #else 447 if ((unc->unc_buf = strdup(path)) == NULL) 448 return (ENOMEM); 449 #endif 450 451 (void) strsubst(unc->unc_buf, '\\', '/'); 452 (void) strcanon(unc->unc_buf, "/"); 453 454 unc->unc_server = unc->unc_buf + 1; 455 if (*unc->unc_server == '\0') { 456 smb_unc_free(unc); 457 return (EINVAL); 458 } 459 460 if ((p = strchr(unc->unc_server, '/')) == NULL) { 461 smb_unc_free(unc); 462 return (EINVAL); 463 } 464 465 *p++ = '\0'; 466 unc->unc_share = p; 467 468 if (*unc->unc_share == '\0') { 469 smb_unc_free(unc); 470 return (EINVAL); 471 } 472 473 unc->unc_path = strchr(unc->unc_share, '/'); 474 if ((p = unc->unc_path) == NULL) 475 return (0); 476 477 unc->unc_path++; 478 *p = '\0'; 479 480 /* remove the last '/' if any */ 481 if ((p = strchr(unc->unc_path, '\0')) != NULL) { 482 if (*(--p) == '/') 483 *p = '\0'; 484 } 485 486 return (0); 487 } 488 489 void 490 smb_unc_free(smb_unc_t *unc) 491 { 492 if (unc == NULL) 493 return; 494 495 #ifdef _KERNEL 496 smb_mem_free(unc->unc_buf); 497 #else 498 free(unc->unc_buf); 499 #endif 500 unc->unc_buf = NULL; 501 } 502