1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #ifdef _KERNEL 28 #include <sys/types.h> 29 #include <sys/sunddi.h> 30 #else 31 #include <stdio.h> 32 #include <stdlib.h> 33 #include <string.h> 34 #include <strings.h> 35 #endif 36 #include <sys/u8_textprep.h> 37 #include <smbsrv/alloc.h> 38 #include <sys/errno.h> 39 #include <smbsrv/string.h> 40 #include <smbsrv/cp_usascii.h> 41 #include <smbsrv/cp_unicode.h> 42 43 #define UNICODE_N_ENTRIES (sizeof (a_unicode) / sizeof (a_unicode[0])) 44 45 /* 46 * Global pointer to the current codepage: defaults to ASCII, 47 * and a flag indicating whether the codepage is Unicode or ASCII. 48 */ 49 static const smb_codepage_t *current_codepage = usascii_codepage; 50 static boolean_t is_unicode = B_FALSE; 51 52 static smb_codepage_t *smb_unicode_init(void); 53 54 /* 55 * strsubst 56 * 57 * Scan a string replacing all occurrences of orgchar with newchar. 58 * Returns a pointer to s, or null of s is null. 59 */ 60 char * 61 strsubst(char *s, char orgchar, char newchar) 62 { 63 char *p = s; 64 65 if (p == 0) 66 return (0); 67 68 while (*p) { 69 if (*p == orgchar) 70 *p = newchar; 71 ++p; 72 } 73 74 return (s); 75 } 76 77 /* 78 * strcanon 79 * 80 * Normalize a string by reducing all the repeated characters in 81 * buf as defined by class. For example; 82 * 83 * char *buf = strdup("/d1//d2//d3\\\\d4\\\\f1.txt"); 84 * strcanon(buf, "/\\"); 85 * 86 * Would result in buf containing the following string: 87 * 88 * /d1/d2/d3\d4\f1.txt 89 * 90 * This function modifies the contents of buf in place and returns 91 * a pointer to buf. 92 */ 93 char * 94 strcanon(char *buf, const char *class) 95 { 96 char *p = buf; 97 char *q = buf; 98 char *r; 99 100 while (*p) { 101 *q++ = *p; 102 103 if ((r = strchr(class, *p)) != 0) { 104 while (*p == *r) 105 ++p; 106 } else 107 ++p; 108 } 109 110 *q = '\0'; 111 return (buf); 112 } 113 114 void 115 smb_codepage_init(void) 116 { 117 const smb_codepage_t *cp; 118 119 if (is_unicode) 120 return; 121 122 if ((cp = smb_unicode_init()) != NULL) { 123 current_codepage = cp; 124 is_unicode = B_TRUE; 125 } else { 126 current_codepage = usascii_codepage; 127 is_unicode = B_FALSE; 128 } 129 } 130 131 /* 132 * Determine whether or not a character is an uppercase character. 133 * This function operates on the current codepage table. Returns 134 * non-zero if the character is uppercase. Otherwise returns zero. 135 */ 136 int 137 smb_isupper(int c) 138 { 139 uint16_t mask = is_unicode ? 0xffff : 0xff; 140 141 return (current_codepage[c & mask].ctype & CODEPAGE_ISUPPER); 142 } 143 144 /* 145 * Determine whether or not a character is an lowercase character. 146 * This function operates on the current codepage table. Returns 147 * non-zero if the character is lowercase. Otherwise returns zero. 148 */ 149 int 150 smb_islower(int c) 151 { 152 uint16_t mask = is_unicode ? 0xffff : 0xff; 153 154 return (current_codepage[c & mask].ctype & CODEPAGE_ISLOWER); 155 } 156 157 /* 158 * Convert individual characters to their uppercase equivalent value. 159 * If the specified character is lowercase, the uppercase value will 160 * be returned. Otherwise the original value will be returned. 161 */ 162 int 163 smb_toupper(int c) 164 { 165 uint16_t mask = is_unicode ? 0xffff : 0xff; 166 167 return (current_codepage[c & mask].upper); 168 } 169 170 /* 171 * Convert individual characters to their lowercase equivalent value. 172 * If the specified character is uppercase, the lowercase value will 173 * be returned. Otherwise the original value will be returned. 174 */ 175 int 176 smb_tolower(int c) 177 { 178 uint16_t mask = is_unicode ? 0xffff : 0xff; 179 180 return (current_codepage[c & mask].lower); 181 } 182 183 /* 184 * Convert a string to uppercase using the appropriate codepage. The 185 * string is converted in place. A pointer to the string is returned. 186 * There is an assumption here that uppercase and lowercase values 187 * always result encode to the same length. 188 */ 189 char * 190 smb_strupr(char *s) 191 { 192 smb_wchar_t c; 193 char *p = s; 194 195 while (*p) { 196 if (smb_isascii(*p)) { 197 *p = smb_toupper(*p); 198 p++; 199 } else { 200 if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0) 201 return (0); 202 203 if (c == 0) 204 break; 205 206 c = smb_toupper(c); 207 p += smb_wctomb(p, c); 208 } 209 } 210 211 return (s); 212 } 213 214 /* 215 * Convert a string to lowercase using the appropriate codepage. The 216 * string is converted in place. A pointer to the string is returned. 217 * There is an assumption here that uppercase and lowercase values 218 * always result encode to the same length. 219 */ 220 char * 221 smb_strlwr(char *s) 222 { 223 smb_wchar_t c; 224 char *p = s; 225 226 while (*p) { 227 if (smb_isascii(*p)) { 228 *p = smb_tolower(*p); 229 p++; 230 } else { 231 if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0) 232 return (0); 233 234 if (c == 0) 235 break; 236 237 c = smb_tolower(c); 238 p += smb_wctomb(p, c); 239 } 240 } 241 242 return (s); 243 } 244 245 /* 246 * Returns 1 if string contains NO uppercase chars 0 otherwise. However, 247 * -1 is returned if "s" is not a valid multi-byte string. 248 */ 249 int 250 smb_isstrlwr(const char *s) 251 { 252 smb_wchar_t c; 253 int n; 254 const char *p = s; 255 256 while (*p) { 257 if (smb_isascii(*p) && smb_isupper(*p)) 258 return (0); 259 else { 260 if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0) 261 return (-1); 262 263 if (c == 0) 264 break; 265 266 if (smb_isupper(c)) 267 return (0); 268 269 p += n; 270 } 271 } 272 273 return (1); 274 } 275 276 /* 277 * Returns 1 if string contains NO lowercase chars 0 otherwise. However, 278 * -1 is returned if "s" is not a valid multi-byte string. 279 */ 280 int 281 smb_isstrupr(const char *s) 282 { 283 smb_wchar_t c; 284 int n; 285 const char *p = s; 286 287 while (*p) { 288 if (smb_isascii(*p) && smb_islower(*p)) 289 return (0); 290 else { 291 if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0) 292 return (-1); 293 294 if (c == 0) 295 break; 296 297 if (smb_islower(c)) 298 return (0); 299 300 p += n; 301 } 302 } 303 304 return (1); 305 } 306 307 /* 308 * Compare the null-terminated strings s1 and s2 and return an integer 309 * greater than, equal to or less than 0 dependent on whether s1 is 310 * lexicographically greater than, equal to or less than s2 after 311 * translation of each character to lowercase. The original strings 312 * are not modified. 313 * 314 * If n is non-zero, at most n bytes are compared. Otherwise, the strings 315 * are compared until a null terminator is encountered. 316 * 317 * Out: 0 if strings are equal 318 * < 0 if first string < second string 319 * > 0 if first string > second string 320 */ 321 int 322 smb_strcasecmp(const char *s1, const char *s2, size_t n) 323 { 324 int err = 0; 325 int rc; 326 327 rc = u8_strcmp(s1, s2, n, U8_STRCMP_CI_LOWER, U8_UNICODE_LATEST, &err); 328 if (err != 0) 329 return (-1); 330 return (rc); 331 } 332 333 /* 334 * First build a codepage based on cp_unicode.h. Then build the unicode 335 * codepage from this interim codepage by copying the entries over while 336 * fixing them and filling in the gaps. 337 */ 338 static smb_codepage_t * 339 smb_unicode_init(void) 340 { 341 smb_codepage_t *unicode; 342 uint32_t a = 0; 343 uint32_t b = 0; 344 345 unicode = MEM_ZALLOC("unicode", sizeof (smb_codepage_t) << 16); 346 if (unicode == NULL) 347 return (NULL); 348 349 while (b != 0xffff) { 350 /* 351 * If there is a gap in the standard, 352 * fill in the gap with no-case entries. 353 */ 354 if (UNICODE_N_ENTRIES <= a || a_unicode[a].val > b) { 355 unicode[b].ctype = CODEPAGE_ISNONE; 356 unicode[b].upper = (smb_wchar_t)b; 357 unicode[b].lower = (smb_wchar_t)b; 358 b++; 359 continue; 360 } 361 362 /* 363 * Copy the entry and fixup as required. 364 */ 365 switch (a_unicode[a].ctype) { 366 case CODEPAGE_ISNONE: 367 /* 368 * Replace 0xffff in upper/lower fields with its val. 369 */ 370 unicode[b].ctype = CODEPAGE_ISNONE; 371 unicode[b].upper = (smb_wchar_t)b; 372 unicode[b].lower = (smb_wchar_t)b; 373 break; 374 case CODEPAGE_ISUPPER: 375 /* 376 * Some characters may have case yet not have 377 * case conversion. Treat them as no-case. 378 */ 379 if (a_unicode[a].lower == 0xffff) { 380 unicode[b].ctype = CODEPAGE_ISNONE; 381 unicode[b].upper = (smb_wchar_t)b; 382 unicode[b].lower = (smb_wchar_t)b; 383 } else { 384 unicode[b].ctype = CODEPAGE_ISUPPER; 385 unicode[b].upper = (smb_wchar_t)b; 386 unicode[b].lower = a_unicode[a].lower; 387 } 388 break; 389 case CODEPAGE_ISLOWER: 390 /* 391 * Some characters may have case yet not have 392 * case conversion. Treat them as no-case. 393 */ 394 if (a_unicode[a].upper == 0xffff) { 395 unicode[b].ctype = CODEPAGE_ISNONE; 396 unicode[b].upper = (smb_wchar_t)b; 397 unicode[b].lower = (smb_wchar_t)b; 398 } else { 399 unicode[b].ctype = CODEPAGE_ISLOWER; 400 unicode[b].upper = a_unicode[a].upper; 401 unicode[b].lower = (smb_wchar_t)b; 402 } 403 break; 404 default: 405 MEM_FREE("unicode", unicode); 406 return (NULL); 407 } 408 409 a++; 410 b++; 411 }; 412 413 return (unicode); 414 } 415 416 /* 417 * Parse a UNC path (\\server\share\path) into its components. 418 * Although a standard UNC path starts with two '\', in DFS 419 * all UNC paths start with one '\'. So, this function only 420 * checks for one. 421 * 422 * A valid UNC must at least contain two components i.e. server 423 * and share. The path is parsed to: 424 * 425 * unc_server server or domain name with no leading/trailing '\' 426 * unc_share share name with no leading/trailing '\' 427 * unc_path relative path to the share with no leading/trailing '\' 428 * it is valid for unc_path to be NULL. 429 * 430 * Upon successful return of this function, smb_unc_free() 431 * MUST be called when returned 'unc' is no longer needed. 432 * 433 * Returns 0 on success, otherwise returns an errno code. 434 */ 435 int 436 smb_unc_init(const char *path, smb_unc_t *unc) 437 { 438 char *p; 439 440 if (path == NULL || unc == NULL || (*path != '\\' && *path != '/')) 441 return (EINVAL); 442 443 bzero(unc, sizeof (smb_unc_t)); 444 445 #ifdef _KERNEL 446 unc->unc_buf = smb_mem_strdup(path); 447 #else 448 if ((unc->unc_buf = strdup(path)) == NULL) 449 return (ENOMEM); 450 #endif 451 452 (void) strsubst(unc->unc_buf, '\\', '/'); 453 (void) strcanon(unc->unc_buf, "/"); 454 455 unc->unc_server = unc->unc_buf + 1; 456 if (*unc->unc_server == '\0') { 457 smb_unc_free(unc); 458 return (EINVAL); 459 } 460 461 if ((p = strchr(unc->unc_server, '/')) == NULL) { 462 smb_unc_free(unc); 463 return (EINVAL); 464 } 465 466 *p++ = '\0'; 467 unc->unc_share = p; 468 469 if (*unc->unc_share == '\0') { 470 smb_unc_free(unc); 471 return (EINVAL); 472 } 473 474 unc->unc_path = strchr(unc->unc_share, '/'); 475 if ((p = unc->unc_path) == NULL) 476 return (0); 477 478 unc->unc_path++; 479 *p = '\0'; 480 481 /* remove the last '/' if any */ 482 if ((p = strchr(unc->unc_path, '\0')) != NULL) { 483 if (*(--p) == '/') 484 *p = '\0'; 485 } 486 487 return (0); 488 } 489 490 void 491 smb_unc_free(smb_unc_t *unc) 492 { 493 if (unc == NULL) 494 return; 495 496 #ifdef _KERNEL 497 smb_mem_free(unc->unc_buf); 498 #else 499 free(unc->unc_buf); 500 #endif 501 unc->unc_buf = NULL; 502 } 503