1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * 25 * Copyright 2014 Nexenta Systems, Inc. All rights reserved. 26 */ 27 28 #if defined(_KERNEL) || defined(_FAKE_KERNEL) 29 #include <sys/types.h> 30 #include <sys/sunddi.h> 31 #else 32 #include <stdio.h> 33 #include <stdlib.h> 34 #include <string.h> 35 #include <strings.h> 36 #endif 37 #include <sys/u8_textprep.h> 38 #include <smbsrv/alloc.h> 39 #include <sys/errno.h> 40 #include <smbsrv/string.h> 41 #include <smbsrv/cp_usascii.h> 42 #include <smbsrv/cp_unicode.h> 43 44 #define UNICODE_N_ENTRIES (sizeof (a_unicode) / sizeof (a_unicode[0])) 45 46 /* 47 * Global pointer to the current codepage: defaults to ASCII, 48 * and a flag indicating whether the codepage is Unicode or ASCII. 49 */ 50 static const smb_codepage_t *current_codepage = usascii_codepage; 51 static boolean_t is_unicode = B_FALSE; 52 53 static smb_codepage_t *smb_unicode_init(void); 54 55 /* 56 * strsubst 57 * 58 * Scan a string replacing all occurrences of orgchar with newchar. 59 * Returns a pointer to s, or null of s is null. 60 */ 61 char * 62 strsubst(char *s, char orgchar, char newchar) 63 { 64 char *p = s; 65 66 if (p == 0) 67 return (0); 68 69 while (*p) { 70 if (*p == orgchar) 71 *p = newchar; 72 ++p; 73 } 74 75 return (s); 76 } 77 78 /* 79 * strcanon 80 * 81 * Normalize a string by reducing all the repeated characters in 82 * buf as defined by class. For example; 83 * 84 * char *buf = strdup("/d1//d2//d3\\\\d4\\\\f1.txt"); 85 * strcanon(buf, "/\\"); 86 * 87 * Would result in buf containing the following string: 88 * 89 * /d1/d2/d3\d4\f1.txt 90 * 91 * This function modifies the contents of buf in place and returns 92 * a pointer to buf. 93 */ 94 char * 95 strcanon(char *buf, const char *class) 96 { 97 char *p = buf; 98 char *q = buf; 99 char *r; 100 101 while (*p) { 102 *q++ = *p; 103 104 if ((r = strchr(class, *p)) != 0) { 105 while (*p == *r) 106 ++p; 107 } else 108 ++p; 109 } 110 111 *q = '\0'; 112 return (buf); 113 } 114 115 void 116 smb_codepage_init(void) 117 { 118 const smb_codepage_t *cp; 119 120 if (is_unicode) 121 return; 122 123 if ((cp = smb_unicode_init()) != NULL) { 124 current_codepage = cp; 125 is_unicode = B_TRUE; 126 } else { 127 current_codepage = usascii_codepage; 128 is_unicode = B_FALSE; 129 } 130 } 131 132 /* 133 * Determine whether or not a character is an uppercase character. 134 * This function operates on the current codepage table. Returns 135 * non-zero if the character is uppercase. Otherwise returns zero. 136 */ 137 int 138 smb_isupper(int c) 139 { 140 uint16_t mask = is_unicode ? 0xffff : 0xff; 141 142 return (current_codepage[c & mask].ctype & CODEPAGE_ISUPPER); 143 } 144 145 /* 146 * Determine whether or not a character is an lowercase character. 147 * This function operates on the current codepage table. Returns 148 * non-zero if the character is lowercase. Otherwise returns zero. 149 */ 150 int 151 smb_islower(int c) 152 { 153 uint16_t mask = is_unicode ? 0xffff : 0xff; 154 155 return (current_codepage[c & mask].ctype & CODEPAGE_ISLOWER); 156 } 157 158 /* 159 * Convert individual characters to their uppercase equivalent value. 160 * If the specified character is lowercase, the uppercase value will 161 * be returned. Otherwise the original value will be returned. 162 */ 163 int 164 smb_toupper(int c) 165 { 166 uint16_t mask = is_unicode ? 0xffff : 0xff; 167 168 return (current_codepage[c & mask].upper); 169 } 170 171 /* 172 * Convert individual characters to their lowercase equivalent value. 173 * If the specified character is uppercase, the lowercase value will 174 * be returned. Otherwise the original value will be returned. 175 */ 176 int 177 smb_tolower(int c) 178 { 179 uint16_t mask = is_unicode ? 0xffff : 0xff; 180 181 return (current_codepage[c & mask].lower); 182 } 183 184 /* 185 * Convert a string to uppercase using the appropriate codepage. The 186 * string is converted in place. A pointer to the string is returned. 187 * There is an assumption here that uppercase and lowercase values 188 * always result encode to the same length. 189 */ 190 char * 191 smb_strupr(char *s) 192 { 193 smb_wchar_t c; 194 char *p = s; 195 196 while (*p) { 197 if (smb_isascii(*p)) { 198 *p = smb_toupper(*p); 199 p++; 200 } else { 201 if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0) 202 return (0); 203 204 if (c == 0) 205 break; 206 207 c = smb_toupper(c); 208 p += smb_wctomb(p, c); 209 } 210 } 211 212 return (s); 213 } 214 215 /* 216 * Convert a string to lowercase using the appropriate codepage. The 217 * string is converted in place. A pointer to the string is returned. 218 * There is an assumption here that uppercase and lowercase values 219 * always result encode to the same length. 220 */ 221 char * 222 smb_strlwr(char *s) 223 { 224 smb_wchar_t c; 225 char *p = s; 226 227 while (*p) { 228 if (smb_isascii(*p)) { 229 *p = smb_tolower(*p); 230 p++; 231 } else { 232 if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0) 233 return (0); 234 235 if (c == 0) 236 break; 237 238 c = smb_tolower(c); 239 p += smb_wctomb(p, c); 240 } 241 } 242 243 return (s); 244 } 245 246 /* 247 * Returns 1 if string contains NO uppercase chars 0 otherwise. However, 248 * -1 is returned if "s" is not a valid multi-byte string. 249 */ 250 int 251 smb_isstrlwr(const char *s) 252 { 253 smb_wchar_t c; 254 int n; 255 const char *p = s; 256 257 while (*p) { 258 if (smb_isascii(*p) && smb_isupper(*p)) 259 return (0); 260 else { 261 if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0) 262 return (-1); 263 264 if (c == 0) 265 break; 266 267 if (smb_isupper(c)) 268 return (0); 269 270 p += n; 271 } 272 } 273 274 return (1); 275 } 276 277 /* 278 * Returns 1 if string contains NO lowercase chars 0 otherwise. However, 279 * -1 is returned if "s" is not a valid multi-byte string. 280 */ 281 int 282 smb_isstrupr(const char *s) 283 { 284 smb_wchar_t c; 285 int n; 286 const char *p = s; 287 288 while (*p) { 289 if (smb_isascii(*p) && smb_islower(*p)) 290 return (0); 291 else { 292 if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0) 293 return (-1); 294 295 if (c == 0) 296 break; 297 298 if (smb_islower(c)) 299 return (0); 300 301 p += n; 302 } 303 } 304 305 return (1); 306 } 307 308 /* 309 * Compare the null-terminated strings s1 and s2 and return an integer 310 * greater than, equal to or less than 0 dependent on whether s1 is 311 * lexicographically greater than, equal to or less than s2 after 312 * translation of each character to lowercase. The original strings 313 * are not modified. 314 * 315 * If n is non-zero, at most n bytes are compared. Otherwise, the strings 316 * are compared until a null terminator is encountered. 317 * 318 * Out: 0 if strings are equal 319 * < 0 if first string < second string 320 * > 0 if first string > second string 321 */ 322 int 323 smb_strcasecmp(const char *s1, const char *s2, size_t n) 324 { 325 int err = 0; 326 int rc; 327 328 rc = u8_strcmp(s1, s2, n, U8_STRCMP_CI_LOWER, U8_UNICODE_LATEST, &err); 329 if (err != 0) 330 return (-1); 331 return (rc); 332 } 333 334 /* 335 * First build a codepage based on cp_unicode.h. Then build the unicode 336 * codepage from this interim codepage by copying the entries over while 337 * fixing them and filling in the gaps. 338 */ 339 static smb_codepage_t * 340 smb_unicode_init(void) 341 { 342 smb_codepage_t *unicode; 343 uint32_t a = 0; 344 uint32_t b = 0; 345 346 unicode = MEM_ZALLOC("unicode", sizeof (smb_codepage_t) << 16); 347 if (unicode == NULL) 348 return (NULL); 349 350 while (b != 0xffff) { 351 /* 352 * If there is a gap in the standard, 353 * fill in the gap with no-case entries. 354 */ 355 if (UNICODE_N_ENTRIES <= a || a_unicode[a].val > b) { 356 unicode[b].ctype = CODEPAGE_ISNONE; 357 unicode[b].upper = (smb_wchar_t)b; 358 unicode[b].lower = (smb_wchar_t)b; 359 b++; 360 continue; 361 } 362 363 /* 364 * Copy the entry and fixup as required. 365 */ 366 switch (a_unicode[a].ctype) { 367 case CODEPAGE_ISNONE: 368 /* 369 * Replace 0xffff in upper/lower fields with its val. 370 */ 371 unicode[b].ctype = CODEPAGE_ISNONE; 372 unicode[b].upper = (smb_wchar_t)b; 373 unicode[b].lower = (smb_wchar_t)b; 374 break; 375 case CODEPAGE_ISUPPER: 376 /* 377 * Some characters may have case yet not have 378 * case conversion. Treat them as no-case. 379 */ 380 if (a_unicode[a].lower == 0xffff) { 381 unicode[b].ctype = CODEPAGE_ISNONE; 382 unicode[b].upper = (smb_wchar_t)b; 383 unicode[b].lower = (smb_wchar_t)b; 384 } else { 385 unicode[b].ctype = CODEPAGE_ISUPPER; 386 unicode[b].upper = (smb_wchar_t)b; 387 unicode[b].lower = a_unicode[a].lower; 388 } 389 break; 390 case CODEPAGE_ISLOWER: 391 /* 392 * Some characters may have case yet not have 393 * case conversion. Treat them as no-case. 394 */ 395 if (a_unicode[a].upper == 0xffff) { 396 unicode[b].ctype = CODEPAGE_ISNONE; 397 unicode[b].upper = (smb_wchar_t)b; 398 unicode[b].lower = (smb_wchar_t)b; 399 } else { 400 unicode[b].ctype = CODEPAGE_ISLOWER; 401 unicode[b].upper = a_unicode[a].upper; 402 unicode[b].lower = (smb_wchar_t)b; 403 } 404 break; 405 default: 406 MEM_FREE("unicode", unicode); 407 return (NULL); 408 } 409 410 a++; 411 b++; 412 }; 413 414 return (unicode); 415 } 416 417 /* 418 * Parse a UNC path (\\server\share\path) into its components. 419 * Although a standard UNC path starts with two '\', in DFS 420 * all UNC paths start with one '\'. So, this function only 421 * checks for one. 422 * 423 * A valid UNC must at least contain two components i.e. server 424 * and share. The path is parsed to: 425 * 426 * unc_server server or domain name with no leading/trailing '\' 427 * unc_share share name with no leading/trailing '\' 428 * unc_path relative path to the share with no leading/trailing '\' 429 * it is valid for unc_path to be NULL. 430 * 431 * Upon successful return of this function, smb_unc_free() 432 * MUST be called when returned 'unc' is no longer needed. 433 * 434 * Returns 0 on success, otherwise returns an errno code. 435 */ 436 int 437 smb_unc_init(const char *path, smb_unc_t *unc) 438 { 439 char *p; 440 441 if (path == NULL || unc == NULL || (*path != '\\' && *path != '/')) 442 return (EINVAL); 443 444 bzero(unc, sizeof (smb_unc_t)); 445 446 #if defined(_KERNEL) || defined(_FAKE_KERNEL) 447 unc->unc_buf = smb_mem_strdup(path); 448 #else 449 if ((unc->unc_buf = strdup(path)) == NULL) 450 return (ENOMEM); 451 #endif 452 453 (void) strsubst(unc->unc_buf, '\\', '/'); 454 (void) strcanon(unc->unc_buf, "/"); 455 456 unc->unc_server = unc->unc_buf + 1; 457 if (*unc->unc_server == '\0') { 458 smb_unc_free(unc); 459 return (EINVAL); 460 } 461 462 if ((p = strchr(unc->unc_server, '/')) == NULL) { 463 smb_unc_free(unc); 464 return (EINVAL); 465 } 466 467 *p++ = '\0'; 468 unc->unc_share = p; 469 470 if (*unc->unc_share == '\0') { 471 smb_unc_free(unc); 472 return (EINVAL); 473 } 474 475 unc->unc_path = strchr(unc->unc_share, '/'); 476 if ((p = unc->unc_path) == NULL) 477 return (0); 478 479 unc->unc_path++; 480 *p = '\0'; 481 482 /* remove the last '/' if any */ 483 if ((p = strchr(unc->unc_path, '\0')) != NULL) { 484 if (*(--p) == '/') 485 *p = '\0'; 486 } 487 488 return (0); 489 } 490 491 void 492 smb_unc_free(smb_unc_t *unc) 493 { 494 if (unc == NULL) 495 return; 496 497 #if defined(_KERNEL) || defined(_FAKE_KERNEL) 498 smb_mem_free(unc->unc_buf); 499 #else 500 free(unc->unc_buf); 501 #endif 502 unc->unc_buf = NULL; 503 } 504