1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * 4 * Copyright (c) International Business Machines Corp., 2000,2009 5 * Modified by Steve French (sfrench@us.ibm.com) 6 */ 7 #include <linux/fs.h> 8 #include <linux/slab.h> 9 #include "cifs_fs_sb.h" 10 #include "cifs_unicode.h" 11 #include "cifsglob.h" 12 #include "cifs_debug.h" 13 14 /* Convert character using the SFU - "Services for Unix" remapping range */ 15 static bool 16 convert_sfu_char(const __u16 src_char, char *target) 17 { 18 /* 19 * BB: Cannot handle remapping UNI_SLASH until all the calls to 20 * build_path_from_dentry are modified, as they use slash as 21 * separator. 22 */ 23 switch (src_char) { 24 case UNI_COLON: 25 *target = ':'; 26 break; 27 case UNI_ASTERISK: 28 *target = '*'; 29 break; 30 case UNI_QUESTION: 31 *target = '?'; 32 break; 33 case UNI_PIPE: 34 *target = '|'; 35 break; 36 case UNI_GRTRTHAN: 37 *target = '>'; 38 break; 39 case UNI_LESSTHAN: 40 *target = '<'; 41 break; 42 default: 43 return false; 44 } 45 return true; 46 } 47 48 /* Convert character using the SFM - "Services for Mac" remapping range */ 49 static bool 50 convert_sfm_char(const __u16 src_char, char *target) 51 { 52 if (src_char >= 0xF001 && src_char <= 0xF01F) { 53 *target = src_char - 0xF000; 54 return true; 55 } 56 switch (src_char) { 57 case SFM_COLON: 58 *target = ':'; 59 break; 60 case SFM_DOUBLEQUOTE: 61 *target = '"'; 62 break; 63 case SFM_ASTERISK: 64 *target = '*'; 65 break; 66 case SFM_QUESTION: 67 *target = '?'; 68 break; 69 case SFM_PIPE: 70 *target = '|'; 71 break; 72 case SFM_GRTRTHAN: 73 *target = '>'; 74 break; 75 case SFM_LESSTHAN: 76 *target = '<'; 77 break; 78 case SFM_SPACE: 79 *target = ' '; 80 break; 81 case SFM_PERIOD: 82 *target = '.'; 83 break; 84 default: 85 return false; 86 } 87 return true; 88 } 89 90 91 /* 92 * cifs_mapchar - convert a host-endian char to proper char in codepage 93 * @target - where converted character should be copied 94 * @src_char - 2 byte host-endian source character 95 * @cp - codepage to which character should be converted 96 * @map_type - How should the 7 NTFS/SMB reserved characters be mapped to UCS2? 97 * 98 * This function handles the conversion of a single character. It is the 99 * responsibility of the caller to ensure that the target buffer is large 100 * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE). 101 */ 102 static int 103 cifs_mapchar(char *target, const __u16 *from, const struct nls_table *cp, 104 int maptype) 105 { 106 int len = 1; 107 __u16 src_char; 108 109 src_char = *from; 110 111 if ((maptype == SFM_MAP_UNI_RSVD) && convert_sfm_char(src_char, target)) 112 return len; 113 else if ((maptype == SFU_MAP_UNI_RSVD) && 114 convert_sfu_char(src_char, target)) 115 return len; 116 117 /* if character not one of seven in special remap set */ 118 len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE); 119 if (len <= 0) 120 goto surrogate_pair; 121 122 return len; 123 124 surrogate_pair: 125 /* convert SURROGATE_PAIR and IVS */ 126 if (strcmp(cp->charset, "utf8")) 127 goto unknown; 128 len = utf16s_to_utf8s(from, 3, UTF16_LITTLE_ENDIAN, target, 6); 129 if (len <= 0) 130 goto unknown; 131 return len; 132 133 unknown: 134 *target = '?'; 135 len = 1; 136 return len; 137 } 138 139 /* 140 * cifs_from_utf16 - convert utf16le string to local charset 141 * @to - destination buffer 142 * @from - source buffer 143 * @tolen - destination buffer size (in bytes) 144 * @fromlen - source buffer size (in bytes) 145 * @codepage - codepage to which characters should be converted 146 * @mapchar - should characters be remapped according to the mapchars option? 147 * 148 * Convert a little-endian utf16le string (as sent by the server) to a string 149 * in the provided codepage. The tolen and fromlen parameters are to ensure 150 * that the code doesn't walk off of the end of the buffer (which is always 151 * a danger if the alignment of the source buffer is off). The destination 152 * string is always properly null terminated and fits in the destination 153 * buffer. Returns the length of the destination string in bytes (including 154 * null terminator). 155 * 156 * Note that some windows versions actually send multiword UTF-16 characters 157 * instead of straight UTF16-2. The linux nls routines however aren't able to 158 * deal with those characters properly. In the event that we get some of 159 * those characters, they won't be translated properly. 160 */ 161 int 162 cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, 163 const struct nls_table *codepage, int map_type) 164 { 165 int i, charlen, safelen; 166 int outlen = 0; 167 int nullsize = nls_nullsize(codepage); 168 int fromwords = fromlen / 2; 169 char tmp[NLS_MAX_CHARSET_SIZE]; 170 __u16 ftmp[3]; /* ftmp[3] = 3array x 2bytes = 6bytes UTF-16 */ 171 172 /* 173 * because the chars can be of varying widths, we need to take care 174 * not to overflow the destination buffer when we get close to the 175 * end of it. Until we get to this offset, we don't need to check 176 * for overflow however. 177 */ 178 safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize); 179 180 for (i = 0; i < fromwords; i++) { 181 ftmp[0] = get_unaligned_le16(&from[i]); 182 if (ftmp[0] == 0) 183 break; 184 if (i + 1 < fromwords) 185 ftmp[1] = get_unaligned_le16(&from[i + 1]); 186 else 187 ftmp[1] = 0; 188 if (i + 2 < fromwords) 189 ftmp[2] = get_unaligned_le16(&from[i + 2]); 190 else 191 ftmp[2] = 0; 192 193 /* 194 * check to see if converting this character might make the 195 * conversion bleed into the null terminator 196 */ 197 if (outlen >= safelen) { 198 charlen = cifs_mapchar(tmp, ftmp, codepage, map_type); 199 if ((outlen + charlen) > (tolen - nullsize)) 200 break; 201 } 202 203 /* put converted char into 'to' buffer */ 204 charlen = cifs_mapchar(&to[outlen], ftmp, codepage, map_type); 205 outlen += charlen; 206 207 /* charlen (=bytes of UTF-8 for 1 character) 208 * 4bytes UTF-8(surrogate pair) is charlen=4 209 * (4bytes UTF-16 code) 210 * 7-8bytes UTF-8(IVS) is charlen=3+4 or 4+4 211 * (2 UTF-8 pairs divided to 2 UTF-16 pairs) */ 212 if (charlen == 4) 213 i++; 214 else if (charlen >= 5) 215 /* 5-6bytes UTF-8 */ 216 i += 2; 217 } 218 219 /* properly null-terminate string */ 220 for (i = 0; i < nullsize; i++) 221 to[outlen++] = 0; 222 223 return outlen; 224 } 225 226 /* 227 * NAME: cifs_strtoUTF16() 228 * 229 * FUNCTION: Convert character string to unicode string 230 * 231 */ 232 int 233 cifs_strtoUTF16(__le16 *to, const char *from, int len, 234 const struct nls_table *codepage) 235 { 236 int charlen; 237 int i; 238 wchar_t wchar_to; /* needed to quiet sparse */ 239 240 /* special case for utf8 to handle no plane0 chars */ 241 if (!strcmp(codepage->charset, "utf8")) { 242 /* 243 * convert utf8 -> utf16, we assume we have enough space 244 * as caller should have assumed conversion does not overflow 245 * in destination len is length in wchar_t units (16bits) 246 */ 247 i = utf8s_to_utf16s(from, len, UTF16_LITTLE_ENDIAN, 248 (wchar_t *) to, len); 249 250 /* if success terminate and exit */ 251 if (i >= 0) 252 goto success; 253 /* 254 * if fails fall back to UCS encoding as this 255 * function should not return negative values 256 * currently can fail only if source contains 257 * invalid encoded characters 258 */ 259 } 260 261 for (i = 0; len && *from; i++, from += charlen, len -= charlen) { 262 charlen = codepage->char2uni(from, len, &wchar_to); 263 if (charlen < 1) { 264 cifs_dbg(VFS, "strtoUTF16: char2uni of 0x%x returned %d\n", 265 *from, charlen); 266 /* A question mark */ 267 wchar_to = 0x003f; 268 charlen = 1; 269 } 270 put_unaligned_le16(wchar_to, &to[i]); 271 } 272 273 success: 274 put_unaligned_le16(0, &to[i]); 275 return i; 276 } 277 278 /* 279 * cifs_utf16_bytes - how long will a string be after conversion? 280 * @utf16 - pointer to input string 281 * @maxbytes - don't go past this many bytes of input string 282 * @codepage - destination codepage 283 * 284 * Walk a utf16le string and return the number of bytes that the string will 285 * be after being converted to the given charset, not including any null 286 * termination required. Don't walk past maxbytes in the source buffer. 287 */ 288 int 289 cifs_utf16_bytes(const __le16 *from, int maxbytes, 290 const struct nls_table *codepage) 291 { 292 int i; 293 int charlen, outlen = 0; 294 int maxwords = maxbytes / 2; 295 char tmp[NLS_MAX_CHARSET_SIZE]; 296 __u16 ftmp[3]; 297 298 for (i = 0; i < maxwords; i++) { 299 ftmp[0] = get_unaligned_le16(&from[i]); 300 if (ftmp[0] == 0) 301 break; 302 if (i + 1 < maxwords) 303 ftmp[1] = get_unaligned_le16(&from[i + 1]); 304 else 305 ftmp[1] = 0; 306 if (i + 2 < maxwords) 307 ftmp[2] = get_unaligned_le16(&from[i + 2]); 308 else 309 ftmp[2] = 0; 310 311 charlen = cifs_mapchar(tmp, ftmp, codepage, NO_MAP_UNI_RSVD); 312 outlen += charlen; 313 } 314 315 return outlen; 316 } 317 318 /* 319 * cifs_strndup_from_utf16 - copy a string from wire format to the local 320 * codepage 321 * @src - source string 322 * @maxlen - don't walk past this many bytes in the source string 323 * @is_unicode - is this a unicode string? 324 * @codepage - destination codepage 325 * 326 * Take a string given by the server, convert it to the local codepage and 327 * put it in a new buffer. Returns a pointer to the new string or NULL on 328 * error. 329 */ 330 char * 331 cifs_strndup_from_utf16(const char *src, const int maxlen, 332 const bool is_unicode, const struct nls_table *codepage) 333 { 334 int len; 335 char *dst; 336 337 if (is_unicode) { 338 len = cifs_utf16_bytes((__le16 *) src, maxlen, codepage); 339 len += nls_nullsize(codepage); 340 dst = kmalloc(len, GFP_KERNEL); 341 if (!dst) 342 return NULL; 343 cifs_from_utf16(dst, (__le16 *) src, len, maxlen, codepage, 344 NO_MAP_UNI_RSVD); 345 } else { 346 dst = kstrndup(src, maxlen, GFP_KERNEL); 347 } 348 349 return dst; 350 } 351 352 static __le16 convert_to_sfu_char(char src_char) 353 { 354 __le16 dest_char; 355 356 switch (src_char) { 357 case ':': 358 dest_char = cpu_to_le16(UNI_COLON); 359 break; 360 case '*': 361 dest_char = cpu_to_le16(UNI_ASTERISK); 362 break; 363 case '?': 364 dest_char = cpu_to_le16(UNI_QUESTION); 365 break; 366 case '<': 367 dest_char = cpu_to_le16(UNI_LESSTHAN); 368 break; 369 case '>': 370 dest_char = cpu_to_le16(UNI_GRTRTHAN); 371 break; 372 case '|': 373 dest_char = cpu_to_le16(UNI_PIPE); 374 break; 375 default: 376 dest_char = 0; 377 } 378 379 return dest_char; 380 } 381 382 static __le16 convert_to_sfm_char(char src_char, bool end_of_string) 383 { 384 __le16 dest_char; 385 386 if (src_char >= 0x01 && src_char <= 0x1F) { 387 dest_char = cpu_to_le16(src_char + 0xF000); 388 return dest_char; 389 } 390 switch (src_char) { 391 case ':': 392 dest_char = cpu_to_le16(SFM_COLON); 393 break; 394 case '"': 395 dest_char = cpu_to_le16(SFM_DOUBLEQUOTE); 396 break; 397 case '*': 398 dest_char = cpu_to_le16(SFM_ASTERISK); 399 break; 400 case '?': 401 dest_char = cpu_to_le16(SFM_QUESTION); 402 break; 403 case '<': 404 dest_char = cpu_to_le16(SFM_LESSTHAN); 405 break; 406 case '>': 407 dest_char = cpu_to_le16(SFM_GRTRTHAN); 408 break; 409 case '|': 410 dest_char = cpu_to_le16(SFM_PIPE); 411 break; 412 case '.': 413 if (end_of_string) 414 dest_char = cpu_to_le16(SFM_PERIOD); 415 else 416 dest_char = 0; 417 break; 418 case ' ': 419 if (end_of_string) 420 dest_char = cpu_to_le16(SFM_SPACE); 421 else 422 dest_char = 0; 423 break; 424 default: 425 dest_char = 0; 426 } 427 428 return dest_char; 429 } 430 431 /* 432 * Convert 16 bit Unicode pathname to wire format from string in current code 433 * page. Conversion may involve remapping up the six characters that are 434 * only legal in POSIX-like OS (if they are present in the string). Path 435 * names are little endian 16 bit Unicode on the wire 436 */ 437 int 438 cifsConvertToUTF16(__le16 *target, const char *source, int srclen, 439 const struct nls_table *cp, int map_chars) 440 { 441 int i, charlen; 442 int j = 0; 443 char src_char; 444 __le16 dst_char; 445 wchar_t tmp; 446 wchar_t *wchar_to; /* UTF-16 */ 447 int ret; 448 unicode_t u; 449 450 if (map_chars == NO_MAP_UNI_RSVD) 451 return cifs_strtoUTF16(target, source, PATH_MAX, cp); 452 453 wchar_to = kzalloc(6, GFP_KERNEL); 454 455 for (i = 0; i < srclen; j++) { 456 src_char = source[i]; 457 charlen = 1; 458 459 /* check if end of string */ 460 if (src_char == 0) 461 goto ctoUTF16_out; 462 463 /* see if we must remap this char */ 464 if (map_chars == SFU_MAP_UNI_RSVD) 465 dst_char = convert_to_sfu_char(src_char); 466 else if (map_chars == SFM_MAP_UNI_RSVD) { 467 bool end_of_string; 468 469 /** 470 * Remap spaces and periods found at the end of every 471 * component of the path. The special cases of '.' and 472 * '..' are need to be handled because of symlinks. 473 * They are treated as non-end-of-string to avoid 474 * remapping and breaking symlinks pointing to . or .. 475 **/ 476 if ((i == 0 || source[i-1] == '\\') && 477 source[i] == '.' && 478 (i == srclen-1 || source[i+1] == '\\')) 479 end_of_string = false; /* "." case */ 480 else if (i >= 1 && 481 (i == 1 || source[i-2] == '\\') && 482 source[i-1] == '.' && 483 source[i] == '.' && 484 (i == srclen-1 || source[i+1] == '\\')) 485 end_of_string = false; /* ".." case */ 486 else if ((i == srclen - 1) || (source[i+1] == '\\')) 487 end_of_string = true; 488 else 489 end_of_string = false; 490 491 dst_char = convert_to_sfm_char(src_char, end_of_string); 492 } else 493 dst_char = 0; 494 /* 495 * FIXME: We can not handle remapping backslash (UNI_SLASH) 496 * until all the calls to build_path_from_dentry are modified, 497 * as they use backslash as separator. 498 */ 499 if (dst_char == 0) { 500 charlen = cp->char2uni(source + i, srclen - i, &tmp); 501 dst_char = cpu_to_le16(tmp); 502 503 /* 504 * if no match, use question mark, which at least in 505 * some cases serves as wild card 506 */ 507 if (charlen > 0) 508 goto ctoUTF16; 509 510 /* convert SURROGATE_PAIR */ 511 if (strcmp(cp->charset, "utf8") || !wchar_to) 512 goto unknown; 513 if (*(source + i) & 0x80) { 514 charlen = utf8_to_utf32(source + i, 6, &u); 515 if (charlen < 0) 516 goto unknown; 517 } else 518 goto unknown; 519 ret = utf8s_to_utf16s(source + i, charlen, 520 UTF16_LITTLE_ENDIAN, 521 wchar_to, 6); 522 if (ret < 0) 523 goto unknown; 524 525 i += charlen; 526 dst_char = cpu_to_le16(*wchar_to); 527 if (charlen <= 3) 528 /* 1-3bytes UTF-8 to 2bytes UTF-16 */ 529 put_unaligned(dst_char, &target[j]); 530 else if (charlen == 4) { 531 /* 4bytes UTF-8(surrogate pair) to 4bytes UTF-16 532 * 7-8bytes UTF-8(IVS) divided to 2 UTF-16 533 * (charlen=3+4 or 4+4) */ 534 put_unaligned(dst_char, &target[j]); 535 dst_char = cpu_to_le16(*(wchar_to + 1)); 536 j++; 537 put_unaligned(dst_char, &target[j]); 538 } else if (charlen >= 5) { 539 /* 5-6bytes UTF-8 to 6bytes UTF-16 */ 540 put_unaligned(dst_char, &target[j]); 541 dst_char = cpu_to_le16(*(wchar_to + 1)); 542 j++; 543 put_unaligned(dst_char, &target[j]); 544 dst_char = cpu_to_le16(*(wchar_to + 2)); 545 j++; 546 put_unaligned(dst_char, &target[j]); 547 } 548 continue; 549 550 unknown: 551 dst_char = cpu_to_le16(0x003f); 552 charlen = 1; 553 } 554 555 ctoUTF16: 556 /* 557 * character may take more than one byte in the source string, 558 * but will take exactly two bytes in the target string 559 */ 560 i += charlen; 561 put_unaligned(dst_char, &target[j]); 562 } 563 564 ctoUTF16_out: 565 put_unaligned(0, &target[j]); /* Null terminate target unicode string */ 566 kfree(wchar_to); 567 return j; 568 } 569 570 /* 571 * cifs_local_to_utf16_bytes - how long will a string be after conversion? 572 * @from - pointer to input string 573 * @maxbytes - don't go past this many bytes of input string 574 * @codepage - source codepage 575 * 576 * Walk a string and return the number of bytes that the string will 577 * be after being converted to the given charset, not including any null 578 * termination required. Don't walk past maxbytes in the source buffer. 579 */ 580 581 static int 582 cifs_local_to_utf16_bytes(const char *from, int len, 583 const struct nls_table *codepage) 584 { 585 int charlen; 586 int i; 587 wchar_t wchar_to; 588 589 for (i = 0; len && *from; i++, from += charlen, len -= charlen) { 590 charlen = codepage->char2uni(from, len, &wchar_to); 591 /* Failed conversion defaults to a question mark */ 592 if (charlen < 1) 593 charlen = 1; 594 } 595 return 2 * i; /* UTF16 characters are two bytes */ 596 } 597 598 /* 599 * cifs_strndup_to_utf16 - copy a string to wire format from the local codepage 600 * @src - source string 601 * @maxlen - don't walk past this many bytes in the source string 602 * @utf16_len - the length of the allocated string in bytes (including null) 603 * @cp - source codepage 604 * @remap - map special chars 605 * 606 * Take a string convert it from the local codepage to UTF16 and 607 * put it in a new buffer. Returns a pointer to the new string or NULL on 608 * error. 609 */ 610 __le16 * 611 cifs_strndup_to_utf16(const char *src, const int maxlen, int *utf16_len, 612 const struct nls_table *cp, int remap) 613 { 614 int len; 615 __le16 *dst; 616 617 if (!src) 618 return NULL; 619 620 len = cifs_local_to_utf16_bytes(src, maxlen, cp); 621 len += 2; /* NULL */ 622 dst = kmalloc(len, GFP_KERNEL); 623 if (!dst) { 624 *utf16_len = 0; 625 return NULL; 626 } 627 cifsConvertToUTF16(dst, src, strlen(src), cp, remap); 628 *utf16_len = len; 629 return dst; 630 } 631