1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * 4 * Copyright (c) International Business Machines Corp., 2000,2009 5 * Modified by Steve French (sfrench@us.ibm.com) 6 */ 7 #include <linux/fs.h> 8 #include <linux/slab.h> 9 #include <linux/unaligned.h> 10 #include "cifs_fs_sb.h" 11 #include "cifs_unicode.h" 12 #include "cifsglob.h" 13 #include "cifs_debug.h" 14 15 /* Convert character using the SFU - "Services for Unix" remapping range */ 16 static bool 17 convert_sfu_char(const __u16 src_char, char *target) 18 { 19 /* 20 * BB: Cannot handle remapping UNI_SLASH until all the calls to 21 * build_path_from_dentry are modified, as they use slash as 22 * separator. 23 */ 24 switch (src_char) { 25 case UNI_COLON: 26 *target = ':'; 27 break; 28 case UNI_ASTERISK: 29 *target = '*'; 30 break; 31 case UNI_QUESTION: 32 *target = '?'; 33 break; 34 case UNI_PIPE: 35 *target = '|'; 36 break; 37 case UNI_GRTRTHAN: 38 *target = '>'; 39 break; 40 case UNI_LESSTHAN: 41 *target = '<'; 42 break; 43 default: 44 return false; 45 } 46 return true; 47 } 48 49 /* Convert character using the SFM - "Services for Mac" remapping range */ 50 static bool 51 convert_sfm_char(const __u16 src_char, char *target) 52 { 53 if (src_char >= 0xF001 && src_char <= 0xF01F) { 54 *target = src_char - 0xF000; 55 return true; 56 } 57 switch (src_char) { 58 case SFM_COLON: 59 *target = ':'; 60 break; 61 case SFM_DOUBLEQUOTE: 62 *target = '"'; 63 break; 64 case SFM_ASTERISK: 65 *target = '*'; 66 break; 67 case SFM_QUESTION: 68 *target = '?'; 69 break; 70 case SFM_PIPE: 71 *target = '|'; 72 break; 73 case SFM_GRTRTHAN: 74 *target = '>'; 75 break; 76 case SFM_LESSTHAN: 77 *target = '<'; 78 break; 79 case SFM_SPACE: 80 *target = ' '; 81 break; 82 case SFM_PERIOD: 83 *target = '.'; 84 break; 85 default: 86 return false; 87 } 88 return true; 89 } 90 91 92 /* 93 * cifs_mapchar - convert a host-endian char to proper char in codepage 94 * @target - where converted character should be copied 95 * @src_char - 2 byte host-endian source character 96 * @cp - codepage to which character should be converted 97 * @map_type - How should the 7 NTFS/SMB reserved characters be mapped to UCS2? 98 * 99 * This function handles the conversion of a single character. It is the 100 * responsibility of the caller to ensure that the target buffer is large 101 * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE). 102 */ 103 static int 104 cifs_mapchar(char *target, const __u16 *from, const struct nls_table *cp, 105 int maptype) 106 { 107 int len = 1; 108 __u16 src_char; 109 110 src_char = *from; 111 112 if ((maptype == SFM_MAP_UNI_RSVD) && convert_sfm_char(src_char, target)) 113 return len; 114 else if ((maptype == SFU_MAP_UNI_RSVD) && 115 convert_sfu_char(src_char, target)) 116 return len; 117 118 /* if character not one of seven in special remap set */ 119 len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE); 120 if (len <= 0) 121 goto surrogate_pair; 122 123 return len; 124 125 surrogate_pair: 126 /* convert SURROGATE_PAIR and IVS */ 127 if (strcmp(cp->charset, "utf8")) 128 goto unknown; 129 len = utf16s_to_utf8s(from, 3, UTF16_LITTLE_ENDIAN, target, 6); 130 if (len <= 0) 131 goto unknown; 132 return len; 133 134 unknown: 135 *target = '?'; 136 len = 1; 137 return len; 138 } 139 140 /* 141 * cifs_from_utf16 - convert utf16le string to local charset 142 * @to - destination buffer 143 * @from - source buffer 144 * @tolen - destination buffer size (in bytes) 145 * @fromlen - source buffer size (in bytes) 146 * @codepage - codepage to which characters should be converted 147 * @mapchar - should characters be remapped according to the mapchars option? 148 * 149 * Convert a little-endian utf16le string (as sent by the server) to a string 150 * in the provided codepage. The tolen and fromlen parameters are to ensure 151 * that the code doesn't walk off of the end of the buffer (which is always 152 * a danger if the alignment of the source buffer is off). The destination 153 * string is always properly null terminated and fits in the destination 154 * buffer. Returns the length of the destination string in bytes (including 155 * null terminator). 156 * 157 * Note that some windows versions actually send multiword UTF-16 characters 158 * instead of straight UTF16-2. The linux nls routines however aren't able to 159 * deal with those characters properly. In the event that we get some of 160 * those characters, they won't be translated properly. 161 */ 162 int 163 cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, 164 const struct nls_table *codepage, int map_type) 165 { 166 int i, charlen, safelen; 167 int outlen = 0; 168 int nullsize = nls_nullsize(codepage); 169 int fromwords = fromlen / 2; 170 char tmp[NLS_MAX_CHARSET_SIZE]; 171 __u16 ftmp[3]; /* ftmp[3] = 3array x 2bytes = 6bytes UTF-16 */ 172 173 /* 174 * because the chars can be of varying widths, we need to take care 175 * not to overflow the destination buffer when we get close to the 176 * end of it. Until we get to this offset, we don't need to check 177 * for overflow however. 178 */ 179 safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize); 180 181 for (i = 0; i < fromwords; i++) { 182 ftmp[0] = get_unaligned_le16(&from[i]); 183 if (ftmp[0] == 0) 184 break; 185 if (i + 1 < fromwords) 186 ftmp[1] = get_unaligned_le16(&from[i + 1]); 187 else 188 ftmp[1] = 0; 189 if (i + 2 < fromwords) 190 ftmp[2] = get_unaligned_le16(&from[i + 2]); 191 else 192 ftmp[2] = 0; 193 194 /* 195 * check to see if converting this character might make the 196 * conversion bleed into the null terminator 197 */ 198 if (outlen >= safelen) { 199 charlen = cifs_mapchar(tmp, ftmp, codepage, map_type); 200 if ((outlen + charlen) > (tolen - nullsize)) 201 break; 202 } 203 204 /* put converted char into 'to' buffer */ 205 charlen = cifs_mapchar(&to[outlen], ftmp, codepage, map_type); 206 outlen += charlen; 207 208 /* charlen (=bytes of UTF-8 for 1 character) 209 * 4bytes UTF-8(surrogate pair) is charlen=4 210 * (4bytes UTF-16 code) 211 * 7-8bytes UTF-8(IVS) is charlen=3+4 or 4+4 212 * (2 UTF-8 pairs divided to 2 UTF-16 pairs) */ 213 if (charlen == 4) 214 i++; 215 else if (charlen >= 5) 216 /* 5-6bytes UTF-8 */ 217 i += 2; 218 } 219 220 /* properly null-terminate string */ 221 for (i = 0; i < nullsize; i++) 222 to[outlen++] = 0; 223 224 return outlen; 225 } 226 227 /* 228 * NAME: cifs_strtoUTF16() 229 * 230 * FUNCTION: Convert character string to unicode string 231 * 232 */ 233 int 234 cifs_strtoUTF16(__le16 *to, const char *from, int len, 235 const struct nls_table *codepage) 236 { 237 int charlen; 238 int i; 239 wchar_t wchar_to; /* needed to quiet sparse */ 240 241 /* special case for utf8 to handle no plane0 chars */ 242 if (!strcmp(codepage->charset, "utf8")) { 243 /* 244 * convert utf8 -> utf16, we assume we have enough space 245 * as caller should have assumed conversion does not overflow 246 * in destination len is length in wchar_t units (16bits) 247 */ 248 i = utf8s_to_utf16s(from, len, UTF16_LITTLE_ENDIAN, 249 (wchar_t *) to, len); 250 251 /* if success terminate and exit */ 252 if (i >= 0) 253 goto success; 254 /* 255 * if fails fall back to UCS encoding as this 256 * function should not return negative values 257 * currently can fail only if source contains 258 * invalid encoded characters 259 */ 260 } 261 262 for (i = 0; len && *from; i++, from += charlen, len -= charlen) { 263 charlen = codepage->char2uni(from, len, &wchar_to); 264 if (charlen < 1) { 265 cifs_dbg(VFS, "strtoUTF16: char2uni of 0x%x returned %d\n", 266 *from, charlen); 267 /* A question mark */ 268 wchar_to = 0x003f; 269 charlen = 1; 270 } 271 put_unaligned_le16(wchar_to, &to[i]); 272 } 273 274 success: 275 put_unaligned_le16(0, &to[i]); 276 return i; 277 } 278 279 /* 280 * cifs_utf16_bytes - how long will a string be after conversion? 281 * @utf16 - pointer to input string 282 * @maxbytes - don't go past this many bytes of input string 283 * @codepage - destination codepage 284 * 285 * Walk a utf16le string and return the number of bytes that the string will 286 * be after being converted to the given charset, not including any null 287 * termination required. Don't walk past maxbytes in the source buffer. 288 */ 289 int 290 cifs_utf16_bytes(const __le16 *from, int maxbytes, 291 const struct nls_table *codepage) 292 { 293 int i; 294 int charlen, outlen = 0; 295 int maxwords = maxbytes / 2; 296 char tmp[NLS_MAX_CHARSET_SIZE]; 297 __u16 ftmp[3]; 298 299 for (i = 0; i < maxwords; i++) { 300 ftmp[0] = get_unaligned_le16(&from[i]); 301 if (ftmp[0] == 0) 302 break; 303 if (i + 1 < maxwords) 304 ftmp[1] = get_unaligned_le16(&from[i + 1]); 305 else 306 ftmp[1] = 0; 307 if (i + 2 < maxwords) 308 ftmp[2] = get_unaligned_le16(&from[i + 2]); 309 else 310 ftmp[2] = 0; 311 312 charlen = cifs_mapchar(tmp, ftmp, codepage, NO_MAP_UNI_RSVD); 313 outlen += charlen; 314 } 315 316 return outlen; 317 } 318 319 /* 320 * cifs_strndup_from_utf16 - copy a string from wire format to the local 321 * codepage 322 * @src - source string 323 * @maxlen - don't walk past this many bytes in the source string 324 * @is_unicode - is this a unicode string? 325 * @codepage - destination codepage 326 * 327 * Take a string given by the server, convert it to the local codepage and 328 * put it in a new buffer. Returns a pointer to the new string or NULL on 329 * error. 330 */ 331 char * 332 cifs_strndup_from_utf16(const char *src, const int maxlen, 333 const bool is_unicode, const struct nls_table *codepage) 334 { 335 int len; 336 char *dst; 337 338 if (is_unicode) { 339 len = cifs_utf16_bytes((__le16 *) src, maxlen, codepage); 340 len += nls_nullsize(codepage); 341 dst = kmalloc(len, GFP_KERNEL); 342 if (!dst) 343 return NULL; 344 cifs_from_utf16(dst, (__le16 *) src, len, maxlen, codepage, 345 NO_MAP_UNI_RSVD); 346 } else { 347 dst = kstrndup(src, maxlen, GFP_KERNEL); 348 } 349 350 return dst; 351 } 352 353 static __le16 convert_to_sfu_char(char src_char) 354 { 355 __le16 dest_char; 356 357 switch (src_char) { 358 case ':': 359 dest_char = cpu_to_le16(UNI_COLON); 360 break; 361 case '*': 362 dest_char = cpu_to_le16(UNI_ASTERISK); 363 break; 364 case '?': 365 dest_char = cpu_to_le16(UNI_QUESTION); 366 break; 367 case '<': 368 dest_char = cpu_to_le16(UNI_LESSTHAN); 369 break; 370 case '>': 371 dest_char = cpu_to_le16(UNI_GRTRTHAN); 372 break; 373 case '|': 374 dest_char = cpu_to_le16(UNI_PIPE); 375 break; 376 default: 377 dest_char = 0; 378 } 379 380 return dest_char; 381 } 382 383 static __le16 convert_to_sfm_char(char src_char, bool end_of_string) 384 { 385 __le16 dest_char; 386 387 if (src_char >= 0x01 && src_char <= 0x1F) { 388 dest_char = cpu_to_le16(src_char + 0xF000); 389 return dest_char; 390 } 391 switch (src_char) { 392 case ':': 393 dest_char = cpu_to_le16(SFM_COLON); 394 break; 395 case '"': 396 dest_char = cpu_to_le16(SFM_DOUBLEQUOTE); 397 break; 398 case '*': 399 dest_char = cpu_to_le16(SFM_ASTERISK); 400 break; 401 case '?': 402 dest_char = cpu_to_le16(SFM_QUESTION); 403 break; 404 case '<': 405 dest_char = cpu_to_le16(SFM_LESSTHAN); 406 break; 407 case '>': 408 dest_char = cpu_to_le16(SFM_GRTRTHAN); 409 break; 410 case '|': 411 dest_char = cpu_to_le16(SFM_PIPE); 412 break; 413 case '.': 414 if (end_of_string) 415 dest_char = cpu_to_le16(SFM_PERIOD); 416 else 417 dest_char = 0; 418 break; 419 case ' ': 420 if (end_of_string) 421 dest_char = cpu_to_le16(SFM_SPACE); 422 else 423 dest_char = 0; 424 break; 425 default: 426 dest_char = 0; 427 } 428 429 return dest_char; 430 } 431 432 /* 433 * Convert 16 bit Unicode pathname to wire format from string in current code 434 * page. Conversion may involve remapping up the six characters that are 435 * only legal in POSIX-like OS (if they are present in the string). Path 436 * names are little endian 16 bit Unicode on the wire 437 */ 438 int 439 cifsConvertToUTF16(__le16 *target, const char *source, int srclen, 440 const struct nls_table *cp, int map_chars) 441 { 442 int i, charlen; 443 int j = 0; 444 char src_char; 445 __le16 dst_char; 446 wchar_t tmp; 447 wchar_t *wchar_to; /* UTF-16 */ 448 int ret; 449 unicode_t u; 450 451 if (map_chars == NO_MAP_UNI_RSVD) 452 return cifs_strtoUTF16(target, source, PATH_MAX, cp); 453 454 wchar_to = kzalloc(6, GFP_KERNEL); 455 456 for (i = 0; i < srclen; j++) { 457 src_char = source[i]; 458 charlen = 1; 459 460 /* check if end of string */ 461 if (src_char == 0) 462 goto ctoUTF16_out; 463 464 /* see if we must remap this char */ 465 if (map_chars == SFU_MAP_UNI_RSVD) 466 dst_char = convert_to_sfu_char(src_char); 467 else if (map_chars == SFM_MAP_UNI_RSVD) { 468 bool end_of_string; 469 470 /** 471 * Remap spaces and periods found at the end of every 472 * component of the path. The special cases of '.' and 473 * '..' are need to be handled because of symlinks. 474 * They are treated as non-end-of-string to avoid 475 * remapping and breaking symlinks pointing to . or .. 476 **/ 477 if ((i == 0 || source[i-1] == '\\') && 478 source[i] == '.' && 479 (i == srclen-1 || source[i+1] == '\\')) 480 end_of_string = false; /* "." case */ 481 else if (i >= 1 && 482 (i == 1 || source[i-2] == '\\') && 483 source[i-1] == '.' && 484 source[i] == '.' && 485 (i == srclen-1 || source[i+1] == '\\')) 486 end_of_string = false; /* ".." case */ 487 else if ((i == srclen - 1) || (source[i+1] == '\\')) 488 end_of_string = true; 489 else 490 end_of_string = false; 491 492 dst_char = convert_to_sfm_char(src_char, end_of_string); 493 } else 494 dst_char = 0; 495 /* 496 * FIXME: We can not handle remapping backslash (UNI_SLASH) 497 * until all the calls to build_path_from_dentry are modified, 498 * as they use backslash as separator. 499 */ 500 if (dst_char == 0) { 501 charlen = cp->char2uni(source + i, srclen - i, &tmp); 502 dst_char = cpu_to_le16(tmp); 503 504 /* 505 * if no match, use question mark, which at least in 506 * some cases serves as wild card 507 */ 508 if (charlen > 0) 509 goto ctoUTF16; 510 511 /* convert SURROGATE_PAIR */ 512 if (strcmp(cp->charset, "utf8") || !wchar_to) 513 goto unknown; 514 if (*(source + i) & 0x80) { 515 charlen = utf8_to_utf32(source + i, 6, &u); 516 if (charlen < 0) 517 goto unknown; 518 } else 519 goto unknown; 520 ret = utf8s_to_utf16s(source + i, charlen, 521 UTF16_LITTLE_ENDIAN, 522 wchar_to, 6); 523 if (ret < 0) 524 goto unknown; 525 526 i += charlen; 527 dst_char = cpu_to_le16(*wchar_to); 528 if (charlen <= 3) 529 /* 1-3bytes UTF-8 to 2bytes UTF-16 */ 530 put_unaligned(dst_char, &target[j]); 531 else if (charlen == 4) { 532 /* 4bytes UTF-8(surrogate pair) to 4bytes UTF-16 533 * 7-8bytes UTF-8(IVS) divided to 2 UTF-16 534 * (charlen=3+4 or 4+4) */ 535 put_unaligned(dst_char, &target[j]); 536 dst_char = cpu_to_le16(*(wchar_to + 1)); 537 j++; 538 put_unaligned(dst_char, &target[j]); 539 } else if (charlen >= 5) { 540 /* 5-6bytes UTF-8 to 6bytes UTF-16 */ 541 put_unaligned(dst_char, &target[j]); 542 dst_char = cpu_to_le16(*(wchar_to + 1)); 543 j++; 544 put_unaligned(dst_char, &target[j]); 545 dst_char = cpu_to_le16(*(wchar_to + 2)); 546 j++; 547 put_unaligned(dst_char, &target[j]); 548 } 549 continue; 550 551 unknown: 552 dst_char = cpu_to_le16(0x003f); 553 charlen = 1; 554 } 555 556 ctoUTF16: 557 /* 558 * character may take more than one byte in the source string, 559 * but will take exactly two bytes in the target string 560 */ 561 i += charlen; 562 put_unaligned(dst_char, &target[j]); 563 } 564 565 ctoUTF16_out: 566 put_unaligned(0, &target[j]); /* Null terminate target unicode string */ 567 kfree(wchar_to); 568 return j; 569 } 570 571 /* 572 * cifs_local_to_utf16_bytes - how long will a string be after conversion? 573 * @from - pointer to input string 574 * @maxbytes - don't go past this many bytes of input string 575 * @codepage - source codepage 576 * 577 * Walk a string and return the number of bytes that the string will 578 * be after being converted to the given charset, not including any null 579 * termination required. Don't walk past maxbytes in the source buffer. 580 */ 581 582 static int 583 cifs_local_to_utf16_bytes(const char *from, int len, 584 const struct nls_table *codepage) 585 { 586 int charlen; 587 int i; 588 wchar_t wchar_to; 589 590 for (i = 0; len && *from; i++, from += charlen, len -= charlen) { 591 charlen = codepage->char2uni(from, len, &wchar_to); 592 /* Failed conversion defaults to a question mark */ 593 if (charlen < 1) 594 charlen = 1; 595 } 596 return 2 * i; /* UTF16 characters are two bytes */ 597 } 598 599 /* 600 * cifs_strndup_to_utf16 - copy a string to wire format from the local codepage 601 * @src - source string 602 * @maxlen - don't walk past this many bytes in the source string 603 * @utf16_len - the length of the allocated string in bytes (including null) 604 * @cp - source codepage 605 * @remap - map special chars 606 * 607 * Take a string convert it from the local codepage to UTF16 and 608 * put it in a new buffer. Returns a pointer to the new string or NULL on 609 * error. 610 */ 611 __le16 * 612 cifs_strndup_to_utf16(const char *src, const int maxlen, int *utf16_len, 613 const struct nls_table *cp, int remap) 614 { 615 int len; 616 __le16 *dst; 617 618 if (!src) 619 return NULL; 620 621 len = cifs_local_to_utf16_bytes(src, maxlen, cp); 622 len += 2; /* NULL */ 623 dst = kmalloc(len, GFP_KERNEL); 624 if (!dst) { 625 *utf16_len = 0; 626 return NULL; 627 } 628 cifsConvertToUTF16(dst, src, strlen(src), cp, remap); 629 *utf16_len = len; 630 return dst; 631 } 632