1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * 4 * Copyright (c) International Business Machines Corp., 2000,2009 5 * Modified by Steve French (sfrench@us.ibm.com) 6 */ 7 #include <linux/fs.h> 8 #include <linux/slab.h> 9 #include "cifs_fs_sb.h" 10 #include "cifs_unicode.h" 11 #include "cifs_uniupr.h" 12 #include "cifspdu.h" 13 #include "cifsglob.h" 14 #include "cifs_debug.h" 15 16 int cifs_remap(struct cifs_sb_info *cifs_sb) 17 { 18 int map_type; 19 20 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SFM_CHR) 21 map_type = SFM_MAP_UNI_RSVD; 22 else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR) 23 map_type = SFU_MAP_UNI_RSVD; 24 else 25 map_type = NO_MAP_UNI_RSVD; 26 27 return map_type; 28 } 29 30 /* Convert character using the SFU - "Services for Unix" remapping range */ 31 static bool 32 convert_sfu_char(const __u16 src_char, char *target) 33 { 34 /* 35 * BB: Cannot handle remapping UNI_SLASH until all the calls to 36 * build_path_from_dentry are modified, as they use slash as 37 * separator. 38 */ 39 switch (src_char) { 40 case UNI_COLON: 41 *target = ':'; 42 break; 43 case UNI_ASTERISK: 44 *target = '*'; 45 break; 46 case UNI_QUESTION: 47 *target = '?'; 48 break; 49 case UNI_PIPE: 50 *target = '|'; 51 break; 52 case UNI_GRTRTHAN: 53 *target = '>'; 54 break; 55 case UNI_LESSTHAN: 56 *target = '<'; 57 break; 58 default: 59 return false; 60 } 61 return true; 62 } 63 64 /* Convert character using the SFM - "Services for Mac" remapping range */ 65 static bool 66 convert_sfm_char(const __u16 src_char, char *target) 67 { 68 if (src_char >= 0xF001 && src_char <= 0xF01F) { 69 *target = src_char - 0xF000; 70 return true; 71 } 72 switch (src_char) { 73 case SFM_COLON: 74 *target = ':'; 75 break; 76 case SFM_DOUBLEQUOTE: 77 *target = '"'; 78 break; 79 case SFM_ASTERISK: 80 *target = '*'; 81 break; 82 case SFM_QUESTION: 83 *target = '?'; 84 break; 85 case SFM_PIPE: 86 *target = '|'; 87 break; 88 case SFM_GRTRTHAN: 89 *target = '>'; 90 break; 91 case SFM_LESSTHAN: 92 *target = '<'; 93 break; 94 case SFM_SPACE: 95 *target = ' '; 96 break; 97 case SFM_PERIOD: 98 *target = '.'; 99 break; 100 default: 101 return false; 102 } 103 return true; 104 } 105 106 107 /* 108 * cifs_mapchar - convert a host-endian char to proper char in codepage 109 * @target - where converted character should be copied 110 * @src_char - 2 byte host-endian source character 111 * @cp - codepage to which character should be converted 112 * @map_type - How should the 7 NTFS/SMB reserved characters be mapped to UCS2? 113 * 114 * This function handles the conversion of a single character. It is the 115 * responsibility of the caller to ensure that the target buffer is large 116 * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE). 117 */ 118 static int 119 cifs_mapchar(char *target, const __u16 *from, const struct nls_table *cp, 120 int maptype) 121 { 122 int len = 1; 123 __u16 src_char; 124 125 src_char = *from; 126 127 if ((maptype == SFM_MAP_UNI_RSVD) && convert_sfm_char(src_char, target)) 128 return len; 129 else if ((maptype == SFU_MAP_UNI_RSVD) && 130 convert_sfu_char(src_char, target)) 131 return len; 132 133 /* if character not one of seven in special remap set */ 134 len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE); 135 if (len <= 0) 136 goto surrogate_pair; 137 138 return len; 139 140 surrogate_pair: 141 /* convert SURROGATE_PAIR and IVS */ 142 if (strcmp(cp->charset, "utf8")) 143 goto unknown; 144 len = utf16s_to_utf8s(from, 3, UTF16_LITTLE_ENDIAN, target, 6); 145 if (len <= 0) 146 goto unknown; 147 return len; 148 149 unknown: 150 *target = '?'; 151 len = 1; 152 return len; 153 } 154 155 /* 156 * cifs_from_utf16 - convert utf16le string to local charset 157 * @to - destination buffer 158 * @from - source buffer 159 * @tolen - destination buffer size (in bytes) 160 * @fromlen - source buffer size (in bytes) 161 * @codepage - codepage to which characters should be converted 162 * @mapchar - should characters be remapped according to the mapchars option? 163 * 164 * Convert a little-endian utf16le string (as sent by the server) to a string 165 * in the provided codepage. The tolen and fromlen parameters are to ensure 166 * that the code doesn't walk off of the end of the buffer (which is always 167 * a danger if the alignment of the source buffer is off). The destination 168 * string is always properly null terminated and fits in the destination 169 * buffer. Returns the length of the destination string in bytes (including 170 * null terminator). 171 * 172 * Note that some windows versions actually send multiword UTF-16 characters 173 * instead of straight UTF16-2. The linux nls routines however aren't able to 174 * deal with those characters properly. In the event that we get some of 175 * those characters, they won't be translated properly. 176 */ 177 int 178 cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, 179 const struct nls_table *codepage, int map_type) 180 { 181 int i, charlen, safelen; 182 int outlen = 0; 183 int nullsize = nls_nullsize(codepage); 184 int fromwords = fromlen / 2; 185 char tmp[NLS_MAX_CHARSET_SIZE]; 186 __u16 ftmp[3]; /* ftmp[3] = 3array x 2bytes = 6bytes UTF-16 */ 187 188 /* 189 * because the chars can be of varying widths, we need to take care 190 * not to overflow the destination buffer when we get close to the 191 * end of it. Until we get to this offset, we don't need to check 192 * for overflow however. 193 */ 194 safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize); 195 196 for (i = 0; i < fromwords; i++) { 197 ftmp[0] = get_unaligned_le16(&from[i]); 198 if (ftmp[0] == 0) 199 break; 200 if (i + 1 < fromwords) 201 ftmp[1] = get_unaligned_le16(&from[i + 1]); 202 else 203 ftmp[1] = 0; 204 if (i + 2 < fromwords) 205 ftmp[2] = get_unaligned_le16(&from[i + 2]); 206 else 207 ftmp[2] = 0; 208 209 /* 210 * check to see if converting this character might make the 211 * conversion bleed into the null terminator 212 */ 213 if (outlen >= safelen) { 214 charlen = cifs_mapchar(tmp, ftmp, codepage, map_type); 215 if ((outlen + charlen) > (tolen - nullsize)) 216 break; 217 } 218 219 /* put converted char into 'to' buffer */ 220 charlen = cifs_mapchar(&to[outlen], ftmp, codepage, map_type); 221 outlen += charlen; 222 223 /* charlen (=bytes of UTF-8 for 1 character) 224 * 4bytes UTF-8(surrogate pair) is charlen=4 225 * (4bytes UTF-16 code) 226 * 7-8bytes UTF-8(IVS) is charlen=3+4 or 4+4 227 * (2 UTF-8 pairs divided to 2 UTF-16 pairs) */ 228 if (charlen == 4) 229 i++; 230 else if (charlen >= 5) 231 /* 5-6bytes UTF-8 */ 232 i += 2; 233 } 234 235 /* properly null-terminate string */ 236 for (i = 0; i < nullsize; i++) 237 to[outlen++] = 0; 238 239 return outlen; 240 } 241 242 /* 243 * NAME: cifs_strtoUTF16() 244 * 245 * FUNCTION: Convert character string to unicode string 246 * 247 */ 248 int 249 cifs_strtoUTF16(__le16 *to, const char *from, int len, 250 const struct nls_table *codepage) 251 { 252 int charlen; 253 int i; 254 wchar_t wchar_to; /* needed to quiet sparse */ 255 256 /* special case for utf8 to handle no plane0 chars */ 257 if (!strcmp(codepage->charset, "utf8")) { 258 /* 259 * convert utf8 -> utf16, we assume we have enough space 260 * as caller should have assumed conversion does not overflow 261 * in destination len is length in wchar_t units (16bits) 262 */ 263 i = utf8s_to_utf16s(from, len, UTF16_LITTLE_ENDIAN, 264 (wchar_t *) to, len); 265 266 /* if success terminate and exit */ 267 if (i >= 0) 268 goto success; 269 /* 270 * if fails fall back to UCS encoding as this 271 * function should not return negative values 272 * currently can fail only if source contains 273 * invalid encoded characters 274 */ 275 } 276 277 for (i = 0; len && *from; i++, from += charlen, len -= charlen) { 278 charlen = codepage->char2uni(from, len, &wchar_to); 279 if (charlen < 1) { 280 cifs_dbg(VFS, "strtoUTF16: char2uni of 0x%x returned %d\n", 281 *from, charlen); 282 /* A question mark */ 283 wchar_to = 0x003f; 284 charlen = 1; 285 } 286 put_unaligned_le16(wchar_to, &to[i]); 287 } 288 289 success: 290 put_unaligned_le16(0, &to[i]); 291 return i; 292 } 293 294 /* 295 * cifs_utf16_bytes - how long will a string be after conversion? 296 * @utf16 - pointer to input string 297 * @maxbytes - don't go past this many bytes of input string 298 * @codepage - destination codepage 299 * 300 * Walk a utf16le string and return the number of bytes that the string will 301 * be after being converted to the given charset, not including any null 302 * termination required. Don't walk past maxbytes in the source buffer. 303 */ 304 int 305 cifs_utf16_bytes(const __le16 *from, int maxbytes, 306 const struct nls_table *codepage) 307 { 308 int i; 309 int charlen, outlen = 0; 310 int maxwords = maxbytes / 2; 311 char tmp[NLS_MAX_CHARSET_SIZE]; 312 __u16 ftmp[3]; 313 314 for (i = 0; i < maxwords; i++) { 315 ftmp[0] = get_unaligned_le16(&from[i]); 316 if (ftmp[0] == 0) 317 break; 318 if (i + 1 < maxwords) 319 ftmp[1] = get_unaligned_le16(&from[i + 1]); 320 else 321 ftmp[1] = 0; 322 if (i + 2 < maxwords) 323 ftmp[2] = get_unaligned_le16(&from[i + 2]); 324 else 325 ftmp[2] = 0; 326 327 charlen = cifs_mapchar(tmp, ftmp, codepage, NO_MAP_UNI_RSVD); 328 outlen += charlen; 329 } 330 331 return outlen; 332 } 333 334 /* 335 * cifs_strndup_from_utf16 - copy a string from wire format to the local 336 * codepage 337 * @src - source string 338 * @maxlen - don't walk past this many bytes in the source string 339 * @is_unicode - is this a unicode string? 340 * @codepage - destination codepage 341 * 342 * Take a string given by the server, convert it to the local codepage and 343 * put it in a new buffer. Returns a pointer to the new string or NULL on 344 * error. 345 */ 346 char * 347 cifs_strndup_from_utf16(const char *src, const int maxlen, 348 const bool is_unicode, const struct nls_table *codepage) 349 { 350 int len; 351 char *dst; 352 353 if (is_unicode) { 354 len = cifs_utf16_bytes((__le16 *) src, maxlen, codepage); 355 len += nls_nullsize(codepage); 356 dst = kmalloc(len, GFP_KERNEL); 357 if (!dst) 358 return NULL; 359 cifs_from_utf16(dst, (__le16 *) src, len, maxlen, codepage, 360 NO_MAP_UNI_RSVD); 361 } else { 362 dst = kstrndup(src, maxlen, GFP_KERNEL); 363 } 364 365 return dst; 366 } 367 368 static __le16 convert_to_sfu_char(char src_char) 369 { 370 __le16 dest_char; 371 372 switch (src_char) { 373 case ':': 374 dest_char = cpu_to_le16(UNI_COLON); 375 break; 376 case '*': 377 dest_char = cpu_to_le16(UNI_ASTERISK); 378 break; 379 case '?': 380 dest_char = cpu_to_le16(UNI_QUESTION); 381 break; 382 case '<': 383 dest_char = cpu_to_le16(UNI_LESSTHAN); 384 break; 385 case '>': 386 dest_char = cpu_to_le16(UNI_GRTRTHAN); 387 break; 388 case '|': 389 dest_char = cpu_to_le16(UNI_PIPE); 390 break; 391 default: 392 dest_char = 0; 393 } 394 395 return dest_char; 396 } 397 398 static __le16 convert_to_sfm_char(char src_char, bool end_of_string) 399 { 400 __le16 dest_char; 401 402 if (src_char >= 0x01 && src_char <= 0x1F) { 403 dest_char = cpu_to_le16(src_char + 0xF000); 404 return dest_char; 405 } 406 switch (src_char) { 407 case ':': 408 dest_char = cpu_to_le16(SFM_COLON); 409 break; 410 case '"': 411 dest_char = cpu_to_le16(SFM_DOUBLEQUOTE); 412 break; 413 case '*': 414 dest_char = cpu_to_le16(SFM_ASTERISK); 415 break; 416 case '?': 417 dest_char = cpu_to_le16(SFM_QUESTION); 418 break; 419 case '<': 420 dest_char = cpu_to_le16(SFM_LESSTHAN); 421 break; 422 case '>': 423 dest_char = cpu_to_le16(SFM_GRTRTHAN); 424 break; 425 case '|': 426 dest_char = cpu_to_le16(SFM_PIPE); 427 break; 428 case '.': 429 if (end_of_string) 430 dest_char = cpu_to_le16(SFM_PERIOD); 431 else 432 dest_char = 0; 433 break; 434 case ' ': 435 if (end_of_string) 436 dest_char = cpu_to_le16(SFM_SPACE); 437 else 438 dest_char = 0; 439 break; 440 default: 441 dest_char = 0; 442 } 443 444 return dest_char; 445 } 446 447 /* 448 * Convert 16 bit Unicode pathname to wire format from string in current code 449 * page. Conversion may involve remapping up the six characters that are 450 * only legal in POSIX-like OS (if they are present in the string). Path 451 * names are little endian 16 bit Unicode on the wire 452 */ 453 int 454 cifsConvertToUTF16(__le16 *target, const char *source, int srclen, 455 const struct nls_table *cp, int map_chars) 456 { 457 int i, charlen; 458 int j = 0; 459 char src_char; 460 __le16 dst_char; 461 wchar_t tmp; 462 wchar_t *wchar_to; /* UTF-16 */ 463 int ret; 464 unicode_t u; 465 466 if (map_chars == NO_MAP_UNI_RSVD) 467 return cifs_strtoUTF16(target, source, PATH_MAX, cp); 468 469 wchar_to = kzalloc(6, GFP_KERNEL); 470 471 for (i = 0; i < srclen; j++) { 472 src_char = source[i]; 473 charlen = 1; 474 475 /* check if end of string */ 476 if (src_char == 0) 477 goto ctoUTF16_out; 478 479 /* see if we must remap this char */ 480 if (map_chars == SFU_MAP_UNI_RSVD) 481 dst_char = convert_to_sfu_char(src_char); 482 else if (map_chars == SFM_MAP_UNI_RSVD) { 483 bool end_of_string; 484 485 /** 486 * Remap spaces and periods found at the end of every 487 * component of the path. The special cases of '.' and 488 * '..' do not need to be dealt with explicitly because 489 * they are addressed in namei.c:link_path_walk(). 490 **/ 491 if ((i == srclen - 1) || (source[i+1] == '\\')) 492 end_of_string = true; 493 else 494 end_of_string = false; 495 496 dst_char = convert_to_sfm_char(src_char, end_of_string); 497 } else 498 dst_char = 0; 499 /* 500 * FIXME: We can not handle remapping backslash (UNI_SLASH) 501 * until all the calls to build_path_from_dentry are modified, 502 * as they use backslash as separator. 503 */ 504 if (dst_char == 0) { 505 charlen = cp->char2uni(source + i, srclen - i, &tmp); 506 dst_char = cpu_to_le16(tmp); 507 508 /* 509 * if no match, use question mark, which at least in 510 * some cases serves as wild card 511 */ 512 if (charlen > 0) 513 goto ctoUTF16; 514 515 /* convert SURROGATE_PAIR */ 516 if (strcmp(cp->charset, "utf8") || !wchar_to) 517 goto unknown; 518 if (*(source + i) & 0x80) { 519 charlen = utf8_to_utf32(source + i, 6, &u); 520 if (charlen < 0) 521 goto unknown; 522 } else 523 goto unknown; 524 ret = utf8s_to_utf16s(source + i, charlen, 525 UTF16_LITTLE_ENDIAN, 526 wchar_to, 6); 527 if (ret < 0) 528 goto unknown; 529 530 i += charlen; 531 dst_char = cpu_to_le16(*wchar_to); 532 if (charlen <= 3) 533 /* 1-3bytes UTF-8 to 2bytes UTF-16 */ 534 put_unaligned(dst_char, &target[j]); 535 else if (charlen == 4) { 536 /* 4bytes UTF-8(surrogate pair) to 4bytes UTF-16 537 * 7-8bytes UTF-8(IVS) divided to 2 UTF-16 538 * (charlen=3+4 or 4+4) */ 539 put_unaligned(dst_char, &target[j]); 540 dst_char = cpu_to_le16(*(wchar_to + 1)); 541 j++; 542 put_unaligned(dst_char, &target[j]); 543 } else if (charlen >= 5) { 544 /* 5-6bytes UTF-8 to 6bytes UTF-16 */ 545 put_unaligned(dst_char, &target[j]); 546 dst_char = cpu_to_le16(*(wchar_to + 1)); 547 j++; 548 put_unaligned(dst_char, &target[j]); 549 dst_char = cpu_to_le16(*(wchar_to + 2)); 550 j++; 551 put_unaligned(dst_char, &target[j]); 552 } 553 continue; 554 555 unknown: 556 dst_char = cpu_to_le16(0x003f); 557 charlen = 1; 558 } 559 560 ctoUTF16: 561 /* 562 * character may take more than one byte in the source string, 563 * but will take exactly two bytes in the target string 564 */ 565 i += charlen; 566 put_unaligned(dst_char, &target[j]); 567 } 568 569 ctoUTF16_out: 570 put_unaligned(0, &target[j]); /* Null terminate target unicode string */ 571 kfree(wchar_to); 572 return j; 573 } 574 575 /* 576 * cifs_local_to_utf16_bytes - how long will a string be after conversion? 577 * @from - pointer to input string 578 * @maxbytes - don't go past this many bytes of input string 579 * @codepage - source codepage 580 * 581 * Walk a string and return the number of bytes that the string will 582 * be after being converted to the given charset, not including any null 583 * termination required. Don't walk past maxbytes in the source buffer. 584 */ 585 586 static int 587 cifs_local_to_utf16_bytes(const char *from, int len, 588 const struct nls_table *codepage) 589 { 590 int charlen; 591 int i; 592 wchar_t wchar_to; 593 594 for (i = 0; len && *from; i++, from += charlen, len -= charlen) { 595 charlen = codepage->char2uni(from, len, &wchar_to); 596 /* Failed conversion defaults to a question mark */ 597 if (charlen < 1) 598 charlen = 1; 599 } 600 return 2 * i; /* UTF16 characters are two bytes */ 601 } 602 603 /* 604 * cifs_strndup_to_utf16 - copy a string to wire format from the local codepage 605 * @src - source string 606 * @maxlen - don't walk past this many bytes in the source string 607 * @utf16_len - the length of the allocated string in bytes (including null) 608 * @cp - source codepage 609 * @remap - map special chars 610 * 611 * Take a string convert it from the local codepage to UTF16 and 612 * put it in a new buffer. Returns a pointer to the new string or NULL on 613 * error. 614 */ 615 __le16 * 616 cifs_strndup_to_utf16(const char *src, const int maxlen, int *utf16_len, 617 const struct nls_table *cp, int remap) 618 { 619 int len; 620 __le16 *dst; 621 622 len = cifs_local_to_utf16_bytes(src, maxlen, cp); 623 len += 2; /* NULL */ 624 dst = kmalloc(len, GFP_KERNEL); 625 if (!dst) { 626 *utf16_len = 0; 627 return NULL; 628 } 629 cifsConvertToUTF16(dst, src, strlen(src), cp, remap); 630 *utf16_len = len; 631 return dst; 632 } 633