11e9ea7e0SNamjae Jeon // SPDX-License-Identifier: GPL-2.0-or-later 21e9ea7e0SNamjae Jeon /* 3*5218cd10SNamjae Jeon * NTFS Unicode string handling. 41e9ea7e0SNamjae Jeon * 51e9ea7e0SNamjae Jeon * Copyright (c) 2001-2006 Anton Altaparmakov 61e9ea7e0SNamjae Jeon */ 71e9ea7e0SNamjae Jeon 81e9ea7e0SNamjae Jeon #include "ntfs.h" 91e9ea7e0SNamjae Jeon 101e9ea7e0SNamjae Jeon /* 111e9ea7e0SNamjae Jeon * IMPORTANT 121e9ea7e0SNamjae Jeon * ========= 131e9ea7e0SNamjae Jeon * 141e9ea7e0SNamjae Jeon * All these routines assume that the Unicode characters are in little endian 151e9ea7e0SNamjae Jeon * encoding inside the strings!!! 161e9ea7e0SNamjae Jeon */ 171e9ea7e0SNamjae Jeon 181e9ea7e0SNamjae Jeon /* 191e9ea7e0SNamjae Jeon * This is used by the name collation functions to quickly determine what 201e9ea7e0SNamjae Jeon * characters are (in)valid. 211e9ea7e0SNamjae Jeon */ 221e9ea7e0SNamjae Jeon static const u8 legal_ansi_char_array[0x40] = { 231e9ea7e0SNamjae Jeon 0x00, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 241e9ea7e0SNamjae Jeon 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 251e9ea7e0SNamjae Jeon 261e9ea7e0SNamjae Jeon 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 271e9ea7e0SNamjae Jeon 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 281e9ea7e0SNamjae Jeon 291e9ea7e0SNamjae Jeon 0x17, 0x07, 0x18, 0x17, 0x17, 0x17, 0x17, 0x17, 301e9ea7e0SNamjae Jeon 0x17, 0x17, 0x18, 0x16, 0x16, 0x17, 0x07, 0x00, 311e9ea7e0SNamjae Jeon 321e9ea7e0SNamjae Jeon 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 331e9ea7e0SNamjae Jeon 0x17, 0x17, 0x04, 0x16, 0x18, 0x16, 0x18, 0x18, 341e9ea7e0SNamjae Jeon }; 351e9ea7e0SNamjae Jeon 36*5218cd10SNamjae Jeon /* 371e9ea7e0SNamjae Jeon * ntfs_are_names_equal - compare two Unicode names for equality 381e9ea7e0SNamjae Jeon * @s1: name to compare to @s2 391e9ea7e0SNamjae Jeon * @s1_len: length in Unicode characters of @s1 401e9ea7e0SNamjae Jeon * @s2: name to compare to @s1 411e9ea7e0SNamjae Jeon * @s2_len: length in Unicode characters of @s2 421e9ea7e0SNamjae Jeon * @ic: ignore case bool 431e9ea7e0SNamjae Jeon * @upcase: upcase table (only if @ic == IGNORE_CASE) 441e9ea7e0SNamjae Jeon * @upcase_size: length in Unicode characters of @upcase (if present) 451e9ea7e0SNamjae Jeon * 461e9ea7e0SNamjae Jeon * Compare the names @s1 and @s2 and return 'true' (1) if the names are 471e9ea7e0SNamjae Jeon * identical, or 'false' (0) if they are not identical. If @ic is IGNORE_CASE, 481e9ea7e0SNamjae Jeon * the @upcase table is used to performa a case insensitive comparison. 491e9ea7e0SNamjae Jeon */ 50*5218cd10SNamjae Jeon bool ntfs_are_names_equal(const __le16 *s1, size_t s1_len, 51*5218cd10SNamjae Jeon const __le16 *s2, size_t s2_len, const u32 ic, 52*5218cd10SNamjae Jeon const __le16 *upcase, const u32 upcase_size) 531e9ea7e0SNamjae Jeon { 541e9ea7e0SNamjae Jeon if (s1_len != s2_len) 551e9ea7e0SNamjae Jeon return false; 561e9ea7e0SNamjae Jeon if (ic == CASE_SENSITIVE) 571e9ea7e0SNamjae Jeon return !ntfs_ucsncmp(s1, s2, s1_len); 581e9ea7e0SNamjae Jeon return !ntfs_ucsncasecmp(s1, s2, s1_len, upcase, upcase_size); 591e9ea7e0SNamjae Jeon } 601e9ea7e0SNamjae Jeon 61*5218cd10SNamjae Jeon /* 621e9ea7e0SNamjae Jeon * ntfs_collate_names - collate two Unicode names 631e9ea7e0SNamjae Jeon * @name1: first Unicode name to compare 64*5218cd10SNamjae Jeon * @name1_len: first Unicode name length 651e9ea7e0SNamjae Jeon * @name2: second Unicode name to compare 66*5218cd10SNamjae Jeon * @name2_len: second Unicode name length 671e9ea7e0SNamjae Jeon * @err_val: if @name1 contains an invalid character return this value 681e9ea7e0SNamjae Jeon * @ic: either CASE_SENSITIVE or IGNORE_CASE 691e9ea7e0SNamjae Jeon * @upcase: upcase table (ignored if @ic is CASE_SENSITIVE) 701e9ea7e0SNamjae Jeon * @upcase_len: upcase table size (ignored if @ic is CASE_SENSITIVE) 711e9ea7e0SNamjae Jeon * 721e9ea7e0SNamjae Jeon * ntfs_collate_names collates two Unicode names and returns: 731e9ea7e0SNamjae Jeon * 741e9ea7e0SNamjae Jeon * -1 if the first name collates before the second one, 751e9ea7e0SNamjae Jeon * 0 if the names match, 761e9ea7e0SNamjae Jeon * 1 if the second name collates before the first one, or 771e9ea7e0SNamjae Jeon * @err_val if an invalid character is found in @name1 during the comparison. 781e9ea7e0SNamjae Jeon * 791e9ea7e0SNamjae Jeon * The following characters are considered invalid: '"', '*', '<', '>' and '?'. 801e9ea7e0SNamjae Jeon */ 81*5218cd10SNamjae Jeon int ntfs_collate_names(const __le16 *name1, const u32 name1_len, 82*5218cd10SNamjae Jeon const __le16 *name2, const u32 name2_len, 83*5218cd10SNamjae Jeon const int err_val, const u32 ic, 84*5218cd10SNamjae Jeon const __le16 *upcase, const u32 upcase_len) 851e9ea7e0SNamjae Jeon { 861e9ea7e0SNamjae Jeon u32 cnt, min_len; 871e9ea7e0SNamjae Jeon u16 c1, c2; 881e9ea7e0SNamjae Jeon 891e9ea7e0SNamjae Jeon min_len = name1_len; 901e9ea7e0SNamjae Jeon if (name1_len > name2_len) 911e9ea7e0SNamjae Jeon min_len = name2_len; 921e9ea7e0SNamjae Jeon for (cnt = 0; cnt < min_len; ++cnt) { 931e9ea7e0SNamjae Jeon c1 = le16_to_cpu(*name1++); 941e9ea7e0SNamjae Jeon c2 = le16_to_cpu(*name2++); 951e9ea7e0SNamjae Jeon if (ic) { 961e9ea7e0SNamjae Jeon if (c1 < upcase_len) 971e9ea7e0SNamjae Jeon c1 = le16_to_cpu(upcase[c1]); 981e9ea7e0SNamjae Jeon if (c2 < upcase_len) 991e9ea7e0SNamjae Jeon c2 = le16_to_cpu(upcase[c2]); 1001e9ea7e0SNamjae Jeon } 1011e9ea7e0SNamjae Jeon if (c1 < 64 && legal_ansi_char_array[c1] & 8) 1021e9ea7e0SNamjae Jeon return err_val; 1031e9ea7e0SNamjae Jeon if (c1 < c2) 1041e9ea7e0SNamjae Jeon return -1; 1051e9ea7e0SNamjae Jeon if (c1 > c2) 1061e9ea7e0SNamjae Jeon return 1; 1071e9ea7e0SNamjae Jeon } 1081e9ea7e0SNamjae Jeon if (name1_len < name2_len) 1091e9ea7e0SNamjae Jeon return -1; 1101e9ea7e0SNamjae Jeon if (name1_len == name2_len) 1111e9ea7e0SNamjae Jeon return 0; 1121e9ea7e0SNamjae Jeon /* name1_len > name2_len */ 1131e9ea7e0SNamjae Jeon c1 = le16_to_cpu(*name1); 1141e9ea7e0SNamjae Jeon if (c1 < 64 && legal_ansi_char_array[c1] & 8) 1151e9ea7e0SNamjae Jeon return err_val; 1161e9ea7e0SNamjae Jeon return 1; 1171e9ea7e0SNamjae Jeon } 1181e9ea7e0SNamjae Jeon 119*5218cd10SNamjae Jeon /* 1201e9ea7e0SNamjae Jeon * ntfs_ucsncmp - compare two little endian Unicode strings 1211e9ea7e0SNamjae Jeon * @s1: first string 1221e9ea7e0SNamjae Jeon * @s2: second string 1231e9ea7e0SNamjae Jeon * @n: maximum unicode characters to compare 1241e9ea7e0SNamjae Jeon * 1251e9ea7e0SNamjae Jeon * Compare the first @n characters of the Unicode strings @s1 and @s2, 1261e9ea7e0SNamjae Jeon * The strings in little endian format and appropriate le16_to_cpu() 1271e9ea7e0SNamjae Jeon * conversion is performed on non-little endian machines. 1281e9ea7e0SNamjae Jeon * 1291e9ea7e0SNamjae Jeon * The function returns an integer less than, equal to, or greater than zero 1301e9ea7e0SNamjae Jeon * if @s1 (or the first @n Unicode characters thereof) is found, respectively, 1311e9ea7e0SNamjae Jeon * to be less than, to match, or be greater than @s2. 1321e9ea7e0SNamjae Jeon */ 133*5218cd10SNamjae Jeon int ntfs_ucsncmp(const __le16 *s1, const __le16 *s2, size_t n) 1341e9ea7e0SNamjae Jeon { 1351e9ea7e0SNamjae Jeon u16 c1, c2; 1361e9ea7e0SNamjae Jeon size_t i; 1371e9ea7e0SNamjae Jeon 1381e9ea7e0SNamjae Jeon for (i = 0; i < n; ++i) { 1391e9ea7e0SNamjae Jeon c1 = le16_to_cpu(s1[i]); 1401e9ea7e0SNamjae Jeon c2 = le16_to_cpu(s2[i]); 1411e9ea7e0SNamjae Jeon if (c1 < c2) 1421e9ea7e0SNamjae Jeon return -1; 1431e9ea7e0SNamjae Jeon if (c1 > c2) 1441e9ea7e0SNamjae Jeon return 1; 1451e9ea7e0SNamjae Jeon if (!c1) 1461e9ea7e0SNamjae Jeon break; 1471e9ea7e0SNamjae Jeon } 1481e9ea7e0SNamjae Jeon return 0; 1491e9ea7e0SNamjae Jeon } 1501e9ea7e0SNamjae Jeon 151*5218cd10SNamjae Jeon /* 1521e9ea7e0SNamjae Jeon * ntfs_ucsncasecmp - compare two little endian Unicode strings, ignoring case 1531e9ea7e0SNamjae Jeon * @s1: first string 1541e9ea7e0SNamjae Jeon * @s2: second string 1551e9ea7e0SNamjae Jeon * @n: maximum unicode characters to compare 1561e9ea7e0SNamjae Jeon * @upcase: upcase table 1571e9ea7e0SNamjae Jeon * @upcase_size: upcase table size in Unicode characters 1581e9ea7e0SNamjae Jeon * 1591e9ea7e0SNamjae Jeon * Compare the first @n characters of the Unicode strings @s1 and @s2, 1601e9ea7e0SNamjae Jeon * ignoring case. The strings in little endian format and appropriate 1611e9ea7e0SNamjae Jeon * le16_to_cpu() conversion is performed on non-little endian machines. 1621e9ea7e0SNamjae Jeon * 1631e9ea7e0SNamjae Jeon * Each character is uppercased using the @upcase table before the comparison. 1641e9ea7e0SNamjae Jeon * 1651e9ea7e0SNamjae Jeon * The function returns an integer less than, equal to, or greater than zero 1661e9ea7e0SNamjae Jeon * if @s1 (or the first @n Unicode characters thereof) is found, respectively, 1671e9ea7e0SNamjae Jeon * to be less than, to match, or be greater than @s2. 1681e9ea7e0SNamjae Jeon */ 169*5218cd10SNamjae Jeon int ntfs_ucsncasecmp(const __le16 *s1, const __le16 *s2, size_t n, 170*5218cd10SNamjae Jeon const __le16 *upcase, const u32 upcase_size) 1711e9ea7e0SNamjae Jeon { 1721e9ea7e0SNamjae Jeon size_t i; 1731e9ea7e0SNamjae Jeon u16 c1, c2; 1741e9ea7e0SNamjae Jeon 1751e9ea7e0SNamjae Jeon for (i = 0; i < n; ++i) { 176*5218cd10SNamjae Jeon c1 = le16_to_cpu(s1[i]); 177*5218cd10SNamjae Jeon if (c1 < upcase_size) 1781e9ea7e0SNamjae Jeon c1 = le16_to_cpu(upcase[c1]); 179*5218cd10SNamjae Jeon c2 = le16_to_cpu(s2[i]); 180*5218cd10SNamjae Jeon if (c2 < upcase_size) 1811e9ea7e0SNamjae Jeon c2 = le16_to_cpu(upcase[c2]); 1821e9ea7e0SNamjae Jeon if (c1 < c2) 1831e9ea7e0SNamjae Jeon return -1; 1841e9ea7e0SNamjae Jeon if (c1 > c2) 1851e9ea7e0SNamjae Jeon return 1; 1861e9ea7e0SNamjae Jeon if (!c1) 1871e9ea7e0SNamjae Jeon break; 1881e9ea7e0SNamjae Jeon } 1891e9ea7e0SNamjae Jeon return 0; 1901e9ea7e0SNamjae Jeon } 1911e9ea7e0SNamjae Jeon 192*5218cd10SNamjae Jeon int ntfs_file_compare_values(const struct file_name_attr *file_name_attr1, 193*5218cd10SNamjae Jeon const struct file_name_attr *file_name_attr2, 194*5218cd10SNamjae Jeon const int err_val, const u32 ic, 195*5218cd10SNamjae Jeon const __le16 *upcase, const u32 upcase_len) 1961e9ea7e0SNamjae Jeon { 197*5218cd10SNamjae Jeon return ntfs_collate_names((__le16 *)&file_name_attr1->file_name, 1981e9ea7e0SNamjae Jeon file_name_attr1->file_name_length, 199*5218cd10SNamjae Jeon (__le16 *)&file_name_attr2->file_name, 2001e9ea7e0SNamjae Jeon file_name_attr2->file_name_length, 2011e9ea7e0SNamjae Jeon err_val, ic, upcase, upcase_len); 2021e9ea7e0SNamjae Jeon } 2031e9ea7e0SNamjae Jeon 204*5218cd10SNamjae Jeon /* 2051e9ea7e0SNamjae Jeon * ntfs_nlstoucs - convert NLS string to little endian Unicode string 2061e9ea7e0SNamjae Jeon * @vol: ntfs volume which we are working with 2071e9ea7e0SNamjae Jeon * @ins: input NLS string buffer 2081e9ea7e0SNamjae Jeon * @ins_len: length of input string in bytes 2091e9ea7e0SNamjae Jeon * @outs: on return contains the allocated output Unicode string buffer 210*5218cd10SNamjae Jeon * @max_name_len: maximum number of Unicode characters allowed for the output name 2111e9ea7e0SNamjae Jeon * 2121e9ea7e0SNamjae Jeon * Convert the input string @ins, which is in whatever format the loaded NLS 2131e9ea7e0SNamjae Jeon * map dictates, into a little endian, 2-byte Unicode string. 2141e9ea7e0SNamjae Jeon * 2151e9ea7e0SNamjae Jeon * This function allocates the string and the caller is responsible for 2161e9ea7e0SNamjae Jeon * calling kmem_cache_free(ntfs_name_cache, *@outs); when finished with it. 2171e9ea7e0SNamjae Jeon * 2181e9ea7e0SNamjae Jeon * On success the function returns the number of Unicode characters written to 2191e9ea7e0SNamjae Jeon * the output string *@outs (>= 0), not counting the terminating Unicode NULL 2201e9ea7e0SNamjae Jeon * character. *@outs is set to the allocated output string buffer. 2211e9ea7e0SNamjae Jeon * 2221e9ea7e0SNamjae Jeon * On error, a negative number corresponding to the error code is returned. In 2231e9ea7e0SNamjae Jeon * that case the output string is not allocated. Both *@outs and *@outs_len 2241e9ea7e0SNamjae Jeon * are then undefined. 2251e9ea7e0SNamjae Jeon * 2261e9ea7e0SNamjae Jeon * This might look a bit odd due to fast path optimization... 2271e9ea7e0SNamjae Jeon */ 228*5218cd10SNamjae Jeon int ntfs_nlstoucs(const struct ntfs_volume *vol, const char *ins, 229*5218cd10SNamjae Jeon const int ins_len, __le16 **outs, int max_name_len) 2301e9ea7e0SNamjae Jeon { 2311e9ea7e0SNamjae Jeon struct nls_table *nls = vol->nls_map; 232*5218cd10SNamjae Jeon __le16 *ucs; 2331e9ea7e0SNamjae Jeon wchar_t wc; 2341e9ea7e0SNamjae Jeon int i, o, wc_len; 2351e9ea7e0SNamjae Jeon 2361e9ea7e0SNamjae Jeon /* We do not trust outside sources. */ 2371e9ea7e0SNamjae Jeon if (likely(ins)) { 238*5218cd10SNamjae Jeon if (max_name_len > NTFS_MAX_NAME_LEN) 239*5218cd10SNamjae Jeon ucs = kvmalloc((max_name_len + 2) * sizeof(__le16), 240*5218cd10SNamjae Jeon GFP_NOFS | __GFP_ZERO); 241*5218cd10SNamjae Jeon else 2421e9ea7e0SNamjae Jeon ucs = kmem_cache_alloc(ntfs_name_cache, GFP_NOFS); 2431e9ea7e0SNamjae Jeon if (likely(ucs)) { 244*5218cd10SNamjae Jeon if (vol->nls_utf8) { 245*5218cd10SNamjae Jeon o = utf8s_to_utf16s(ins, ins_len, 246*5218cd10SNamjae Jeon UTF16_LITTLE_ENDIAN, 247*5218cd10SNamjae Jeon (wchar_t *)ucs, 248*5218cd10SNamjae Jeon max_name_len + 2); 249*5218cd10SNamjae Jeon if (o < 0 || o > max_name_len) { 250*5218cd10SNamjae Jeon wc_len = o; 251*5218cd10SNamjae Jeon goto name_err; 252*5218cd10SNamjae Jeon } 253*5218cd10SNamjae Jeon } else { 2541e9ea7e0SNamjae Jeon for (i = o = 0; i < ins_len; i += wc_len) { 2551e9ea7e0SNamjae Jeon wc_len = nls->char2uni(ins + i, ins_len - i, 2561e9ea7e0SNamjae Jeon &wc); 2571e9ea7e0SNamjae Jeon if (likely(wc_len >= 0 && 258*5218cd10SNamjae Jeon o < max_name_len)) { 2591e9ea7e0SNamjae Jeon if (likely(wc)) { 2601e9ea7e0SNamjae Jeon ucs[o++] = cpu_to_le16(wc); 2611e9ea7e0SNamjae Jeon continue; 2621e9ea7e0SNamjae Jeon } /* else if (!wc) */ 2631e9ea7e0SNamjae Jeon break; 264*5218cd10SNamjae Jeon } 265*5218cd10SNamjae Jeon 2661e9ea7e0SNamjae Jeon goto name_err; 2671e9ea7e0SNamjae Jeon } 268*5218cd10SNamjae Jeon } 2691e9ea7e0SNamjae Jeon ucs[o] = 0; 2701e9ea7e0SNamjae Jeon *outs = ucs; 2711e9ea7e0SNamjae Jeon return o; 2721e9ea7e0SNamjae Jeon } /* else if (!ucs) */ 273*5218cd10SNamjae Jeon ntfs_debug("Failed to allocate buffer for converted name from ntfs_name_cache."); 2741e9ea7e0SNamjae Jeon return -ENOMEM; 2751e9ea7e0SNamjae Jeon } /* else if (!ins) */ 2761e9ea7e0SNamjae Jeon ntfs_error(vol->sb, "Received NULL pointer."); 2771e9ea7e0SNamjae Jeon return -EINVAL; 2781e9ea7e0SNamjae Jeon name_err: 279*5218cd10SNamjae Jeon if (max_name_len > NTFS_MAX_NAME_LEN) 280*5218cd10SNamjae Jeon kvfree(ucs); 281*5218cd10SNamjae Jeon else 2821e9ea7e0SNamjae Jeon kmem_cache_free(ntfs_name_cache, ucs); 2831e9ea7e0SNamjae Jeon if (wc_len < 0) { 284*5218cd10SNamjae Jeon ntfs_debug("Name using character set %s contains characters that cannot be converted to Unicode.", 285*5218cd10SNamjae Jeon nls->charset); 2861e9ea7e0SNamjae Jeon i = -EILSEQ; 287*5218cd10SNamjae Jeon } else { 288*5218cd10SNamjae Jeon ntfs_debug("Name is too long (maximum length for a name on NTFS is %d Unicode characters.", 289*5218cd10SNamjae Jeon max_name_len); 2901e9ea7e0SNamjae Jeon i = -ENAMETOOLONG; 2911e9ea7e0SNamjae Jeon } 2921e9ea7e0SNamjae Jeon return i; 2931e9ea7e0SNamjae Jeon } 2941e9ea7e0SNamjae Jeon 295*5218cd10SNamjae Jeon /* 2961e9ea7e0SNamjae Jeon * ntfs_ucstonls - convert little endian Unicode string to NLS string 2971e9ea7e0SNamjae Jeon * @vol: ntfs volume which we are working with 2981e9ea7e0SNamjae Jeon * @ins: input Unicode string buffer 2991e9ea7e0SNamjae Jeon * @ins_len: length of input string in Unicode characters 3001e9ea7e0SNamjae Jeon * @outs: on return contains the (allocated) output NLS string buffer 3011e9ea7e0SNamjae Jeon * @outs_len: length of output string buffer in bytes 3021e9ea7e0SNamjae Jeon * 3031e9ea7e0SNamjae Jeon * Convert the input little endian, 2-byte Unicode string @ins, of length 3041e9ea7e0SNamjae Jeon * @ins_len into the string format dictated by the loaded NLS. 3051e9ea7e0SNamjae Jeon * 3061e9ea7e0SNamjae Jeon * If *@outs is NULL, this function allocates the string and the caller is 3071e9ea7e0SNamjae Jeon * responsible for calling kfree(*@outs); when finished with it. In this case 3081e9ea7e0SNamjae Jeon * @outs_len is ignored and can be 0. 3091e9ea7e0SNamjae Jeon * 3101e9ea7e0SNamjae Jeon * On success the function returns the number of bytes written to the output 3111e9ea7e0SNamjae Jeon * string *@outs (>= 0), not counting the terminating NULL byte. If the output 3121e9ea7e0SNamjae Jeon * string buffer was allocated, *@outs is set to it. 3131e9ea7e0SNamjae Jeon * 3141e9ea7e0SNamjae Jeon * On error, a negative number corresponding to the error code is returned. In 3151e9ea7e0SNamjae Jeon * that case the output string is not allocated. The contents of *@outs are 3161e9ea7e0SNamjae Jeon * then undefined. 3171e9ea7e0SNamjae Jeon * 3181e9ea7e0SNamjae Jeon * This might look a bit odd due to fast path optimization... 3191e9ea7e0SNamjae Jeon */ 320*5218cd10SNamjae Jeon int ntfs_ucstonls(const struct ntfs_volume *vol, const __le16 *ins, 3211e9ea7e0SNamjae Jeon const int ins_len, unsigned char **outs, int outs_len) 3221e9ea7e0SNamjae Jeon { 3231e9ea7e0SNamjae Jeon struct nls_table *nls = vol->nls_map; 3241e9ea7e0SNamjae Jeon unsigned char *ns; 3251e9ea7e0SNamjae Jeon int i, o, ns_len, wc; 3261e9ea7e0SNamjae Jeon 3271e9ea7e0SNamjae Jeon /* We don't trust outside sources. */ 3281e9ea7e0SNamjae Jeon if (ins) { 3291e9ea7e0SNamjae Jeon ns = *outs; 3301e9ea7e0SNamjae Jeon ns_len = outs_len; 3311e9ea7e0SNamjae Jeon if (ns && !ns_len) { 3321e9ea7e0SNamjae Jeon wc = -ENAMETOOLONG; 3331e9ea7e0SNamjae Jeon goto conversion_err; 3341e9ea7e0SNamjae Jeon } 3351e9ea7e0SNamjae Jeon if (!ns) { 3361e9ea7e0SNamjae Jeon ns_len = ins_len * NLS_MAX_CHARSET_SIZE; 3371e9ea7e0SNamjae Jeon ns = kmalloc(ns_len + 1, GFP_NOFS); 3381e9ea7e0SNamjae Jeon if (!ns) 3391e9ea7e0SNamjae Jeon goto mem_err_out; 3401e9ea7e0SNamjae Jeon } 341*5218cd10SNamjae Jeon 342*5218cd10SNamjae Jeon if (vol->nls_utf8) { 343*5218cd10SNamjae Jeon o = utf16s_to_utf8s((const wchar_t *)ins, ins_len, 344*5218cd10SNamjae Jeon UTF16_LITTLE_ENDIAN, ns, ns_len); 345*5218cd10SNamjae Jeon if (o >= ns_len) { 346*5218cd10SNamjae Jeon wc = -ENAMETOOLONG; 347*5218cd10SNamjae Jeon goto conversion_err; 348*5218cd10SNamjae Jeon } 349*5218cd10SNamjae Jeon goto done; 350*5218cd10SNamjae Jeon } 351*5218cd10SNamjae Jeon 3521e9ea7e0SNamjae Jeon for (i = o = 0; i < ins_len; i++) { 353*5218cd10SNamjae Jeon retry: 354*5218cd10SNamjae Jeon wc = nls->uni2char(le16_to_cpu(ins[i]), ns + o, 3551e9ea7e0SNamjae Jeon ns_len - o); 3561e9ea7e0SNamjae Jeon if (wc > 0) { 3571e9ea7e0SNamjae Jeon o += wc; 3581e9ea7e0SNamjae Jeon continue; 3591e9ea7e0SNamjae Jeon } else if (!wc) 3601e9ea7e0SNamjae Jeon break; 3611e9ea7e0SNamjae Jeon else if (wc == -ENAMETOOLONG && ns != *outs) { 3621e9ea7e0SNamjae Jeon unsigned char *tc; 3631e9ea7e0SNamjae Jeon /* Grow in multiples of 64 bytes. */ 3641e9ea7e0SNamjae Jeon tc = kmalloc((ns_len + 64) & 3651e9ea7e0SNamjae Jeon ~63, GFP_NOFS); 3661e9ea7e0SNamjae Jeon if (tc) { 3671e9ea7e0SNamjae Jeon memcpy(tc, ns, ns_len); 3681e9ea7e0SNamjae Jeon ns_len = ((ns_len + 64) & ~63) - 1; 3691e9ea7e0SNamjae Jeon kfree(ns); 3701e9ea7e0SNamjae Jeon ns = tc; 3711e9ea7e0SNamjae Jeon goto retry; 3721e9ea7e0SNamjae Jeon } /* No memory so goto conversion_error; */ 3731e9ea7e0SNamjae Jeon } /* wc < 0, real error. */ 3741e9ea7e0SNamjae Jeon goto conversion_err; 3751e9ea7e0SNamjae Jeon } 376*5218cd10SNamjae Jeon done: 3771e9ea7e0SNamjae Jeon ns[o] = 0; 3781e9ea7e0SNamjae Jeon *outs = ns; 3791e9ea7e0SNamjae Jeon return o; 3801e9ea7e0SNamjae Jeon } /* else (!ins) */ 3811e9ea7e0SNamjae Jeon ntfs_error(vol->sb, "Received NULL pointer."); 3821e9ea7e0SNamjae Jeon return -EINVAL; 3831e9ea7e0SNamjae Jeon conversion_err: 384*5218cd10SNamjae Jeon ntfs_error(vol->sb, 385*5218cd10SNamjae Jeon "Unicode name contains characters that cannot be converted to character set %s. You might want to try to use the mount option nls=utf8.", 386*5218cd10SNamjae Jeon nls->charset); 3871e9ea7e0SNamjae Jeon if (ns != *outs) 3881e9ea7e0SNamjae Jeon kfree(ns); 3891e9ea7e0SNamjae Jeon if (wc != -ENAMETOOLONG) 3901e9ea7e0SNamjae Jeon wc = -EILSEQ; 3911e9ea7e0SNamjae Jeon return wc; 3921e9ea7e0SNamjae Jeon mem_err_out: 3931e9ea7e0SNamjae Jeon ntfs_error(vol->sb, "Failed to allocate name!"); 3941e9ea7e0SNamjae Jeon return -ENOMEM; 3951e9ea7e0SNamjae Jeon } 396*5218cd10SNamjae Jeon 397*5218cd10SNamjae Jeon /* 398*5218cd10SNamjae Jeon * ntfs_ucsnlen - determine the length of a little endian Unicode string 399*5218cd10SNamjae Jeon * @s: pointer to Unicode string 400*5218cd10SNamjae Jeon * @maxlen: maximum length of string @s 401*5218cd10SNamjae Jeon * 402*5218cd10SNamjae Jeon * Return the number of Unicode characters in the little endian Unicode 403*5218cd10SNamjae Jeon * string @s up to a maximum of maxlen Unicode characters, not including 404*5218cd10SNamjae Jeon * the terminating (__le16)'\0'. If there is no (__le16)'\0' between @s 405*5218cd10SNamjae Jeon * and @s + @maxlen, @maxlen is returned. 406*5218cd10SNamjae Jeon * 407*5218cd10SNamjae Jeon * This function never looks beyond @s + @maxlen. 408*5218cd10SNamjae Jeon */ 409*5218cd10SNamjae Jeon static u32 ntfs_ucsnlen(const __le16 *s, u32 maxlen) 410*5218cd10SNamjae Jeon { 411*5218cd10SNamjae Jeon u32 i; 412*5218cd10SNamjae Jeon 413*5218cd10SNamjae Jeon for (i = 0; i < maxlen; i++) { 414*5218cd10SNamjae Jeon if (!le16_to_cpu(s[i])) 415*5218cd10SNamjae Jeon break; 416*5218cd10SNamjae Jeon } 417*5218cd10SNamjae Jeon return i; 418*5218cd10SNamjae Jeon } 419*5218cd10SNamjae Jeon 420*5218cd10SNamjae Jeon /* 421*5218cd10SNamjae Jeon * ntfs_ucsndup - duplicate little endian Unicode string 422*5218cd10SNamjae Jeon * @s: pointer to Unicode string 423*5218cd10SNamjae Jeon * @maxlen: maximum length of string @s 424*5218cd10SNamjae Jeon * 425*5218cd10SNamjae Jeon * Return a pointer to a new little endian Unicode string which is a duplicate 426*5218cd10SNamjae Jeon * of the string s. Memory for the new string is obtained with kmalloc, 427*5218cd10SNamjae Jeon * and can be freed with kfree. 428*5218cd10SNamjae Jeon * 429*5218cd10SNamjae Jeon * A maximum of @maxlen Unicode characters are copied and a terminating 430*5218cd10SNamjae Jeon * (__le16)'\0' little endian Unicode character is added. 431*5218cd10SNamjae Jeon * 432*5218cd10SNamjae Jeon * This function never looks beyond @s + @maxlen. 433*5218cd10SNamjae Jeon * 434*5218cd10SNamjae Jeon * Return a pointer to the new little endian Unicode string on success and NULL 435*5218cd10SNamjae Jeon * on failure with errno set to the error code. 436*5218cd10SNamjae Jeon */ 437*5218cd10SNamjae Jeon __le16 *ntfs_ucsndup(const __le16 *s, u32 maxlen) 438*5218cd10SNamjae Jeon { 439*5218cd10SNamjae Jeon __le16 *dst; 440*5218cd10SNamjae Jeon u32 len; 441*5218cd10SNamjae Jeon 442*5218cd10SNamjae Jeon len = ntfs_ucsnlen(s, maxlen); 443*5218cd10SNamjae Jeon dst = kmalloc((len + 1) * sizeof(__le16), GFP_NOFS); 444*5218cd10SNamjae Jeon if (dst) { 445*5218cd10SNamjae Jeon memcpy(dst, s, len * sizeof(__le16)); 446*5218cd10SNamjae Jeon dst[len] = cpu_to_le16(L'\0'); 447*5218cd10SNamjae Jeon } 448*5218cd10SNamjae Jeon return dst; 449*5218cd10SNamjae Jeon } 450*5218cd10SNamjae Jeon 451*5218cd10SNamjae Jeon /* 452*5218cd10SNamjae Jeon * ntfs_names_are_equal - compare two Unicode names for equality 453*5218cd10SNamjae Jeon * @s1: name to compare to @s2 454*5218cd10SNamjae Jeon * @s1_len: length in Unicode characters of @s1 455*5218cd10SNamjae Jeon * @s2: name to compare to @s1 456*5218cd10SNamjae Jeon * @s2_len: length in Unicode characters of @s2 457*5218cd10SNamjae Jeon * @ic: ignore case bool 458*5218cd10SNamjae Jeon * @upcase: upcase table (only if @ic == IGNORE_CASE) 459*5218cd10SNamjae Jeon * @upcase_size: length in Unicode characters of @upcase (if present) 460*5218cd10SNamjae Jeon * 461*5218cd10SNamjae Jeon * Compare the names @s1 and @s2 and return TRUE (1) if the names are 462*5218cd10SNamjae Jeon * identical, or FALSE (0) if they are not identical. If @ic is IGNORE_CASE, 463*5218cd10SNamjae Jeon * the @upcase table is used to perform a case insensitive comparison. 464*5218cd10SNamjae Jeon */ 465*5218cd10SNamjae Jeon bool ntfs_names_are_equal(const __le16 *s1, size_t s1_len, 466*5218cd10SNamjae Jeon const __le16 *s2, size_t s2_len, 467*5218cd10SNamjae Jeon const u32 ic, 468*5218cd10SNamjae Jeon const __le16 *upcase, const u32 upcase_size) 469*5218cd10SNamjae Jeon { 470*5218cd10SNamjae Jeon if (s1_len != s2_len) 471*5218cd10SNamjae Jeon return false; 472*5218cd10SNamjae Jeon if (!s1_len) 473*5218cd10SNamjae Jeon return true; 474*5218cd10SNamjae Jeon if (ic == CASE_SENSITIVE) 475*5218cd10SNamjae Jeon return ntfs_ucsncmp(s1, s2, s1_len) ? false : true; 476*5218cd10SNamjae Jeon return ntfs_ucsncasecmp(s1, s2, s1_len, upcase, upcase_size) ? false : true; 477*5218cd10SNamjae Jeon } 478