xref: /linux/fs/ntfs/unistr.c (revision cdd4dc3aebeab43a72ce0bc2b5bab6f0a80b97a5)
11e9ea7e0SNamjae Jeon // SPDX-License-Identifier: GPL-2.0-or-later
21e9ea7e0SNamjae Jeon /*
3*5218cd10SNamjae Jeon  * NTFS Unicode string handling.
41e9ea7e0SNamjae Jeon  *
51e9ea7e0SNamjae Jeon  * Copyright (c) 2001-2006 Anton Altaparmakov
61e9ea7e0SNamjae Jeon  */
71e9ea7e0SNamjae Jeon 
81e9ea7e0SNamjae Jeon #include "ntfs.h"
91e9ea7e0SNamjae Jeon 
101e9ea7e0SNamjae Jeon /*
111e9ea7e0SNamjae Jeon  * IMPORTANT
121e9ea7e0SNamjae Jeon  * =========
131e9ea7e0SNamjae Jeon  *
141e9ea7e0SNamjae Jeon  * All these routines assume that the Unicode characters are in little endian
151e9ea7e0SNamjae Jeon  * encoding inside the strings!!!
161e9ea7e0SNamjae Jeon  */
171e9ea7e0SNamjae Jeon 
181e9ea7e0SNamjae Jeon /*
191e9ea7e0SNamjae Jeon  * This is used by the name collation functions to quickly determine what
201e9ea7e0SNamjae Jeon  * characters are (in)valid.
211e9ea7e0SNamjae Jeon  */
221e9ea7e0SNamjae Jeon static const u8 legal_ansi_char_array[0x40] = {
231e9ea7e0SNamjae Jeon 	0x00, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
241e9ea7e0SNamjae Jeon 	0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
251e9ea7e0SNamjae Jeon 
261e9ea7e0SNamjae Jeon 	0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
271e9ea7e0SNamjae Jeon 	0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
281e9ea7e0SNamjae Jeon 
291e9ea7e0SNamjae Jeon 	0x17, 0x07, 0x18, 0x17, 0x17, 0x17, 0x17, 0x17,
301e9ea7e0SNamjae Jeon 	0x17, 0x17, 0x18, 0x16, 0x16, 0x17, 0x07, 0x00,
311e9ea7e0SNamjae Jeon 
321e9ea7e0SNamjae Jeon 	0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17,
331e9ea7e0SNamjae Jeon 	0x17, 0x17, 0x04, 0x16, 0x18, 0x16, 0x18, 0x18,
341e9ea7e0SNamjae Jeon };
351e9ea7e0SNamjae Jeon 
36*5218cd10SNamjae Jeon /*
371e9ea7e0SNamjae Jeon  * ntfs_are_names_equal - compare two Unicode names for equality
381e9ea7e0SNamjae Jeon  * @s1:			name to compare to @s2
391e9ea7e0SNamjae Jeon  * @s1_len:		length in Unicode characters of @s1
401e9ea7e0SNamjae Jeon  * @s2:			name to compare to @s1
411e9ea7e0SNamjae Jeon  * @s2_len:		length in Unicode characters of @s2
421e9ea7e0SNamjae Jeon  * @ic:			ignore case bool
431e9ea7e0SNamjae Jeon  * @upcase:		upcase table (only if @ic == IGNORE_CASE)
441e9ea7e0SNamjae Jeon  * @upcase_size:	length in Unicode characters of @upcase (if present)
451e9ea7e0SNamjae Jeon  *
461e9ea7e0SNamjae Jeon  * Compare the names @s1 and @s2 and return 'true' (1) if the names are
471e9ea7e0SNamjae Jeon  * identical, or 'false' (0) if they are not identical. If @ic is IGNORE_CASE,
481e9ea7e0SNamjae Jeon  * the @upcase table is used to performa a case insensitive comparison.
491e9ea7e0SNamjae Jeon  */
50*5218cd10SNamjae Jeon bool ntfs_are_names_equal(const __le16 *s1, size_t s1_len,
51*5218cd10SNamjae Jeon 		const __le16 *s2, size_t s2_len, const u32 ic,
52*5218cd10SNamjae Jeon 		const __le16 *upcase, const u32 upcase_size)
531e9ea7e0SNamjae Jeon {
541e9ea7e0SNamjae Jeon 	if (s1_len != s2_len)
551e9ea7e0SNamjae Jeon 		return false;
561e9ea7e0SNamjae Jeon 	if (ic == CASE_SENSITIVE)
571e9ea7e0SNamjae Jeon 		return !ntfs_ucsncmp(s1, s2, s1_len);
581e9ea7e0SNamjae Jeon 	return !ntfs_ucsncasecmp(s1, s2, s1_len, upcase, upcase_size);
591e9ea7e0SNamjae Jeon }
601e9ea7e0SNamjae Jeon 
61*5218cd10SNamjae Jeon /*
621e9ea7e0SNamjae Jeon  * ntfs_collate_names - collate two Unicode names
631e9ea7e0SNamjae Jeon  * @name1:	first Unicode name to compare
64*5218cd10SNamjae Jeon  * @name1_len:	first Unicode name length
651e9ea7e0SNamjae Jeon  * @name2:	second Unicode name to compare
66*5218cd10SNamjae Jeon  * @name2_len:	second Unicode name length
671e9ea7e0SNamjae Jeon  * @err_val:	if @name1 contains an invalid character return this value
681e9ea7e0SNamjae Jeon  * @ic:		either CASE_SENSITIVE or IGNORE_CASE
691e9ea7e0SNamjae Jeon  * @upcase:	upcase table (ignored if @ic is CASE_SENSITIVE)
701e9ea7e0SNamjae Jeon  * @upcase_len:	upcase table size (ignored if @ic is CASE_SENSITIVE)
711e9ea7e0SNamjae Jeon  *
721e9ea7e0SNamjae Jeon  * ntfs_collate_names collates two Unicode names and returns:
731e9ea7e0SNamjae Jeon  *
741e9ea7e0SNamjae Jeon  *  -1 if the first name collates before the second one,
751e9ea7e0SNamjae Jeon  *   0 if the names match,
761e9ea7e0SNamjae Jeon  *   1 if the second name collates before the first one, or
771e9ea7e0SNamjae Jeon  * @err_val if an invalid character is found in @name1 during the comparison.
781e9ea7e0SNamjae Jeon  *
791e9ea7e0SNamjae Jeon  * The following characters are considered invalid: '"', '*', '<', '>' and '?'.
801e9ea7e0SNamjae Jeon  */
81*5218cd10SNamjae Jeon int ntfs_collate_names(const __le16 *name1, const u32 name1_len,
82*5218cd10SNamjae Jeon 		const __le16 *name2, const u32 name2_len,
83*5218cd10SNamjae Jeon 		const int err_val, const u32 ic,
84*5218cd10SNamjae Jeon 		const __le16 *upcase, const u32 upcase_len)
851e9ea7e0SNamjae Jeon {
861e9ea7e0SNamjae Jeon 	u32 cnt, min_len;
871e9ea7e0SNamjae Jeon 	u16 c1, c2;
881e9ea7e0SNamjae Jeon 
891e9ea7e0SNamjae Jeon 	min_len = name1_len;
901e9ea7e0SNamjae Jeon 	if (name1_len > name2_len)
911e9ea7e0SNamjae Jeon 		min_len = name2_len;
921e9ea7e0SNamjae Jeon 	for (cnt = 0; cnt < min_len; ++cnt) {
931e9ea7e0SNamjae Jeon 		c1 = le16_to_cpu(*name1++);
941e9ea7e0SNamjae Jeon 		c2 = le16_to_cpu(*name2++);
951e9ea7e0SNamjae Jeon 		if (ic) {
961e9ea7e0SNamjae Jeon 			if (c1 < upcase_len)
971e9ea7e0SNamjae Jeon 				c1 = le16_to_cpu(upcase[c1]);
981e9ea7e0SNamjae Jeon 			if (c2 < upcase_len)
991e9ea7e0SNamjae Jeon 				c2 = le16_to_cpu(upcase[c2]);
1001e9ea7e0SNamjae Jeon 		}
1011e9ea7e0SNamjae Jeon 		if (c1 < 64 && legal_ansi_char_array[c1] & 8)
1021e9ea7e0SNamjae Jeon 			return err_val;
1031e9ea7e0SNamjae Jeon 		if (c1 < c2)
1041e9ea7e0SNamjae Jeon 			return -1;
1051e9ea7e0SNamjae Jeon 		if (c1 > c2)
1061e9ea7e0SNamjae Jeon 			return 1;
1071e9ea7e0SNamjae Jeon 	}
1081e9ea7e0SNamjae Jeon 	if (name1_len < name2_len)
1091e9ea7e0SNamjae Jeon 		return -1;
1101e9ea7e0SNamjae Jeon 	if (name1_len == name2_len)
1111e9ea7e0SNamjae Jeon 		return 0;
1121e9ea7e0SNamjae Jeon 	/* name1_len > name2_len */
1131e9ea7e0SNamjae Jeon 	c1 = le16_to_cpu(*name1);
1141e9ea7e0SNamjae Jeon 	if (c1 < 64 && legal_ansi_char_array[c1] & 8)
1151e9ea7e0SNamjae Jeon 		return err_val;
1161e9ea7e0SNamjae Jeon 	return 1;
1171e9ea7e0SNamjae Jeon }
1181e9ea7e0SNamjae Jeon 
119*5218cd10SNamjae Jeon /*
1201e9ea7e0SNamjae Jeon  * ntfs_ucsncmp - compare two little endian Unicode strings
1211e9ea7e0SNamjae Jeon  * @s1:		first string
1221e9ea7e0SNamjae Jeon  * @s2:		second string
1231e9ea7e0SNamjae Jeon  * @n:		maximum unicode characters to compare
1241e9ea7e0SNamjae Jeon  *
1251e9ea7e0SNamjae Jeon  * Compare the first @n characters of the Unicode strings @s1 and @s2,
1261e9ea7e0SNamjae Jeon  * The strings in little endian format and appropriate le16_to_cpu()
1271e9ea7e0SNamjae Jeon  * conversion is performed on non-little endian machines.
1281e9ea7e0SNamjae Jeon  *
1291e9ea7e0SNamjae Jeon  * The function returns an integer less than, equal to, or greater than zero
1301e9ea7e0SNamjae Jeon  * if @s1 (or the first @n Unicode characters thereof) is found, respectively,
1311e9ea7e0SNamjae Jeon  * to be less than, to match, or be greater than @s2.
1321e9ea7e0SNamjae Jeon  */
133*5218cd10SNamjae Jeon int ntfs_ucsncmp(const __le16 *s1, const __le16 *s2, size_t n)
1341e9ea7e0SNamjae Jeon {
1351e9ea7e0SNamjae Jeon 	u16 c1, c2;
1361e9ea7e0SNamjae Jeon 	size_t i;
1371e9ea7e0SNamjae Jeon 
1381e9ea7e0SNamjae Jeon 	for (i = 0; i < n; ++i) {
1391e9ea7e0SNamjae Jeon 		c1 = le16_to_cpu(s1[i]);
1401e9ea7e0SNamjae Jeon 		c2 = le16_to_cpu(s2[i]);
1411e9ea7e0SNamjae Jeon 		if (c1 < c2)
1421e9ea7e0SNamjae Jeon 			return -1;
1431e9ea7e0SNamjae Jeon 		if (c1 > c2)
1441e9ea7e0SNamjae Jeon 			return 1;
1451e9ea7e0SNamjae Jeon 		if (!c1)
1461e9ea7e0SNamjae Jeon 			break;
1471e9ea7e0SNamjae Jeon 	}
1481e9ea7e0SNamjae Jeon 	return 0;
1491e9ea7e0SNamjae Jeon }
1501e9ea7e0SNamjae Jeon 
151*5218cd10SNamjae Jeon /*
1521e9ea7e0SNamjae Jeon  * ntfs_ucsncasecmp - compare two little endian Unicode strings, ignoring case
1531e9ea7e0SNamjae Jeon  * @s1:			first string
1541e9ea7e0SNamjae Jeon  * @s2:			second string
1551e9ea7e0SNamjae Jeon  * @n:			maximum unicode characters to compare
1561e9ea7e0SNamjae Jeon  * @upcase:		upcase table
1571e9ea7e0SNamjae Jeon  * @upcase_size:	upcase table size in Unicode characters
1581e9ea7e0SNamjae Jeon  *
1591e9ea7e0SNamjae Jeon  * Compare the first @n characters of the Unicode strings @s1 and @s2,
1601e9ea7e0SNamjae Jeon  * ignoring case. The strings in little endian format and appropriate
1611e9ea7e0SNamjae Jeon  * le16_to_cpu() conversion is performed on non-little endian machines.
1621e9ea7e0SNamjae Jeon  *
1631e9ea7e0SNamjae Jeon  * Each character is uppercased using the @upcase table before the comparison.
1641e9ea7e0SNamjae Jeon  *
1651e9ea7e0SNamjae Jeon  * The function returns an integer less than, equal to, or greater than zero
1661e9ea7e0SNamjae Jeon  * if @s1 (or the first @n Unicode characters thereof) is found, respectively,
1671e9ea7e0SNamjae Jeon  * to be less than, to match, or be greater than @s2.
1681e9ea7e0SNamjae Jeon  */
169*5218cd10SNamjae Jeon int ntfs_ucsncasecmp(const __le16 *s1, const __le16 *s2, size_t n,
170*5218cd10SNamjae Jeon 		const __le16 *upcase, const u32 upcase_size)
1711e9ea7e0SNamjae Jeon {
1721e9ea7e0SNamjae Jeon 	size_t i;
1731e9ea7e0SNamjae Jeon 	u16 c1, c2;
1741e9ea7e0SNamjae Jeon 
1751e9ea7e0SNamjae Jeon 	for (i = 0; i < n; ++i) {
176*5218cd10SNamjae Jeon 		c1 = le16_to_cpu(s1[i]);
177*5218cd10SNamjae Jeon 		if (c1 < upcase_size)
1781e9ea7e0SNamjae Jeon 			c1 = le16_to_cpu(upcase[c1]);
179*5218cd10SNamjae Jeon 		c2 = le16_to_cpu(s2[i]);
180*5218cd10SNamjae Jeon 		if (c2 < upcase_size)
1811e9ea7e0SNamjae Jeon 			c2 = le16_to_cpu(upcase[c2]);
1821e9ea7e0SNamjae Jeon 		if (c1 < c2)
1831e9ea7e0SNamjae Jeon 			return -1;
1841e9ea7e0SNamjae Jeon 		if (c1 > c2)
1851e9ea7e0SNamjae Jeon 			return 1;
1861e9ea7e0SNamjae Jeon 		if (!c1)
1871e9ea7e0SNamjae Jeon 			break;
1881e9ea7e0SNamjae Jeon 	}
1891e9ea7e0SNamjae Jeon 	return 0;
1901e9ea7e0SNamjae Jeon }
1911e9ea7e0SNamjae Jeon 
192*5218cd10SNamjae Jeon int ntfs_file_compare_values(const struct file_name_attr *file_name_attr1,
193*5218cd10SNamjae Jeon 		const struct file_name_attr *file_name_attr2,
194*5218cd10SNamjae Jeon 		const int err_val, const u32 ic,
195*5218cd10SNamjae Jeon 		const __le16 *upcase, const u32 upcase_len)
1961e9ea7e0SNamjae Jeon {
197*5218cd10SNamjae Jeon 	return ntfs_collate_names((__le16 *)&file_name_attr1->file_name,
1981e9ea7e0SNamjae Jeon 			file_name_attr1->file_name_length,
199*5218cd10SNamjae Jeon 			(__le16 *)&file_name_attr2->file_name,
2001e9ea7e0SNamjae Jeon 			file_name_attr2->file_name_length,
2011e9ea7e0SNamjae Jeon 			err_val, ic, upcase, upcase_len);
2021e9ea7e0SNamjae Jeon }
2031e9ea7e0SNamjae Jeon 
204*5218cd10SNamjae Jeon /*
2051e9ea7e0SNamjae Jeon  * ntfs_nlstoucs - convert NLS string to little endian Unicode string
2061e9ea7e0SNamjae Jeon  * @vol:	ntfs volume which we are working with
2071e9ea7e0SNamjae Jeon  * @ins:	input NLS string buffer
2081e9ea7e0SNamjae Jeon  * @ins_len:	length of input string in bytes
2091e9ea7e0SNamjae Jeon  * @outs:	on return contains the allocated output Unicode string buffer
210*5218cd10SNamjae Jeon  * @max_name_len: maximum number of Unicode characters allowed for the output name
2111e9ea7e0SNamjae Jeon  *
2121e9ea7e0SNamjae Jeon  * Convert the input string @ins, which is in whatever format the loaded NLS
2131e9ea7e0SNamjae Jeon  * map dictates, into a little endian, 2-byte Unicode string.
2141e9ea7e0SNamjae Jeon  *
2151e9ea7e0SNamjae Jeon  * This function allocates the string and the caller is responsible for
2161e9ea7e0SNamjae Jeon  * calling kmem_cache_free(ntfs_name_cache, *@outs); when finished with it.
2171e9ea7e0SNamjae Jeon  *
2181e9ea7e0SNamjae Jeon  * On success the function returns the number of Unicode characters written to
2191e9ea7e0SNamjae Jeon  * the output string *@outs (>= 0), not counting the terminating Unicode NULL
2201e9ea7e0SNamjae Jeon  * character. *@outs is set to the allocated output string buffer.
2211e9ea7e0SNamjae Jeon  *
2221e9ea7e0SNamjae Jeon  * On error, a negative number corresponding to the error code is returned. In
2231e9ea7e0SNamjae Jeon  * that case the output string is not allocated. Both *@outs and *@outs_len
2241e9ea7e0SNamjae Jeon  * are then undefined.
2251e9ea7e0SNamjae Jeon  *
2261e9ea7e0SNamjae Jeon  * This might look a bit odd due to fast path optimization...
2271e9ea7e0SNamjae Jeon  */
228*5218cd10SNamjae Jeon int ntfs_nlstoucs(const struct ntfs_volume *vol, const char *ins,
229*5218cd10SNamjae Jeon 		const int ins_len, __le16 **outs, int max_name_len)
2301e9ea7e0SNamjae Jeon {
2311e9ea7e0SNamjae Jeon 	struct nls_table *nls = vol->nls_map;
232*5218cd10SNamjae Jeon 	__le16 *ucs;
2331e9ea7e0SNamjae Jeon 	wchar_t wc;
2341e9ea7e0SNamjae Jeon 	int i, o, wc_len;
2351e9ea7e0SNamjae Jeon 
2361e9ea7e0SNamjae Jeon 	/* We do not trust outside sources. */
2371e9ea7e0SNamjae Jeon 	if (likely(ins)) {
238*5218cd10SNamjae Jeon 		if (max_name_len > NTFS_MAX_NAME_LEN)
239*5218cd10SNamjae Jeon 			ucs = kvmalloc((max_name_len + 2) * sizeof(__le16),
240*5218cd10SNamjae Jeon 				       GFP_NOFS | __GFP_ZERO);
241*5218cd10SNamjae Jeon 		else
2421e9ea7e0SNamjae Jeon 			ucs = kmem_cache_alloc(ntfs_name_cache, GFP_NOFS);
2431e9ea7e0SNamjae Jeon 		if (likely(ucs)) {
244*5218cd10SNamjae Jeon 			if (vol->nls_utf8) {
245*5218cd10SNamjae Jeon 				o = utf8s_to_utf16s(ins, ins_len,
246*5218cd10SNamjae Jeon 						    UTF16_LITTLE_ENDIAN,
247*5218cd10SNamjae Jeon 						    (wchar_t *)ucs,
248*5218cd10SNamjae Jeon 						    max_name_len + 2);
249*5218cd10SNamjae Jeon 				if (o < 0 || o > max_name_len) {
250*5218cd10SNamjae Jeon 					wc_len = o;
251*5218cd10SNamjae Jeon 					goto name_err;
252*5218cd10SNamjae Jeon 				}
253*5218cd10SNamjae Jeon 			} else {
2541e9ea7e0SNamjae Jeon 				for (i = o = 0; i < ins_len; i += wc_len) {
2551e9ea7e0SNamjae Jeon 					wc_len = nls->char2uni(ins + i, ins_len - i,
2561e9ea7e0SNamjae Jeon 							&wc);
2571e9ea7e0SNamjae Jeon 					if (likely(wc_len >= 0 &&
258*5218cd10SNamjae Jeon 					    o < max_name_len)) {
2591e9ea7e0SNamjae Jeon 						if (likely(wc)) {
2601e9ea7e0SNamjae Jeon 							ucs[o++] = cpu_to_le16(wc);
2611e9ea7e0SNamjae Jeon 							continue;
2621e9ea7e0SNamjae Jeon 						} /* else if (!wc) */
2631e9ea7e0SNamjae Jeon 						break;
264*5218cd10SNamjae Jeon 					}
265*5218cd10SNamjae Jeon 
2661e9ea7e0SNamjae Jeon 					goto name_err;
2671e9ea7e0SNamjae Jeon 				}
268*5218cd10SNamjae Jeon 			}
2691e9ea7e0SNamjae Jeon 			ucs[o] = 0;
2701e9ea7e0SNamjae Jeon 			*outs = ucs;
2711e9ea7e0SNamjae Jeon 			return o;
2721e9ea7e0SNamjae Jeon 		} /* else if (!ucs) */
273*5218cd10SNamjae Jeon 		ntfs_debug("Failed to allocate buffer for converted name from ntfs_name_cache.");
2741e9ea7e0SNamjae Jeon 		return -ENOMEM;
2751e9ea7e0SNamjae Jeon 	} /* else if (!ins) */
2761e9ea7e0SNamjae Jeon 	ntfs_error(vol->sb, "Received NULL pointer.");
2771e9ea7e0SNamjae Jeon 	return -EINVAL;
2781e9ea7e0SNamjae Jeon name_err:
279*5218cd10SNamjae Jeon 	if (max_name_len > NTFS_MAX_NAME_LEN)
280*5218cd10SNamjae Jeon 		kvfree(ucs);
281*5218cd10SNamjae Jeon 	else
2821e9ea7e0SNamjae Jeon 		kmem_cache_free(ntfs_name_cache, ucs);
2831e9ea7e0SNamjae Jeon 	if (wc_len < 0) {
284*5218cd10SNamjae Jeon 		ntfs_debug("Name using character set %s contains characters that cannot be converted to Unicode.",
285*5218cd10SNamjae Jeon 				nls->charset);
2861e9ea7e0SNamjae Jeon 		i = -EILSEQ;
287*5218cd10SNamjae Jeon 	} else {
288*5218cd10SNamjae Jeon 		ntfs_debug("Name is too long (maximum length for a name on NTFS is %d Unicode characters.",
289*5218cd10SNamjae Jeon 				max_name_len);
2901e9ea7e0SNamjae Jeon 		i = -ENAMETOOLONG;
2911e9ea7e0SNamjae Jeon 	}
2921e9ea7e0SNamjae Jeon 	return i;
2931e9ea7e0SNamjae Jeon }
2941e9ea7e0SNamjae Jeon 
295*5218cd10SNamjae Jeon /*
2961e9ea7e0SNamjae Jeon  * ntfs_ucstonls - convert little endian Unicode string to NLS string
2971e9ea7e0SNamjae Jeon  * @vol:	ntfs volume which we are working with
2981e9ea7e0SNamjae Jeon  * @ins:	input Unicode string buffer
2991e9ea7e0SNamjae Jeon  * @ins_len:	length of input string in Unicode characters
3001e9ea7e0SNamjae Jeon  * @outs:	on return contains the (allocated) output NLS string buffer
3011e9ea7e0SNamjae Jeon  * @outs_len:	length of output string buffer in bytes
3021e9ea7e0SNamjae Jeon  *
3031e9ea7e0SNamjae Jeon  * Convert the input little endian, 2-byte Unicode string @ins, of length
3041e9ea7e0SNamjae Jeon  * @ins_len into the string format dictated by the loaded NLS.
3051e9ea7e0SNamjae Jeon  *
3061e9ea7e0SNamjae Jeon  * If *@outs is NULL, this function allocates the string and the caller is
3071e9ea7e0SNamjae Jeon  * responsible for calling kfree(*@outs); when finished with it. In this case
3081e9ea7e0SNamjae Jeon  * @outs_len is ignored and can be 0.
3091e9ea7e0SNamjae Jeon  *
3101e9ea7e0SNamjae Jeon  * On success the function returns the number of bytes written to the output
3111e9ea7e0SNamjae Jeon  * string *@outs (>= 0), not counting the terminating NULL byte. If the output
3121e9ea7e0SNamjae Jeon  * string buffer was allocated, *@outs is set to it.
3131e9ea7e0SNamjae Jeon  *
3141e9ea7e0SNamjae Jeon  * On error, a negative number corresponding to the error code is returned. In
3151e9ea7e0SNamjae Jeon  * that case the output string is not allocated. The contents of *@outs are
3161e9ea7e0SNamjae Jeon  * then undefined.
3171e9ea7e0SNamjae Jeon  *
3181e9ea7e0SNamjae Jeon  * This might look a bit odd due to fast path optimization...
3191e9ea7e0SNamjae Jeon  */
320*5218cd10SNamjae Jeon int ntfs_ucstonls(const struct ntfs_volume *vol, const __le16 *ins,
3211e9ea7e0SNamjae Jeon 		const int ins_len, unsigned char **outs, int outs_len)
3221e9ea7e0SNamjae Jeon {
3231e9ea7e0SNamjae Jeon 	struct nls_table *nls = vol->nls_map;
3241e9ea7e0SNamjae Jeon 	unsigned char *ns;
3251e9ea7e0SNamjae Jeon 	int i, o, ns_len, wc;
3261e9ea7e0SNamjae Jeon 
3271e9ea7e0SNamjae Jeon 	/* We don't trust outside sources. */
3281e9ea7e0SNamjae Jeon 	if (ins) {
3291e9ea7e0SNamjae Jeon 		ns = *outs;
3301e9ea7e0SNamjae Jeon 		ns_len = outs_len;
3311e9ea7e0SNamjae Jeon 		if (ns && !ns_len) {
3321e9ea7e0SNamjae Jeon 			wc = -ENAMETOOLONG;
3331e9ea7e0SNamjae Jeon 			goto conversion_err;
3341e9ea7e0SNamjae Jeon 		}
3351e9ea7e0SNamjae Jeon 		if (!ns) {
3361e9ea7e0SNamjae Jeon 			ns_len = ins_len * NLS_MAX_CHARSET_SIZE;
3371e9ea7e0SNamjae Jeon 			ns = kmalloc(ns_len + 1, GFP_NOFS);
3381e9ea7e0SNamjae Jeon 			if (!ns)
3391e9ea7e0SNamjae Jeon 				goto mem_err_out;
3401e9ea7e0SNamjae Jeon 		}
341*5218cd10SNamjae Jeon 
342*5218cd10SNamjae Jeon 		if (vol->nls_utf8) {
343*5218cd10SNamjae Jeon 			o = utf16s_to_utf8s((const wchar_t *)ins, ins_len,
344*5218cd10SNamjae Jeon 					UTF16_LITTLE_ENDIAN, ns, ns_len);
345*5218cd10SNamjae Jeon 			if (o >= ns_len) {
346*5218cd10SNamjae Jeon 				wc = -ENAMETOOLONG;
347*5218cd10SNamjae Jeon 				goto conversion_err;
348*5218cd10SNamjae Jeon 			}
349*5218cd10SNamjae Jeon 			goto done;
350*5218cd10SNamjae Jeon 		}
351*5218cd10SNamjae Jeon 
3521e9ea7e0SNamjae Jeon 		for (i = o = 0; i < ins_len; i++) {
353*5218cd10SNamjae Jeon retry:
354*5218cd10SNamjae Jeon 			wc = nls->uni2char(le16_to_cpu(ins[i]), ns + o,
3551e9ea7e0SNamjae Jeon 					ns_len - o);
3561e9ea7e0SNamjae Jeon 			if (wc > 0) {
3571e9ea7e0SNamjae Jeon 				o += wc;
3581e9ea7e0SNamjae Jeon 				continue;
3591e9ea7e0SNamjae Jeon 			} else if (!wc)
3601e9ea7e0SNamjae Jeon 				break;
3611e9ea7e0SNamjae Jeon 			else if (wc == -ENAMETOOLONG && ns != *outs) {
3621e9ea7e0SNamjae Jeon 				unsigned char *tc;
3631e9ea7e0SNamjae Jeon 				/* Grow in multiples of 64 bytes. */
3641e9ea7e0SNamjae Jeon 				tc = kmalloc((ns_len + 64) &
3651e9ea7e0SNamjae Jeon 						~63, GFP_NOFS);
3661e9ea7e0SNamjae Jeon 				if (tc) {
3671e9ea7e0SNamjae Jeon 					memcpy(tc, ns, ns_len);
3681e9ea7e0SNamjae Jeon 					ns_len = ((ns_len + 64) & ~63) - 1;
3691e9ea7e0SNamjae Jeon 					kfree(ns);
3701e9ea7e0SNamjae Jeon 					ns = tc;
3711e9ea7e0SNamjae Jeon 					goto retry;
3721e9ea7e0SNamjae Jeon 				} /* No memory so goto conversion_error; */
3731e9ea7e0SNamjae Jeon 			} /* wc < 0, real error. */
3741e9ea7e0SNamjae Jeon 			goto conversion_err;
3751e9ea7e0SNamjae Jeon 		}
376*5218cd10SNamjae Jeon done:
3771e9ea7e0SNamjae Jeon 		ns[o] = 0;
3781e9ea7e0SNamjae Jeon 		*outs = ns;
3791e9ea7e0SNamjae Jeon 		return o;
3801e9ea7e0SNamjae Jeon 	} /* else (!ins) */
3811e9ea7e0SNamjae Jeon 	ntfs_error(vol->sb, "Received NULL pointer.");
3821e9ea7e0SNamjae Jeon 	return -EINVAL;
3831e9ea7e0SNamjae Jeon conversion_err:
384*5218cd10SNamjae Jeon 	ntfs_error(vol->sb,
385*5218cd10SNamjae Jeon 		"Unicode name contains characters that cannot be converted to character set %s.  You might want to try to use the mount option nls=utf8.",
386*5218cd10SNamjae Jeon 		nls->charset);
3871e9ea7e0SNamjae Jeon 	if (ns != *outs)
3881e9ea7e0SNamjae Jeon 		kfree(ns);
3891e9ea7e0SNamjae Jeon 	if (wc != -ENAMETOOLONG)
3901e9ea7e0SNamjae Jeon 		wc = -EILSEQ;
3911e9ea7e0SNamjae Jeon 	return wc;
3921e9ea7e0SNamjae Jeon mem_err_out:
3931e9ea7e0SNamjae Jeon 	ntfs_error(vol->sb, "Failed to allocate name!");
3941e9ea7e0SNamjae Jeon 	return -ENOMEM;
3951e9ea7e0SNamjae Jeon }
396*5218cd10SNamjae Jeon 
397*5218cd10SNamjae Jeon /*
398*5218cd10SNamjae Jeon  * ntfs_ucsnlen - determine the length of a little endian Unicode string
399*5218cd10SNamjae Jeon  * @s:		pointer to Unicode string
400*5218cd10SNamjae Jeon  * @maxlen:	maximum length of string @s
401*5218cd10SNamjae Jeon  *
402*5218cd10SNamjae Jeon  * Return the number of Unicode characters in the little endian Unicode
403*5218cd10SNamjae Jeon  * string @s up to a maximum of maxlen Unicode characters, not including
404*5218cd10SNamjae Jeon  * the terminating (__le16)'\0'. If there is no (__le16)'\0' between @s
405*5218cd10SNamjae Jeon  * and @s + @maxlen, @maxlen is returned.
406*5218cd10SNamjae Jeon  *
407*5218cd10SNamjae Jeon  * This function never looks beyond @s + @maxlen.
408*5218cd10SNamjae Jeon  */
409*5218cd10SNamjae Jeon static u32 ntfs_ucsnlen(const __le16 *s, u32 maxlen)
410*5218cd10SNamjae Jeon {
411*5218cd10SNamjae Jeon 	u32 i;
412*5218cd10SNamjae Jeon 
413*5218cd10SNamjae Jeon 	for (i = 0; i < maxlen; i++) {
414*5218cd10SNamjae Jeon 		if (!le16_to_cpu(s[i]))
415*5218cd10SNamjae Jeon 			break;
416*5218cd10SNamjae Jeon 	}
417*5218cd10SNamjae Jeon 	return i;
418*5218cd10SNamjae Jeon }
419*5218cd10SNamjae Jeon 
420*5218cd10SNamjae Jeon /*
421*5218cd10SNamjae Jeon  * ntfs_ucsndup - duplicate little endian Unicode string
422*5218cd10SNamjae Jeon  * @s:		pointer to Unicode string
423*5218cd10SNamjae Jeon  * @maxlen:	maximum length of string @s
424*5218cd10SNamjae Jeon  *
425*5218cd10SNamjae Jeon  * Return a pointer to a new little endian Unicode string which is a duplicate
426*5218cd10SNamjae Jeon  * of the string s.  Memory for the new string is obtained with kmalloc,
427*5218cd10SNamjae Jeon  * and can be freed with kfree.
428*5218cd10SNamjae Jeon  *
429*5218cd10SNamjae Jeon  * A maximum of @maxlen Unicode characters are copied and a terminating
430*5218cd10SNamjae Jeon  * (__le16)'\0' little endian Unicode character is added.
431*5218cd10SNamjae Jeon  *
432*5218cd10SNamjae Jeon  * This function never looks beyond @s + @maxlen.
433*5218cd10SNamjae Jeon  *
434*5218cd10SNamjae Jeon  * Return a pointer to the new little endian Unicode string on success and NULL
435*5218cd10SNamjae Jeon  * on failure with errno set to the error code.
436*5218cd10SNamjae Jeon  */
437*5218cd10SNamjae Jeon __le16 *ntfs_ucsndup(const __le16 *s, u32 maxlen)
438*5218cd10SNamjae Jeon {
439*5218cd10SNamjae Jeon 	__le16 *dst;
440*5218cd10SNamjae Jeon 	u32 len;
441*5218cd10SNamjae Jeon 
442*5218cd10SNamjae Jeon 	len = ntfs_ucsnlen(s, maxlen);
443*5218cd10SNamjae Jeon 	dst = kmalloc((len + 1) * sizeof(__le16), GFP_NOFS);
444*5218cd10SNamjae Jeon 	if (dst) {
445*5218cd10SNamjae Jeon 		memcpy(dst, s, len * sizeof(__le16));
446*5218cd10SNamjae Jeon 		dst[len] = cpu_to_le16(L'\0');
447*5218cd10SNamjae Jeon 	}
448*5218cd10SNamjae Jeon 	return dst;
449*5218cd10SNamjae Jeon }
450*5218cd10SNamjae Jeon 
451*5218cd10SNamjae Jeon /*
452*5218cd10SNamjae Jeon  * ntfs_names_are_equal - compare two Unicode names for equality
453*5218cd10SNamjae Jeon  * @s1:                 name to compare to @s2
454*5218cd10SNamjae Jeon  * @s1_len:             length in Unicode characters of @s1
455*5218cd10SNamjae Jeon  * @s2:                 name to compare to @s1
456*5218cd10SNamjae Jeon  * @s2_len:             length in Unicode characters of @s2
457*5218cd10SNamjae Jeon  * @ic:                 ignore case bool
458*5218cd10SNamjae Jeon  * @upcase:             upcase table (only if @ic == IGNORE_CASE)
459*5218cd10SNamjae Jeon  * @upcase_size:        length in Unicode characters of @upcase (if present)
460*5218cd10SNamjae Jeon  *
461*5218cd10SNamjae Jeon  * Compare the names @s1 and @s2 and return TRUE (1) if the names are
462*5218cd10SNamjae Jeon  * identical, or FALSE (0) if they are not identical. If @ic is IGNORE_CASE,
463*5218cd10SNamjae Jeon  * the @upcase table is used to perform a case insensitive comparison.
464*5218cd10SNamjae Jeon  */
465*5218cd10SNamjae Jeon bool ntfs_names_are_equal(const __le16 *s1, size_t s1_len,
466*5218cd10SNamjae Jeon 		const __le16 *s2, size_t s2_len,
467*5218cd10SNamjae Jeon 		const u32 ic,
468*5218cd10SNamjae Jeon 		const __le16 *upcase, const u32 upcase_size)
469*5218cd10SNamjae Jeon {
470*5218cd10SNamjae Jeon 	if (s1_len != s2_len)
471*5218cd10SNamjae Jeon 		return false;
472*5218cd10SNamjae Jeon 	if (!s1_len)
473*5218cd10SNamjae Jeon 		return true;
474*5218cd10SNamjae Jeon 	if (ic == CASE_SENSITIVE)
475*5218cd10SNamjae Jeon 		return ntfs_ucsncmp(s1, s2, s1_len) ? false : true;
476*5218cd10SNamjae Jeon 	return ntfs_ucsncasecmp(s1, s2, s1_len, upcase, upcase_size) ? false : true;
477*5218cd10SNamjae Jeon }
478