// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * NTFS Unicode string handling.
 *
 * Copyright (c) 2001-2006 Anton Altaparmakov
 */

#include "ntfs.h"

/*
 * IMPORTANT
 * =========
 *
 * All these routines assume that the Unicode characters are in little endian
 * encoding inside the strings!!!
 */

/*
 * This is used by the name collation functions to quickly determine what
 * characters are (in)valid.
 *
 * The table is indexed by the character code and only covers the codes
 * 0x00-0x3f.  ntfs_collate_names() rejects a character when bit 0x08 is set
 * in its entry; in this table that is the case for '"' (0x22), '*' (0x2a),
 * '<' (0x3c), '>' (0x3e) and '?' (0x3f).  The meaning of the remaining bits
 * is not used anywhere in this file.
 */
static const u8 legal_ansi_char_array[0x40] = {
	/* 0x00 - 0x0f */
	0x00, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
	0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,

	/* 0x10 - 0x1f */
	0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
	0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,

	/* 0x20 - 0x2f: ' ' ... '/' */
	0x17, 0x07, 0x18, 0x17, 0x17, 0x17, 0x17, 0x17,
	0x17, 0x17, 0x18, 0x16, 0x16, 0x17, 0x07, 0x00,

	/* 0x30 - 0x3f: '0' ... '?' */
	0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17,
	0x17, 0x17, 0x04, 0x16, 0x18, 0x16, 0x18, 0x18,
};

/*
 * ntfs_are_names_equal - compare two Unicode names for equality
 * @s1:			name to compare to @s2
 * @s1_len:		length in Unicode characters of @s1
 * @s2:			name to compare to @s1
 * @s2_len:		length in Unicode characters of @s2
 * @ic:			ignore case bool
 * @upcase:		upcase table (only if @ic == IGNORE_CASE)
 * @upcase_size:	length in Unicode characters of @upcase (if present)
 *
 * Compare the names @s1 and @s2 and return 'true' (1) if the names are
 * identical, or 'false' (0) if they are not identical. If @ic is IGNORE_CASE,
 * the @upcase table is used to perform a case insensitive comparison.
49 */ 50 bool ntfs_are_names_equal(const __le16 *s1, size_t s1_len, 51 const __le16 *s2, size_t s2_len, const u32 ic, 52 const __le16 *upcase, const u32 upcase_size) 53 { 54 if (s1_len != s2_len) 55 return false; 56 if (ic == CASE_SENSITIVE) 57 return !ntfs_ucsncmp(s1, s2, s1_len); 58 return !ntfs_ucsncasecmp(s1, s2, s1_len, upcase, upcase_size); 59 } 60 61 /* 62 * ntfs_collate_names - collate two Unicode names 63 * @name1: first Unicode name to compare 64 * @name1_len: first Unicode name length 65 * @name2: second Unicode name to compare 66 * @name2_len: second Unicode name length 67 * @err_val: if @name1 contains an invalid character return this value 68 * @ic: either CASE_SENSITIVE or IGNORE_CASE 69 * @upcase: upcase table (ignored if @ic is CASE_SENSITIVE) 70 * @upcase_len: upcase table size (ignored if @ic is CASE_SENSITIVE) 71 * 72 * ntfs_collate_names collates two Unicode names and returns: 73 * 74 * -1 if the first name collates before the second one, 75 * 0 if the names match, 76 * 1 if the second name collates before the first one, or 77 * @err_val if an invalid character is found in @name1 during the comparison. 78 * 79 * The following characters are considered invalid: '"', '*', '<', '>' and '?'. 
80 */ 81 int ntfs_collate_names(const __le16 *name1, const u32 name1_len, 82 const __le16 *name2, const u32 name2_len, 83 const int err_val, const u32 ic, 84 const __le16 *upcase, const u32 upcase_len) 85 { 86 u32 cnt, min_len; 87 u16 c1, c2; 88 89 min_len = name1_len; 90 if (name1_len > name2_len) 91 min_len = name2_len; 92 for (cnt = 0; cnt < min_len; ++cnt) { 93 c1 = le16_to_cpu(*name1++); 94 c2 = le16_to_cpu(*name2++); 95 if (ic) { 96 if (c1 < upcase_len) 97 c1 = le16_to_cpu(upcase[c1]); 98 if (c2 < upcase_len) 99 c2 = le16_to_cpu(upcase[c2]); 100 } 101 if (c1 < 64 && legal_ansi_char_array[c1] & 8) 102 return err_val; 103 if (c1 < c2) 104 return -1; 105 if (c1 > c2) 106 return 1; 107 } 108 if (name1_len < name2_len) 109 return -1; 110 if (name1_len == name2_len) 111 return 0; 112 /* name1_len > name2_len */ 113 c1 = le16_to_cpu(*name1); 114 if (c1 < 64 && legal_ansi_char_array[c1] & 8) 115 return err_val; 116 return 1; 117 } 118 119 /* 120 * ntfs_ucsncmp - compare two little endian Unicode strings 121 * @s1: first string 122 * @s2: second string 123 * @n: maximum unicode characters to compare 124 * 125 * Compare the first @n characters of the Unicode strings @s1 and @s2, 126 * The strings in little endian format and appropriate le16_to_cpu() 127 * conversion is performed on non-little endian machines. 128 * 129 * The function returns an integer less than, equal to, or greater than zero 130 * if @s1 (or the first @n Unicode characters thereof) is found, respectively, 131 * to be less than, to match, or be greater than @s2. 
132 */ 133 int ntfs_ucsncmp(const __le16 *s1, const __le16 *s2, size_t n) 134 { 135 u16 c1, c2; 136 size_t i; 137 138 for (i = 0; i < n; ++i) { 139 c1 = le16_to_cpu(s1[i]); 140 c2 = le16_to_cpu(s2[i]); 141 if (c1 < c2) 142 return -1; 143 if (c1 > c2) 144 return 1; 145 if (!c1) 146 break; 147 } 148 return 0; 149 } 150 151 /* 152 * ntfs_ucsncasecmp - compare two little endian Unicode strings, ignoring case 153 * @s1: first string 154 * @s2: second string 155 * @n: maximum unicode characters to compare 156 * @upcase: upcase table 157 * @upcase_size: upcase table size in Unicode characters 158 * 159 * Compare the first @n characters of the Unicode strings @s1 and @s2, 160 * ignoring case. The strings in little endian format and appropriate 161 * le16_to_cpu() conversion is performed on non-little endian machines. 162 * 163 * Each character is uppercased using the @upcase table before the comparison. 164 * 165 * The function returns an integer less than, equal to, or greater than zero 166 * if @s1 (or the first @n Unicode characters thereof) is found, respectively, 167 * to be less than, to match, or be greater than @s2. 
168 */ 169 int ntfs_ucsncasecmp(const __le16 *s1, const __le16 *s2, size_t n, 170 const __le16 *upcase, const u32 upcase_size) 171 { 172 size_t i; 173 u16 c1, c2; 174 175 for (i = 0; i < n; ++i) { 176 c1 = le16_to_cpu(s1[i]); 177 if (c1 < upcase_size) 178 c1 = le16_to_cpu(upcase[c1]); 179 c2 = le16_to_cpu(s2[i]); 180 if (c2 < upcase_size) 181 c2 = le16_to_cpu(upcase[c2]); 182 if (c1 < c2) 183 return -1; 184 if (c1 > c2) 185 return 1; 186 if (!c1) 187 break; 188 } 189 return 0; 190 } 191 192 int ntfs_file_compare_values(const struct file_name_attr *file_name_attr1, 193 const struct file_name_attr *file_name_attr2, 194 const int err_val, const u32 ic, 195 const __le16 *upcase, const u32 upcase_len) 196 { 197 return ntfs_collate_names((__le16 *)&file_name_attr1->file_name, 198 file_name_attr1->file_name_length, 199 (__le16 *)&file_name_attr2->file_name, 200 file_name_attr2->file_name_length, 201 err_val, ic, upcase, upcase_len); 202 } 203 204 /* 205 * ntfs_nlstoucs - convert NLS string to little endian Unicode string 206 * @vol: ntfs volume which we are working with 207 * @ins: input NLS string buffer 208 * @ins_len: length of input string in bytes 209 * @outs: on return contains the allocated output Unicode string buffer 210 * @max_name_len: maximum number of Unicode characters allowed for the output name 211 * 212 * Convert the input string @ins, which is in whatever format the loaded NLS 213 * map dictates, into a little endian, 2-byte Unicode string. 214 * 215 * This function allocates the string and the caller is responsible for 216 * calling kmem_cache_free(ntfs_name_cache, *@outs); when finished with it. 217 * 218 * On success the function returns the number of Unicode characters written to 219 * the output string *@outs (>= 0), not counting the terminating Unicode NULL 220 * character. *@outs is set to the allocated output string buffer. 221 * 222 * On error, a negative number corresponding to the error code is returned. 
In 223 * that case the output string is not allocated. Both *@outs and *@outs_len 224 * are then undefined. 225 * 226 * This might look a bit odd due to fast path optimization... 227 */ 228 int ntfs_nlstoucs(const struct ntfs_volume *vol, const char *ins, 229 const int ins_len, __le16 **outs, int max_name_len) 230 { 231 struct nls_table *nls = vol->nls_map; 232 __le16 *ucs; 233 wchar_t wc; 234 int i, o, wc_len; 235 236 /* We do not trust outside sources. */ 237 if (likely(ins)) { 238 if (max_name_len > NTFS_MAX_NAME_LEN) 239 ucs = kvmalloc((max_name_len + 2) * sizeof(__le16), 240 GFP_NOFS | __GFP_ZERO); 241 else 242 ucs = kmem_cache_alloc(ntfs_name_cache, GFP_NOFS); 243 if (likely(ucs)) { 244 if (vol->nls_utf8) { 245 o = utf8s_to_utf16s(ins, ins_len, 246 UTF16_LITTLE_ENDIAN, 247 (wchar_t *)ucs, 248 max_name_len + 2); 249 if (o < 0 || o > max_name_len) { 250 wc_len = o; 251 goto name_err; 252 } 253 } else { 254 for (i = o = 0; i < ins_len; i += wc_len) { 255 wc_len = nls->char2uni(ins + i, ins_len - i, 256 &wc); 257 if (likely(wc_len >= 0 && 258 o < max_name_len)) { 259 if (likely(wc)) { 260 ucs[o++] = cpu_to_le16(wc); 261 continue; 262 } /* else if (!wc) */ 263 break; 264 } 265 266 goto name_err; 267 } 268 } 269 ucs[o] = 0; 270 *outs = ucs; 271 return o; 272 } /* else if (!ucs) */ 273 ntfs_debug("Failed to allocate buffer for converted name from ntfs_name_cache."); 274 return -ENOMEM; 275 } /* else if (!ins) */ 276 ntfs_error(vol->sb, "Received NULL pointer."); 277 return -EINVAL; 278 name_err: 279 if (max_name_len > NTFS_MAX_NAME_LEN) 280 kvfree(ucs); 281 else 282 kmem_cache_free(ntfs_name_cache, ucs); 283 if (wc_len < 0) { 284 ntfs_debug("Name using character set %s contains characters that cannot be converted to Unicode.", 285 nls->charset); 286 i = -EILSEQ; 287 } else { 288 ntfs_debug("Name is too long (maximum length for a name on NTFS is %d Unicode characters.", 289 max_name_len); 290 i = -ENAMETOOLONG; 291 } 292 return i; 293 } 294 295 /* 296 * ntfs_ucstonls 
- convert little endian Unicode string to NLS string 297 * @vol: ntfs volume which we are working with 298 * @ins: input Unicode string buffer 299 * @ins_len: length of input string in Unicode characters 300 * @outs: on return contains the (allocated) output NLS string buffer 301 * @outs_len: length of output string buffer in bytes 302 * 303 * Convert the input little endian, 2-byte Unicode string @ins, of length 304 * @ins_len into the string format dictated by the loaded NLS. 305 * 306 * If *@outs is NULL, this function allocates the string and the caller is 307 * responsible for calling kfree(*@outs); when finished with it. In this case 308 * @outs_len is ignored and can be 0. 309 * 310 * On success the function returns the number of bytes written to the output 311 * string *@outs (>= 0), not counting the terminating NULL byte. If the output 312 * string buffer was allocated, *@outs is set to it. 313 * 314 * On error, a negative number corresponding to the error code is returned. In 315 * that case the output string is not allocated. The contents of *@outs are 316 * then undefined. 317 * 318 * This might look a bit odd due to fast path optimization... 319 */ 320 int ntfs_ucstonls(const struct ntfs_volume *vol, const __le16 *ins, 321 const int ins_len, unsigned char **outs, int outs_len) 322 { 323 struct nls_table *nls = vol->nls_map; 324 unsigned char *ns; 325 int i, o, ns_len, wc; 326 327 /* We don't trust outside sources. 
*/ 328 if (ins) { 329 ns = *outs; 330 ns_len = outs_len; 331 if (ns && !ns_len) { 332 wc = -ENAMETOOLONG; 333 goto conversion_err; 334 } 335 if (!ns) { 336 ns_len = ins_len * NLS_MAX_CHARSET_SIZE; 337 ns = kmalloc(ns_len + 1, GFP_NOFS); 338 if (!ns) 339 goto mem_err_out; 340 } 341 342 if (vol->nls_utf8) { 343 o = utf16s_to_utf8s((const wchar_t *)ins, ins_len, 344 UTF16_LITTLE_ENDIAN, ns, ns_len); 345 if (o >= ns_len) { 346 wc = -ENAMETOOLONG; 347 goto conversion_err; 348 } 349 goto done; 350 } 351 352 for (i = o = 0; i < ins_len; i++) { 353 retry: 354 wc = nls->uni2char(le16_to_cpu(ins[i]), ns + o, 355 ns_len - o); 356 if (wc > 0) { 357 o += wc; 358 continue; 359 } else if (!wc) 360 break; 361 else if (wc == -ENAMETOOLONG && ns != *outs) { 362 unsigned char *tc; 363 /* Grow in multiples of 64 bytes. */ 364 tc = kmalloc((ns_len + 64) & 365 ~63, GFP_NOFS); 366 if (tc) { 367 memcpy(tc, ns, ns_len); 368 ns_len = ((ns_len + 64) & ~63) - 1; 369 kfree(ns); 370 ns = tc; 371 goto retry; 372 } /* No memory so goto conversion_error; */ 373 } /* wc < 0, real error. */ 374 goto conversion_err; 375 } 376 done: 377 ns[o] = 0; 378 *outs = ns; 379 return o; 380 } /* else (!ins) */ 381 ntfs_error(vol->sb, "Received NULL pointer."); 382 return -EINVAL; 383 conversion_err: 384 ntfs_error(vol->sb, 385 "Unicode name contains characters that cannot be converted to character set %s. 
You might want to try to use the mount option nls=utf8.", 386 nls->charset); 387 if (ns != *outs) 388 kfree(ns); 389 if (wc != -ENAMETOOLONG) 390 wc = -EILSEQ; 391 return wc; 392 mem_err_out: 393 ntfs_error(vol->sb, "Failed to allocate name!"); 394 return -ENOMEM; 395 } 396 397 /* 398 * ntfs_ucsnlen - determine the length of a little endian Unicode string 399 * @s: pointer to Unicode string 400 * @maxlen: maximum length of string @s 401 * 402 * Return the number of Unicode characters in the little endian Unicode 403 * string @s up to a maximum of maxlen Unicode characters, not including 404 * the terminating (__le16)'\0'. If there is no (__le16)'\0' between @s 405 * and @s + @maxlen, @maxlen is returned. 406 * 407 * This function never looks beyond @s + @maxlen. 408 */ 409 static u32 ntfs_ucsnlen(const __le16 *s, u32 maxlen) 410 { 411 u32 i; 412 413 for (i = 0; i < maxlen; i++) { 414 if (!le16_to_cpu(s[i])) 415 break; 416 } 417 return i; 418 } 419 420 /* 421 * ntfs_ucsndup - duplicate little endian Unicode string 422 * @s: pointer to Unicode string 423 * @maxlen: maximum length of string @s 424 * 425 * Return a pointer to a new little endian Unicode string which is a duplicate 426 * of the string s. Memory for the new string is obtained with kmalloc, 427 * and can be freed with kfree. 428 * 429 * A maximum of @maxlen Unicode characters are copied and a terminating 430 * (__le16)'\0' little endian Unicode character is added. 431 * 432 * This function never looks beyond @s + @maxlen. 433 * 434 * Return a pointer to the new little endian Unicode string on success and NULL 435 * on failure with errno set to the error code. 
436 */ 437 __le16 *ntfs_ucsndup(const __le16 *s, u32 maxlen) 438 { 439 __le16 *dst; 440 u32 len; 441 442 len = ntfs_ucsnlen(s, maxlen); 443 dst = kmalloc((len + 1) * sizeof(__le16), GFP_NOFS); 444 if (dst) { 445 memcpy(dst, s, len * sizeof(__le16)); 446 dst[len] = cpu_to_le16(L'\0'); 447 } 448 return dst; 449 } 450 451 /* 452 * ntfs_names_are_equal - compare two Unicode names for equality 453 * @s1: name to compare to @s2 454 * @s1_len: length in Unicode characters of @s1 455 * @s2: name to compare to @s1 456 * @s2_len: length in Unicode characters of @s2 457 * @ic: ignore case bool 458 * @upcase: upcase table (only if @ic == IGNORE_CASE) 459 * @upcase_size: length in Unicode characters of @upcase (if present) 460 * 461 * Compare the names @s1 and @s2 and return TRUE (1) if the names are 462 * identical, or FALSE (0) if they are not identical. If @ic is IGNORE_CASE, 463 * the @upcase table is used to perform a case insensitive comparison. 464 */ 465 bool ntfs_names_are_equal(const __le16 *s1, size_t s1_len, 466 const __le16 *s2, size_t s2_len, 467 const u32 ic, 468 const __le16 *upcase, const u32 upcase_size) 469 { 470 if (s1_len != s2_len) 471 return false; 472 if (!s1_len) 473 return true; 474 if (ic == CASE_SENSITIVE) 475 return ntfs_ucsncmp(s1, s2, s1_len) ? false : true; 476 return ntfs_ucsncasecmp(s1, s2, s1_len, upcase, upcase_size) ? false : true; 477 } 478