1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * linux/fs/hfsplus/unicode.c 4 * 5 * Copyright (C) 2001 6 * Brad Boyer (flar@allandria.com) 7 * (C) 2003 Ardis Technologies <roman@ardistech.com> 8 * 9 * Handler routines for unicode strings 10 */ 11 12 #include <linux/types.h> 13 #include <linux/nls.h> 14 15 #include <kunit/visibility.h> 16 17 #include "hfsplus_fs.h" 18 #include "hfsplus_raw.h" 19 20 /* Fold the case of a unicode char, given the 16 bit value */ 21 /* Returns folded char, or 0 if ignorable */ 22 static inline u16 case_fold(u16 c) 23 { 24 u16 tmp; 25 26 tmp = hfsplus_case_fold_table[c >> 8]; 27 if (tmp) 28 tmp = hfsplus_case_fold_table[tmp + (c & 0xff)]; 29 else 30 tmp = c; 31 return tmp; 32 } 33 34 /* Compare unicode strings, return values like normal strcmp */ 35 int hfsplus_strcasecmp(const struct hfsplus_unistr *s1, 36 const struct hfsplus_unistr *s2) 37 { 38 u16 len1, len2, c1, c2; 39 const hfsplus_unichr *p1, *p2; 40 41 len1 = be16_to_cpu(s1->length); 42 len2 = be16_to_cpu(s2->length); 43 p1 = s1->unicode; 44 p2 = s2->unicode; 45 46 if (len1 > HFSPLUS_MAX_STRLEN) { 47 len1 = HFSPLUS_MAX_STRLEN; 48 pr_err("invalid length %u has been corrected to %d\n", 49 be16_to_cpu(s1->length), len1); 50 } 51 52 if (len2 > HFSPLUS_MAX_STRLEN) { 53 len2 = HFSPLUS_MAX_STRLEN; 54 pr_err("invalid length %u has been corrected to %d\n", 55 be16_to_cpu(s2->length), len2); 56 } 57 58 while (1) { 59 c1 = c2 = 0; 60 61 while (len1 && !c1) { 62 c1 = case_fold(be16_to_cpu(*p1)); 63 p1++; 64 len1--; 65 } 66 while (len2 && !c2) { 67 c2 = case_fold(be16_to_cpu(*p2)); 68 p2++; 69 len2--; 70 } 71 72 if (c1 != c2) 73 return (c1 < c2) ? -1 : 1; 74 if (!c1 && !c2) 75 return 0; 76 } 77 } 78 EXPORT_SYMBOL_IF_KUNIT(hfsplus_strcasecmp); 79 80 /* Compare names as a sequence of 16-bit unsigned integers */ 81 int hfsplus_strcmp(const struct hfsplus_unistr *s1, 82 const struct hfsplus_unistr *s2) 83 { 84 u16 len1, len2, c1, c2; 85 const hfsplus_unichr *p1, *p2; 86 int len; 87 88 len1 = be16_to_cpu(s1->length); 89 len2 = be16_to_cpu(s2->length); 90 p1 = s1->unicode; 91 p2 = s2->unicode; 92 93 if (len1 > HFSPLUS_MAX_STRLEN) { 94 len1 = HFSPLUS_MAX_STRLEN; 95 pr_err("invalid length %u has been corrected to %d\n", 96 be16_to_cpu(s1->length), len1); 97 } 98 99 if (len2 > HFSPLUS_MAX_STRLEN) { 100 len2 = HFSPLUS_MAX_STRLEN; 101 pr_err("invalid length %u has been corrected to %d\n", 102 be16_to_cpu(s2->length), len2); 103 } 104 105 for (len = min(len1, len2); len > 0; len--) { 106 c1 = be16_to_cpu(*p1); 107 c2 = be16_to_cpu(*p2); 108 if (c1 != c2) 109 return c1 < c2 ? -1 : 1; 110 p1++; 111 p2++; 112 } 113 114 return len1 < len2 ? -1 : 115 len1 > len2 ? 1 : 0; 116 } 117 EXPORT_SYMBOL_IF_KUNIT(hfsplus_strcmp); 118 119 #define Hangul_SBase 0xac00 120 #define Hangul_LBase 0x1100 121 #define Hangul_VBase 0x1161 122 #define Hangul_TBase 0x11a7 123 #define Hangul_SCount 11172 124 #define Hangul_LCount 19 125 #define Hangul_VCount 21 126 #define Hangul_TCount 28 127 #define Hangul_NCount (Hangul_VCount * Hangul_TCount) 128 129 130 static u16 *hfsplus_compose_lookup(u16 *p, u16 cc) 131 { 132 int i, s, e; 133 134 s = 1; 135 e = p[1]; 136 if (!e || cc < p[s * 2] || cc > p[e * 2]) 137 return NULL; 138 do { 139 i = (s + e) / 2; 140 if (cc > p[i * 2]) 141 s = i + 1; 142 else if (cc < p[i * 2]) 143 e = i - 1; 144 else 145 return hfsplus_compose_table + p[i * 2 + 1]; 146 } while (s <= e); 147 return NULL; 148 } 149 150 /* 151 * In HFS+, a filename can contain / because : is the separator. 152 * The slash is a valid filename character on macOS. 153 * But on Linux, / is the path separator and 154 * it cannot appear in a filename component. 155 * There's a parallel mapping for the NUL character (0 -> U+2400). 156 * NUL terminates strings in C/POSIX but is valid in HFS+ filenames. 157 */ 158 static inline 159 void hfsplus_mac2linux_compatibility_check(u16 symbol, u16 *conversion, 160 int name_type) 161 { 162 *conversion = symbol; 163 164 switch (name_type) { 165 case HFS_XATTR_NAME: 166 /* ignore conversion */ 167 return; 168 169 default: 170 /* continue logic */ 171 break; 172 } 173 174 switch (symbol) { 175 case 0: 176 *conversion = 0x2400; 177 break; 178 case '/': 179 *conversion = ':'; 180 break; 181 } 182 } 183 184 static int hfsplus_uni2asc(struct super_block *sb, 185 const struct hfsplus_unistr *ustr, 186 int max_len, char *astr, int *len_p, 187 int name_type) 188 { 189 const hfsplus_unichr *ip; 190 struct nls_table *nls = HFSPLUS_SB(sb)->nls; 191 u8 *op; 192 u16 cc, c0, c1; 193 u16 *ce1, *ce2; 194 int i, len, ustrlen, res, compose; 195 196 op = astr; 197 ip = ustr->unicode; 198 199 ustrlen = be16_to_cpu(ustr->length); 200 if (ustrlen > max_len) { 201 ustrlen = max_len; 202 pr_err("invalid length %u has been corrected to %d\n", 203 be16_to_cpu(ustr->length), ustrlen); 204 } 205 206 len = *len_p; 207 ce1 = NULL; 208 compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); 209 210 while (ustrlen > 0) { 211 c0 = be16_to_cpu(*ip++); 212 ustrlen--; 213 /* search for single decomposed char */ 214 if (likely(compose)) 215 ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0); 216 if (ce1) 217 cc = ce1[0]; 218 else 219 cc = 0; 220 if (cc) { 221 /* start of a possibly decomposed Hangul char */ 222 if (cc != 0xffff) 223 goto done; 224 if (!ustrlen) 225 goto same; 226 c1 = be16_to_cpu(*ip) - Hangul_VBase; 227 if (c1 < Hangul_VCount) { 228 /* compose the Hangul char */ 229 cc = (c0 - Hangul_LBase) * Hangul_VCount; 230 cc = (cc + c1) * Hangul_TCount; 231 cc += Hangul_SBase; 232 ip++; 233 ustrlen--; 234 if (!ustrlen) 235 goto done; 236 c1 = be16_to_cpu(*ip) - Hangul_TBase; 237 if (c1 > 0 && c1 < Hangul_TCount) { 238 cc += c1; 239 ip++; 240 ustrlen--; 241 } 242 goto done; 243 } 244 } 245 while (1) { 246 /* main loop for common case of not composed chars */ 247 if (!ustrlen) 248 goto same; 249 c1 = be16_to_cpu(*ip); 250 if (likely(compose)) 251 ce1 = hfsplus_compose_lookup( 252 hfsplus_compose_table, c1); 253 if (ce1) 254 break; 255 hfsplus_mac2linux_compatibility_check(c0, &c0, 256 name_type); 257 res = nls->uni2char(c0, op, len); 258 if (res < 0) { 259 if (res == -ENAMETOOLONG) 260 goto out; 261 *op = '?'; 262 res = 1; 263 } 264 op += res; 265 len -= res; 266 c0 = c1; 267 ip++; 268 ustrlen--; 269 } 270 ce2 = hfsplus_compose_lookup(ce1, c0); 271 if (ce2) { 272 i = 1; 273 while (i < ustrlen) { 274 ce1 = hfsplus_compose_lookup(ce2, 275 be16_to_cpu(ip[i])); 276 if (!ce1) 277 break; 278 i++; 279 ce2 = ce1; 280 } 281 cc = ce2[0]; 282 if (cc) { 283 ip += i; 284 ustrlen -= i; 285 goto done; 286 } 287 } 288 same: 289 hfsplus_mac2linux_compatibility_check(c0, &cc, 290 name_type); 291 done: 292 res = nls->uni2char(cc, op, len); 293 if (res < 0) { 294 if (res == -ENAMETOOLONG) 295 goto out; 296 *op = '?'; 297 res = 1; 298 } 299 op += res; 300 len -= res; 301 } 302 res = 0; 303 out: 304 *len_p = (char *)op - astr; 305 return res; 306 } 307 308 inline int hfsplus_uni2asc_str(struct super_block *sb, 309 const struct hfsplus_unistr *ustr, char *astr, 310 int *len_p) 311 { 312 return hfsplus_uni2asc(sb, 313 ustr, HFSPLUS_MAX_STRLEN, 314 astr, len_p, 315 HFS_REGULAR_NAME); 316 } 317 EXPORT_SYMBOL_IF_KUNIT(hfsplus_uni2asc_str); 318 319 inline int hfsplus_uni2asc_xattr_str(struct super_block *sb, 320 const struct hfsplus_attr_unistr *ustr, 321 char *astr, int *len_p) 322 { 323 return hfsplus_uni2asc(sb, (const struct hfsplus_unistr *)ustr, 324 HFSPLUS_ATTR_MAX_STRLEN, astr, len_p, 325 HFS_XATTR_NAME); 326 } 327 EXPORT_SYMBOL_IF_KUNIT(hfsplus_uni2asc_xattr_str); 328 329 /* 330 * In HFS+, a filename can contain / because : is the separator. 331 * The slash is a valid filename character on macOS. 332 * But on Linux, / is the path separator and 333 * it cannot appear in a filename component. 334 * There's a parallel mapping for the NUL character (0 -> U+2400). 335 * NUL terminates strings in C/POSIX but is valid in HFS+ filenames. 336 */ 337 static inline 338 void hfsplus_linux2mac_compatibility_check(wchar_t *uc, int name_type) 339 { 340 switch (name_type) { 341 case HFS_XATTR_NAME: 342 /* ignore conversion */ 343 return; 344 345 default: 346 /* continue logic */ 347 break; 348 } 349 350 switch (*uc) { 351 case 0x2400: 352 *uc = 0; 353 break; 354 case ':': 355 *uc = '/'; 356 break; 357 } 358 } 359 360 /* 361 * Convert one or more ASCII characters into a single unicode character. 362 * Returns the number of ASCII characters corresponding to the unicode char. 363 */ 364 static inline int asc2unichar(struct super_block *sb, const char *astr, int len, 365 wchar_t *uc, int name_type) 366 { 367 int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc); 368 369 if (size <= 0) { 370 *uc = '?'; 371 size = 1; 372 } 373 374 hfsplus_linux2mac_compatibility_check(uc, name_type); 375 return size; 376 } 377 378 /* Decomposes a non-Hangul unicode character. */ 379 static u16 *hfsplus_decompose_nonhangul(wchar_t uc, int *size) 380 { 381 int off; 382 383 off = hfsplus_decompose_table[(uc >> 12) & 0xf]; 384 if (off == 0 || off == 0xffff) 385 return NULL; 386 387 off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)]; 388 if (!off) 389 return NULL; 390 391 off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)]; 392 if (!off) 393 return NULL; 394 395 off = hfsplus_decompose_table[off + (uc & 0xf)]; 396 *size = off & 3; 397 if (*size == 0) 398 return NULL; 399 return hfsplus_decompose_table + (off / 4); 400 } 401 402 /* 403 * Try to decompose a unicode character as Hangul. Return 0 if @uc is not 404 * precomposed Hangul, otherwise return the length of the decomposition. 405 * 406 * This function was adapted from sample code from the Unicode Standard 407 * Annex #15: Unicode Normalization Forms, version 3.2.0. 408 * 409 * Copyright (C) 1991-2018 Unicode, Inc. All rights reserved. Distributed 410 * under the Terms of Use in http://www.unicode.org/copyright.html. 411 */ 412 static int hfsplus_try_decompose_hangul(wchar_t uc, u16 *result) 413 { 414 int index; 415 int l, v, t; 416 417 index = uc - Hangul_SBase; 418 if (index < 0 || index >= Hangul_SCount) 419 return 0; 420 421 l = Hangul_LBase + index / Hangul_NCount; 422 v = Hangul_VBase + (index % Hangul_NCount) / Hangul_TCount; 423 t = Hangul_TBase + index % Hangul_TCount; 424 425 result[0] = l; 426 result[1] = v; 427 if (t != Hangul_TBase) { 428 result[2] = t; 429 return 3; 430 } 431 return 2; 432 } 433 434 /* Decomposes a single unicode character. */ 435 static u16 *decompose_unichar(wchar_t uc, int *size, u16 *hangul_buffer) 436 { 437 u16 *result; 438 439 /* Hangul is handled separately */ 440 result = hangul_buffer; 441 *size = hfsplus_try_decompose_hangul(uc, result); 442 if (*size == 0) 443 result = hfsplus_decompose_nonhangul(uc, size); 444 return result; 445 } 446 447 int hfsplus_asc2uni(struct super_block *sb, 448 struct hfsplus_unistr *ustr, int max_unistr_len, 449 const char *astr, int len, int name_type) 450 { 451 int size, dsize, decompose; 452 u16 *dstr, outlen = 0; 453 wchar_t c; 454 u16 dhangul[3]; 455 456 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); 457 while (outlen < max_unistr_len && len > 0) { 458 size = asc2unichar(sb, astr, len, &c, name_type); 459 460 if (decompose) 461 dstr = decompose_unichar(c, &dsize, dhangul); 462 else 463 dstr = NULL; 464 if (dstr) { 465 if (outlen + dsize > max_unistr_len) 466 break; 467 do { 468 ustr->unicode[outlen++] = cpu_to_be16(*dstr++); 469 } while (--dsize > 0); 470 } else 471 ustr->unicode[outlen++] = cpu_to_be16(c); 472 473 astr += size; 474 len -= size; 475 } 476 ustr->length = cpu_to_be16(outlen); 477 if (len > 0) 478 return -ENAMETOOLONG; 479 return 0; 480 } 481 EXPORT_SYMBOL_IF_KUNIT(hfsplus_asc2uni); 482 483 /* 484 * Hash a string to an integer as appropriate for the HFS+ filesystem. 485 * Composed unicode characters are decomposed and case-folding is performed 486 * if the appropriate bits are (un)set on the superblock. 487 */ 488 int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str) 489 { 490 struct super_block *sb = dentry->d_sb; 491 const char *astr; 492 const u16 *dstr; 493 int casefold, decompose, size, len; 494 unsigned long hash; 495 wchar_t c; 496 u16 c2; 497 u16 dhangul[3]; 498 499 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); 500 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); 501 hash = init_name_hash(dentry); 502 astr = str->name; 503 len = str->len; 504 while (len > 0) { 505 int dsize; 506 size = asc2unichar(sb, astr, len, &c, HFS_REGULAR_NAME); 507 astr += size; 508 len -= size; 509 510 if (decompose) 511 dstr = decompose_unichar(c, &dsize, dhangul); 512 else 513 dstr = NULL; 514 if (dstr) { 515 do { 516 c2 = *dstr++; 517 if (casefold) 518 c2 = case_fold(c2); 519 if (!casefold || c2) 520 hash = partial_name_hash(c2, hash); 521 } while (--dsize > 0); 522 } else { 523 c2 = c; 524 if (casefold) 525 c2 = case_fold(c2); 526 if (!casefold || c2) 527 hash = partial_name_hash(c2, hash); 528 } 529 } 530 str->hash = end_name_hash(hash); 531 532 return 0; 533 } 534 EXPORT_SYMBOL_IF_KUNIT(hfsplus_hash_dentry); 535 536 /* 537 * Compare strings with HFS+ filename ordering. 538 * Composed unicode characters are decomposed and case-folding is performed 539 * if the appropriate bits are (un)set on the superblock. 540 */ 541 int hfsplus_compare_dentry(const struct dentry *dentry, 542 unsigned int len, const char *str, const struct qstr *name) 543 { 544 struct super_block *sb = dentry->d_sb; 545 int casefold, decompose, size; 546 int dsize1, dsize2, len1, len2; 547 const u16 *dstr1, *dstr2; 548 const char *astr1, *astr2; 549 u16 c1, c2; 550 wchar_t c; 551 u16 dhangul_1[3], dhangul_2[3]; 552 553 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); 554 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); 555 astr1 = str; 556 len1 = len; 557 astr2 = name->name; 558 len2 = name->len; 559 dsize1 = dsize2 = 0; 560 dstr1 = dstr2 = NULL; 561 562 while (len1 > 0 && len2 > 0) { 563 if (!dsize1) { 564 size = asc2unichar(sb, astr1, len1, &c, 565 HFS_REGULAR_NAME); 566 astr1 += size; 567 len1 -= size; 568 569 if (decompose) 570 dstr1 = decompose_unichar(c, &dsize1, 571 dhangul_1); 572 if (!decompose || !dstr1) { 573 c1 = c; 574 dstr1 = &c1; 575 dsize1 = 1; 576 } 577 } 578 579 if (!dsize2) { 580 size = asc2unichar(sb, astr2, len2, &c, 581 HFS_REGULAR_NAME); 582 astr2 += size; 583 len2 -= size; 584 585 if (decompose) 586 dstr2 = decompose_unichar(c, &dsize2, 587 dhangul_2); 588 if (!decompose || !dstr2) { 589 c2 = c; 590 dstr2 = &c2; 591 dsize2 = 1; 592 } 593 } 594 595 c1 = *dstr1; 596 c2 = *dstr2; 597 if (casefold) { 598 c1 = case_fold(c1); 599 if (!c1) { 600 dstr1++; 601 dsize1--; 602 continue; 603 } 604 c2 = case_fold(c2); 605 if (!c2) { 606 dstr2++; 607 dsize2--; 608 continue; 609 } 610 } 611 if (c1 < c2) 612 return -1; 613 else if (c1 > c2) 614 return 1; 615 616 dstr1++; 617 dsize1--; 618 dstr2++; 619 dsize2--; 620 } 621 622 if (len1 < len2) 623 return -1; 624 if (len1 > len2) 625 return 1; 626 return 0; 627 } 628 EXPORT_SYMBOL_IF_KUNIT(hfsplus_compare_dentry); 629