1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * linux/fs/hfsplus/unicode.c 4 * 5 * Copyright (C) 2001 6 * Brad Boyer (flar@allandria.com) 7 * (C) 2003 Ardis Technologies <roman@ardistech.com> 8 * 9 * Handler routines for unicode strings 10 */ 11 12 #include <linux/types.h> 13 #include <linux/nls.h> 14 15 #include <kunit/visibility.h> 16 17 #include "hfsplus_fs.h" 18 #include "hfsplus_raw.h" 19 20 /* Fold the case of a unicode char, given the 16 bit value */ 21 /* Returns folded char, or 0 if ignorable */ 22 static inline u16 case_fold(u16 c) 23 { 24 u16 tmp; 25 26 tmp = hfsplus_case_fold_table[c >> 8]; 27 if (tmp) 28 tmp = hfsplus_case_fold_table[tmp + (c & 0xff)]; 29 else 30 tmp = c; 31 return tmp; 32 } 33 34 /* Compare unicode strings, return values like normal strcmp */ 35 int hfsplus_strcasecmp(const struct hfsplus_unistr *s1, 36 const struct hfsplus_unistr *s2) 37 { 38 u16 len1, len2, c1, c2; 39 const hfsplus_unichr *p1, *p2; 40 41 len1 = be16_to_cpu(s1->length); 42 len2 = be16_to_cpu(s2->length); 43 p1 = s1->unicode; 44 p2 = s2->unicode; 45 46 if (len1 > HFSPLUS_MAX_STRLEN) { 47 len1 = HFSPLUS_MAX_STRLEN; 48 pr_err("invalid length %u has been corrected to %d\n", 49 be16_to_cpu(s1->length), len1); 50 } 51 52 if (len2 > HFSPLUS_MAX_STRLEN) { 53 len2 = HFSPLUS_MAX_STRLEN; 54 pr_err("invalid length %u has been corrected to %d\n", 55 be16_to_cpu(s2->length), len2); 56 } 57 58 while (1) { 59 c1 = c2 = 0; 60 61 while (len1 && !c1) { 62 c1 = case_fold(be16_to_cpu(*p1)); 63 p1++; 64 len1--; 65 } 66 while (len2 && !c2) { 67 c2 = case_fold(be16_to_cpu(*p2)); 68 p2++; 69 len2--; 70 } 71 72 if (c1 != c2) 73 return (c1 < c2) ? -1 : 1; 74 if (!c1 && !c2) 75 return 0; 76 } 77 } 78 EXPORT_SYMBOL_IF_KUNIT(hfsplus_strcasecmp); 79 80 /* Compare names as a sequence of 16-bit unsigned integers */ 81 int hfsplus_strcmp(const struct hfsplus_unistr *s1, 82 const struct hfsplus_unistr *s2) 83 { 84 u16 len1, len2, c1, c2; 85 const hfsplus_unichr *p1, *p2; 86 int len; 87 88 len1 = be16_to_cpu(s1->length); 89 len2 = be16_to_cpu(s2->length); 90 p1 = s1->unicode; 91 p2 = s2->unicode; 92 93 if (len1 > HFSPLUS_MAX_STRLEN) { 94 len1 = HFSPLUS_MAX_STRLEN; 95 pr_err("invalid length %u has been corrected to %d\n", 96 be16_to_cpu(s1->length), len1); 97 } 98 99 if (len2 > HFSPLUS_MAX_STRLEN) { 100 len2 = HFSPLUS_MAX_STRLEN; 101 pr_err("invalid length %u has been corrected to %d\n", 102 be16_to_cpu(s2->length), len2); 103 } 104 105 for (len = min(len1, len2); len > 0; len--) { 106 c1 = be16_to_cpu(*p1); 107 c2 = be16_to_cpu(*p2); 108 if (c1 != c2) 109 return c1 < c2 ? -1 : 1; 110 p1++; 111 p2++; 112 } 113 114 return len1 < len2 ? -1 : 115 len1 > len2 ? 1 : 0; 116 } 117 EXPORT_SYMBOL_IF_KUNIT(hfsplus_strcmp); 118 119 #define Hangul_SBase 0xac00 120 #define Hangul_LBase 0x1100 121 #define Hangul_VBase 0x1161 122 #define Hangul_TBase 0x11a7 123 #define Hangul_SCount 11172 124 #define Hangul_LCount 19 125 #define Hangul_VCount 21 126 #define Hangul_TCount 28 127 #define Hangul_NCount (Hangul_VCount * Hangul_TCount) 128 129 130 static u16 *hfsplus_compose_lookup(u16 *p, u16 cc) 131 { 132 int i, s, e; 133 134 s = 1; 135 e = p[1]; 136 if (!e || cc < p[s * 2] || cc > p[e * 2]) 137 return NULL; 138 do { 139 i = (s + e) / 2; 140 if (cc > p[i * 2]) 141 s = i + 1; 142 else if (cc < p[i * 2]) 143 e = i - 1; 144 else 145 return hfsplus_compose_table + p[i * 2 + 1]; 146 } while (s <= e); 147 return NULL; 148 } 149 150 static int hfsplus_uni2asc(struct super_block *sb, 151 const struct hfsplus_unistr *ustr, 152 int max_len, char *astr, int *len_p) 153 { 154 const hfsplus_unichr *ip; 155 struct nls_table *nls = HFSPLUS_SB(sb)->nls; 156 u8 *op; 157 u16 cc, c0, c1; 158 u16 *ce1, *ce2; 159 int i, len, ustrlen, res, compose; 160 161 op = astr; 162 ip = ustr->unicode; 163 164 ustrlen = be16_to_cpu(ustr->length); 165 if (ustrlen > max_len) { 166 ustrlen = max_len; 167 pr_err("invalid length %u has been corrected to %d\n", 168 be16_to_cpu(ustr->length), ustrlen); 169 } 170 171 len = *len_p; 172 ce1 = NULL; 173 compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); 174 175 while (ustrlen > 0) { 176 c0 = be16_to_cpu(*ip++); 177 ustrlen--; 178 /* search for single decomposed char */ 179 if (likely(compose)) 180 ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0); 181 if (ce1) 182 cc = ce1[0]; 183 else 184 cc = 0; 185 if (cc) { 186 /* start of a possibly decomposed Hangul char */ 187 if (cc != 0xffff) 188 goto done; 189 if (!ustrlen) 190 goto same; 191 c1 = be16_to_cpu(*ip) - Hangul_VBase; 192 if (c1 < Hangul_VCount) { 193 /* compose the Hangul char */ 194 cc = (c0 - Hangul_LBase) * Hangul_VCount; 195 cc = (cc + c1) * Hangul_TCount; 196 cc += Hangul_SBase; 197 ip++; 198 ustrlen--; 199 if (!ustrlen) 200 goto done; 201 c1 = be16_to_cpu(*ip) - Hangul_TBase; 202 if (c1 > 0 && c1 < Hangul_TCount) { 203 cc += c1; 204 ip++; 205 ustrlen--; 206 } 207 goto done; 208 } 209 } 210 while (1) { 211 /* main loop for common case of not composed chars */ 212 if (!ustrlen) 213 goto same; 214 c1 = be16_to_cpu(*ip); 215 if (likely(compose)) 216 ce1 = hfsplus_compose_lookup( 217 hfsplus_compose_table, c1); 218 if (ce1) 219 break; 220 switch (c0) { 221 case 0: 222 c0 = 0x2400; 223 break; 224 case '/': 225 c0 = ':'; 226 break; 227 } 228 res = nls->uni2char(c0, op, len); 229 if (res < 0) { 230 if (res == -ENAMETOOLONG) 231 goto out; 232 *op = '?'; 233 res = 1; 234 } 235 op += res; 236 len -= res; 237 c0 = c1; 238 ip++; 239 ustrlen--; 240 } 241 ce2 = hfsplus_compose_lookup(ce1, c0); 242 if (ce2) { 243 i = 1; 244 while (i < ustrlen) { 245 ce1 = hfsplus_compose_lookup(ce2, 246 be16_to_cpu(ip[i])); 247 if (!ce1) 248 break; 249 i++; 250 ce2 = ce1; 251 } 252 cc = ce2[0]; 253 if (cc) { 254 ip += i; 255 ustrlen -= i; 256 goto done; 257 } 258 } 259 same: 260 switch (c0) { 261 case 0: 262 cc = 0x2400; 263 break; 264 case '/': 265 cc = ':'; 266 break; 267 default: 268 cc = c0; 269 } 270 done: 271 res = nls->uni2char(cc, op, len); 272 if (res < 0) { 273 if (res == -ENAMETOOLONG) 274 goto out; 275 *op = '?'; 276 res = 1; 277 } 278 op += res; 279 len -= res; 280 } 281 res = 0; 282 out: 283 *len_p = (char *)op - astr; 284 return res; 285 } 286 287 inline int hfsplus_uni2asc_str(struct super_block *sb, 288 const struct hfsplus_unistr *ustr, char *astr, 289 int *len_p) 290 { 291 return hfsplus_uni2asc(sb, ustr, HFSPLUS_MAX_STRLEN, astr, len_p); 292 } 293 EXPORT_SYMBOL_IF_KUNIT(hfsplus_uni2asc_str); 294 295 inline int hfsplus_uni2asc_xattr_str(struct super_block *sb, 296 const struct hfsplus_attr_unistr *ustr, 297 char *astr, int *len_p) 298 { 299 return hfsplus_uni2asc(sb, (const struct hfsplus_unistr *)ustr, 300 HFSPLUS_ATTR_MAX_STRLEN, astr, len_p); 301 } 302 EXPORT_SYMBOL_IF_KUNIT(hfsplus_uni2asc_xattr_str); 303 304 /* 305 * Convert one or more ASCII characters into a single unicode character. 306 * Returns the number of ASCII characters corresponding to the unicode char. 307 */ 308 static inline int asc2unichar(struct super_block *sb, const char *astr, int len, 309 wchar_t *uc) 310 { 311 int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc); 312 if (size <= 0) { 313 *uc = '?'; 314 size = 1; 315 } 316 switch (*uc) { 317 case 0x2400: 318 *uc = 0; 319 break; 320 case ':': 321 *uc = '/'; 322 break; 323 } 324 return size; 325 } 326 327 /* Decomposes a non-Hangul unicode character. */ 328 static u16 *hfsplus_decompose_nonhangul(wchar_t uc, int *size) 329 { 330 int off; 331 332 off = hfsplus_decompose_table[(uc >> 12) & 0xf]; 333 if (off == 0 || off == 0xffff) 334 return NULL; 335 336 off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)]; 337 if (!off) 338 return NULL; 339 340 off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)]; 341 if (!off) 342 return NULL; 343 344 off = hfsplus_decompose_table[off + (uc & 0xf)]; 345 *size = off & 3; 346 if (*size == 0) 347 return NULL; 348 return hfsplus_decompose_table + (off / 4); 349 } 350 351 /* 352 * Try to decompose a unicode character as Hangul. Return 0 if @uc is not 353 * precomposed Hangul, otherwise return the length of the decomposition. 354 * 355 * This function was adapted from sample code from the Unicode Standard 356 * Annex #15: Unicode Normalization Forms, version 3.2.0. 357 * 358 * Copyright (C) 1991-2018 Unicode, Inc. All rights reserved. Distributed 359 * under the Terms of Use in http://www.unicode.org/copyright.html. 360 */ 361 static int hfsplus_try_decompose_hangul(wchar_t uc, u16 *result) 362 { 363 int index; 364 int l, v, t; 365 366 index = uc - Hangul_SBase; 367 if (index < 0 || index >= Hangul_SCount) 368 return 0; 369 370 l = Hangul_LBase + index / Hangul_NCount; 371 v = Hangul_VBase + (index % Hangul_NCount) / Hangul_TCount; 372 t = Hangul_TBase + index % Hangul_TCount; 373 374 result[0] = l; 375 result[1] = v; 376 if (t != Hangul_TBase) { 377 result[2] = t; 378 return 3; 379 } 380 return 2; 381 } 382 383 /* Decomposes a single unicode character. */ 384 static u16 *decompose_unichar(wchar_t uc, int *size, u16 *hangul_buffer) 385 { 386 u16 *result; 387 388 /* Hangul is handled separately */ 389 result = hangul_buffer; 390 *size = hfsplus_try_decompose_hangul(uc, result); 391 if (*size == 0) 392 result = hfsplus_decompose_nonhangul(uc, size); 393 return result; 394 } 395 396 int hfsplus_asc2uni(struct super_block *sb, 397 struct hfsplus_unistr *ustr, int max_unistr_len, 398 const char *astr, int len) 399 { 400 int size, dsize, decompose; 401 u16 *dstr, outlen = 0; 402 wchar_t c; 403 u16 dhangul[3]; 404 405 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); 406 while (outlen < max_unistr_len && len > 0) { 407 size = asc2unichar(sb, astr, len, &c); 408 409 if (decompose) 410 dstr = decompose_unichar(c, &dsize, dhangul); 411 else 412 dstr = NULL; 413 if (dstr) { 414 if (outlen + dsize > max_unistr_len) 415 break; 416 do { 417 ustr->unicode[outlen++] = cpu_to_be16(*dstr++); 418 } while (--dsize > 0); 419 } else 420 ustr->unicode[outlen++] = cpu_to_be16(c); 421 422 astr += size; 423 len -= size; 424 } 425 ustr->length = cpu_to_be16(outlen); 426 if (len > 0) 427 return -ENAMETOOLONG; 428 return 0; 429 } 430 EXPORT_SYMBOL_IF_KUNIT(hfsplus_asc2uni); 431 432 /* 433 * Hash a string to an integer as appropriate for the HFS+ filesystem. 434 * Composed unicode characters are decomposed and case-folding is performed 435 * if the appropriate bits are (un)set on the superblock. 436 */ 437 int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str) 438 { 439 struct super_block *sb = dentry->d_sb; 440 const char *astr; 441 const u16 *dstr; 442 int casefold, decompose, size, len; 443 unsigned long hash; 444 wchar_t c; 445 u16 c2; 446 u16 dhangul[3]; 447 448 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); 449 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); 450 hash = init_name_hash(dentry); 451 astr = str->name; 452 len = str->len; 453 while (len > 0) { 454 int dsize; 455 size = asc2unichar(sb, astr, len, &c); 456 astr += size; 457 len -= size; 458 459 if (decompose) 460 dstr = decompose_unichar(c, &dsize, dhangul); 461 else 462 dstr = NULL; 463 if (dstr) { 464 do { 465 c2 = *dstr++; 466 if (casefold) 467 c2 = case_fold(c2); 468 if (!casefold || c2) 469 hash = partial_name_hash(c2, hash); 470 } while (--dsize > 0); 471 } else { 472 c2 = c; 473 if (casefold) 474 c2 = case_fold(c2); 475 if (!casefold || c2) 476 hash = partial_name_hash(c2, hash); 477 } 478 } 479 str->hash = end_name_hash(hash); 480 481 return 0; 482 } 483 EXPORT_SYMBOL_IF_KUNIT(hfsplus_hash_dentry); 484 485 /* 486 * Compare strings with HFS+ filename ordering. 487 * Composed unicode characters are decomposed and case-folding is performed 488 * if the appropriate bits are (un)set on the superblock. 489 */ 490 int hfsplus_compare_dentry(const struct dentry *dentry, 491 unsigned int len, const char *str, const struct qstr *name) 492 { 493 struct super_block *sb = dentry->d_sb; 494 int casefold, decompose, size; 495 int dsize1, dsize2, len1, len2; 496 const u16 *dstr1, *dstr2; 497 const char *astr1, *astr2; 498 u16 c1, c2; 499 wchar_t c; 500 u16 dhangul_1[3], dhangul_2[3]; 501 502 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); 503 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); 504 astr1 = str; 505 len1 = len; 506 astr2 = name->name; 507 len2 = name->len; 508 dsize1 = dsize2 = 0; 509 dstr1 = dstr2 = NULL; 510 511 while (len1 > 0 && len2 > 0) { 512 if (!dsize1) { 513 size = asc2unichar(sb, astr1, len1, &c); 514 astr1 += size; 515 len1 -= size; 516 517 if (decompose) 518 dstr1 = decompose_unichar(c, &dsize1, 519 dhangul_1); 520 if (!decompose || !dstr1) { 521 c1 = c; 522 dstr1 = &c1; 523 dsize1 = 1; 524 } 525 } 526 527 if (!dsize2) { 528 size = asc2unichar(sb, astr2, len2, &c); 529 astr2 += size; 530 len2 -= size; 531 532 if (decompose) 533 dstr2 = decompose_unichar(c, &dsize2, 534 dhangul_2); 535 if (!decompose || !dstr2) { 536 c2 = c; 537 dstr2 = &c2; 538 dsize2 = 1; 539 } 540 } 541 542 c1 = *dstr1; 543 c2 = *dstr2; 544 if (casefold) { 545 c1 = case_fold(c1); 546 if (!c1) { 547 dstr1++; 548 dsize1--; 549 continue; 550 } 551 c2 = case_fold(c2); 552 if (!c2) { 553 dstr2++; 554 dsize2--; 555 continue; 556 } 557 } 558 if (c1 < c2) 559 return -1; 560 else if (c1 > c2) 561 return 1; 562 563 dstr1++; 564 dsize1--; 565 dstr2++; 566 dsize2--; 567 } 568 569 if (len1 < len2) 570 return -1; 571 if (len1 > len2) 572 return 1; 573 return 0; 574 } 575 EXPORT_SYMBOL_IF_KUNIT(hfsplus_compare_dentry); 576