1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * linux/fs/hfsplus/unicode.c 4 * 5 * Copyright (C) 2001 6 * Brad Boyer (flar@allandria.com) 7 * (C) 2003 Ardis Technologies <roman@ardistech.com> 8 * 9 * Handler routines for unicode strings 10 */ 11 12 #include <linux/types.h> 13 #include <linux/nls.h> 14 #include "hfsplus_fs.h" 15 #include "hfsplus_raw.h" 16 17 /* Fold the case of a unicode char, given the 16 bit value */ 18 /* Returns folded char, or 0 if ignorable */ 19 static inline u16 case_fold(u16 c) 20 { 21 u16 tmp; 22 23 tmp = hfsplus_case_fold_table[c >> 8]; 24 if (tmp) 25 tmp = hfsplus_case_fold_table[tmp + (c & 0xff)]; 26 else 27 tmp = c; 28 return tmp; 29 } 30 31 /* Compare unicode strings, return values like normal strcmp */ 32 int hfsplus_strcasecmp(const struct hfsplus_unistr *s1, 33 const struct hfsplus_unistr *s2) 34 { 35 u16 len1, len2, c1, c2; 36 const hfsplus_unichr *p1, *p2; 37 38 len1 = be16_to_cpu(s1->length); 39 len2 = be16_to_cpu(s2->length); 40 p1 = s1->unicode; 41 p2 = s2->unicode; 42 43 if (len1 > HFSPLUS_MAX_STRLEN) { 44 len1 = HFSPLUS_MAX_STRLEN; 45 pr_err("invalid length %u has been corrected to %d\n", 46 be16_to_cpu(s1->length), len1); 47 } 48 49 if (len2 > HFSPLUS_MAX_STRLEN) { 50 len2 = HFSPLUS_MAX_STRLEN; 51 pr_err("invalid length %u has been corrected to %d\n", 52 be16_to_cpu(s2->length), len2); 53 } 54 55 while (1) { 56 c1 = c2 = 0; 57 58 while (len1 && !c1) { 59 c1 = case_fold(be16_to_cpu(*p1)); 60 p1++; 61 len1--; 62 } 63 while (len2 && !c2) { 64 c2 = case_fold(be16_to_cpu(*p2)); 65 p2++; 66 len2--; 67 } 68 69 if (c1 != c2) 70 return (c1 < c2) ? -1 : 1; 71 if (!c1 && !c2) 72 return 0; 73 } 74 } 75 76 /* Compare names as a sequence of 16-bit unsigned integers */ 77 int hfsplus_strcmp(const struct hfsplus_unistr *s1, 78 const struct hfsplus_unistr *s2) 79 { 80 u16 len1, len2, c1, c2; 81 const hfsplus_unichr *p1, *p2; 82 int len; 83 84 len1 = be16_to_cpu(s1->length); 85 len2 = be16_to_cpu(s2->length); 86 p1 = s1->unicode; 87 p2 = s2->unicode; 88 89 if (len1 > HFSPLUS_MAX_STRLEN) { 90 len1 = HFSPLUS_MAX_STRLEN; 91 pr_err("invalid length %u has been corrected to %d\n", 92 be16_to_cpu(s1->length), len1); 93 } 94 95 if (len2 > HFSPLUS_MAX_STRLEN) { 96 len2 = HFSPLUS_MAX_STRLEN; 97 pr_err("invalid length %u has been corrected to %d\n", 98 be16_to_cpu(s2->length), len2); 99 } 100 101 for (len = min(len1, len2); len > 0; len--) { 102 c1 = be16_to_cpu(*p1); 103 c2 = be16_to_cpu(*p2); 104 if (c1 != c2) 105 return c1 < c2 ? -1 : 1; 106 p1++; 107 p2++; 108 } 109 110 return len1 < len2 ? -1 : 111 len1 > len2 ? 1 : 0; 112 } 113 114 115 #define Hangul_SBase 0xac00 116 #define Hangul_LBase 0x1100 117 #define Hangul_VBase 0x1161 118 #define Hangul_TBase 0x11a7 119 #define Hangul_SCount 11172 120 #define Hangul_LCount 19 121 #define Hangul_VCount 21 122 #define Hangul_TCount 28 123 #define Hangul_NCount (Hangul_VCount * Hangul_TCount) 124 125 126 static u16 *hfsplus_compose_lookup(u16 *p, u16 cc) 127 { 128 int i, s, e; 129 130 s = 1; 131 e = p[1]; 132 if (!e || cc < p[s * 2] || cc > p[e * 2]) 133 return NULL; 134 do { 135 i = (s + e) / 2; 136 if (cc > p[i * 2]) 137 s = i + 1; 138 else if (cc < p[i * 2]) 139 e = i - 1; 140 else 141 return hfsplus_compose_table + p[i * 2 + 1]; 142 } while (s <= e); 143 return NULL; 144 } 145 146 static int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, 147 int max_len, char *astr, int *len_p) 148 { 149 const hfsplus_unichr *ip; 150 struct nls_table *nls = HFSPLUS_SB(sb)->nls; 151 u8 *op; 152 u16 cc, c0, c1; 153 u16 *ce1, *ce2; 154 int i, len, ustrlen, res, compose; 155 156 op = astr; 157 ip = ustr->unicode; 158 159 ustrlen = be16_to_cpu(ustr->length); 160 if (ustrlen > max_len) { 161 ustrlen = max_len; 162 pr_err("invalid length %u has been corrected to %d\n", 163 be16_to_cpu(ustr->length), ustrlen); 164 } 165 166 len = *len_p; 167 ce1 = NULL; 168 compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); 169 170 while (ustrlen > 0) { 171 c0 = be16_to_cpu(*ip++); 172 ustrlen--; 173 /* search for single decomposed char */ 174 if (likely(compose)) 175 ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0); 176 if (ce1) 177 cc = ce1[0]; 178 else 179 cc = 0; 180 if (cc) { 181 /* start of a possibly decomposed Hangul char */ 182 if (cc != 0xffff) 183 goto done; 184 if (!ustrlen) 185 goto same; 186 c1 = be16_to_cpu(*ip) - Hangul_VBase; 187 if (c1 < Hangul_VCount) { 188 /* compose the Hangul char */ 189 cc = (c0 - Hangul_LBase) * Hangul_VCount; 190 cc = (cc + c1) * Hangul_TCount; 191 cc += Hangul_SBase; 192 ip++; 193 ustrlen--; 194 if (!ustrlen) 195 goto done; 196 c1 = be16_to_cpu(*ip) - Hangul_TBase; 197 if (c1 > 0 && c1 < Hangul_TCount) { 198 cc += c1; 199 ip++; 200 ustrlen--; 201 } 202 goto done; 203 } 204 } 205 while (1) { 206 /* main loop for common case of not composed chars */ 207 if (!ustrlen) 208 goto same; 209 c1 = be16_to_cpu(*ip); 210 if (likely(compose)) 211 ce1 = hfsplus_compose_lookup( 212 hfsplus_compose_table, c1); 213 if (ce1) 214 break; 215 switch (c0) { 216 case 0: 217 c0 = 0x2400; 218 break; 219 case '/': 220 c0 = ':'; 221 break; 222 } 223 res = nls->uni2char(c0, op, len); 224 if (res < 0) { 225 if (res == -ENAMETOOLONG) 226 goto out; 227 *op = '?'; 228 res = 1; 229 } 230 op += res; 231 len -= res; 232 c0 = c1; 233 ip++; 234 ustrlen--; 235 } 236 ce2 = hfsplus_compose_lookup(ce1, c0); 237 if (ce2) { 238 i = 1; 239 while (i < ustrlen) { 240 ce1 = hfsplus_compose_lookup(ce2, 241 be16_to_cpu(ip[i])); 242 if (!ce1) 243 break; 244 i++; 245 ce2 = ce1; 246 } 247 cc = ce2[0]; 248 if (cc) { 249 ip += i; 250 ustrlen -= i; 251 goto done; 252 } 253 } 254 same: 255 switch (c0) { 256 case 0: 257 cc = 0x2400; 258 break; 259 case '/': 260 cc = ':'; 261 break; 262 default: 263 cc = c0; 264 } 265 done: 266 res = nls->uni2char(cc, op, len); 267 if (res < 0) { 268 if (res == -ENAMETOOLONG) 269 goto out; 270 *op = '?'; 271 res = 1; 272 } 273 op += res; 274 len -= res; 275 } 276 res = 0; 277 out: 278 *len_p = (char *)op - astr; 279 return res; 280 } 281 282 inline int hfsplus_uni2asc_str(struct super_block *sb, 283 const struct hfsplus_unistr *ustr, char *astr, 284 int *len_p) 285 { 286 return hfsplus_uni2asc(sb, ustr, HFSPLUS_MAX_STRLEN, astr, len_p); 287 } 288 289 inline int hfsplus_uni2asc_xattr_str(struct super_block *sb, 290 const struct hfsplus_attr_unistr *ustr, 291 char *astr, int *len_p) 292 { 293 return hfsplus_uni2asc(sb, (const struct hfsplus_unistr *)ustr, 294 HFSPLUS_ATTR_MAX_STRLEN, astr, len_p); 295 } 296 297 /* 298 * Convert one or more ASCII characters into a single unicode character. 299 * Returns the number of ASCII characters corresponding to the unicode char. 300 */ 301 static inline int asc2unichar(struct super_block *sb, const char *astr, int len, 302 wchar_t *uc) 303 { 304 int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc); 305 if (size <= 0) { 306 *uc = '?'; 307 size = 1; 308 } 309 switch (*uc) { 310 case 0x2400: 311 *uc = 0; 312 break; 313 case ':': 314 *uc = '/'; 315 break; 316 } 317 return size; 318 } 319 320 /* Decomposes a non-Hangul unicode character. */ 321 static u16 *hfsplus_decompose_nonhangul(wchar_t uc, int *size) 322 { 323 int off; 324 325 off = hfsplus_decompose_table[(uc >> 12) & 0xf]; 326 if (off == 0 || off == 0xffff) 327 return NULL; 328 329 off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)]; 330 if (!off) 331 return NULL; 332 333 off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)]; 334 if (!off) 335 return NULL; 336 337 off = hfsplus_decompose_table[off + (uc & 0xf)]; 338 *size = off & 3; 339 if (*size == 0) 340 return NULL; 341 return hfsplus_decompose_table + (off / 4); 342 } 343 344 /* 345 * Try to decompose a unicode character as Hangul. Return 0 if @uc is not 346 * precomposed Hangul, otherwise return the length of the decomposition. 347 * 348 * This function was adapted from sample code from the Unicode Standard 349 * Annex #15: Unicode Normalization Forms, version 3.2.0. 350 * 351 * Copyright (C) 1991-2018 Unicode, Inc. All rights reserved. Distributed 352 * under the Terms of Use in http://www.unicode.org/copyright.html. 353 */ 354 static int hfsplus_try_decompose_hangul(wchar_t uc, u16 *result) 355 { 356 int index; 357 int l, v, t; 358 359 index = uc - Hangul_SBase; 360 if (index < 0 || index >= Hangul_SCount) 361 return 0; 362 363 l = Hangul_LBase + index / Hangul_NCount; 364 v = Hangul_VBase + (index % Hangul_NCount) / Hangul_TCount; 365 t = Hangul_TBase + index % Hangul_TCount; 366 367 result[0] = l; 368 result[1] = v; 369 if (t != Hangul_TBase) { 370 result[2] = t; 371 return 3; 372 } 373 return 2; 374 } 375 376 /* Decomposes a single unicode character. */ 377 static u16 *decompose_unichar(wchar_t uc, int *size, u16 *hangul_buffer) 378 { 379 u16 *result; 380 381 /* Hangul is handled separately */ 382 result = hangul_buffer; 383 *size = hfsplus_try_decompose_hangul(uc, result); 384 if (*size == 0) 385 result = hfsplus_decompose_nonhangul(uc, size); 386 return result; 387 } 388 389 int hfsplus_asc2uni(struct super_block *sb, 390 struct hfsplus_unistr *ustr, int max_unistr_len, 391 const char *astr, int len) 392 { 393 int size, dsize, decompose; 394 u16 *dstr, outlen = 0; 395 wchar_t c; 396 u16 dhangul[3]; 397 398 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); 399 while (outlen < max_unistr_len && len > 0) { 400 size = asc2unichar(sb, astr, len, &c); 401 402 if (decompose) 403 dstr = decompose_unichar(c, &dsize, dhangul); 404 else 405 dstr = NULL; 406 if (dstr) { 407 if (outlen + dsize > max_unistr_len) 408 break; 409 do { 410 ustr->unicode[outlen++] = cpu_to_be16(*dstr++); 411 } while (--dsize > 0); 412 } else 413 ustr->unicode[outlen++] = cpu_to_be16(c); 414 415 astr += size; 416 len -= size; 417 } 418 ustr->length = cpu_to_be16(outlen); 419 if (len > 0) 420 return -ENAMETOOLONG; 421 return 0; 422 } 423 424 /* 425 * Hash a string to an integer as appropriate for the HFS+ filesystem. 426 * Composed unicode characters are decomposed and case-folding is performed 427 * if the appropriate bits are (un)set on the superblock. 428 */ 429 int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str) 430 { 431 struct super_block *sb = dentry->d_sb; 432 const char *astr; 433 const u16 *dstr; 434 int casefold, decompose, size, len; 435 unsigned long hash; 436 wchar_t c; 437 u16 c2; 438 u16 dhangul[3]; 439 440 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); 441 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); 442 hash = init_name_hash(dentry); 443 astr = str->name; 444 len = str->len; 445 while (len > 0) { 446 int dsize; 447 size = asc2unichar(sb, astr, len, &c); 448 astr += size; 449 len -= size; 450 451 if (decompose) 452 dstr = decompose_unichar(c, &dsize, dhangul); 453 else 454 dstr = NULL; 455 if (dstr) { 456 do { 457 c2 = *dstr++; 458 if (casefold) 459 c2 = case_fold(c2); 460 if (!casefold || c2) 461 hash = partial_name_hash(c2, hash); 462 } while (--dsize > 0); 463 } else { 464 c2 = c; 465 if (casefold) 466 c2 = case_fold(c2); 467 if (!casefold || c2) 468 hash = partial_name_hash(c2, hash); 469 } 470 } 471 str->hash = end_name_hash(hash); 472 473 return 0; 474 } 475 476 /* 477 * Compare strings with HFS+ filename ordering. 478 * Composed unicode characters are decomposed and case-folding is performed 479 * if the appropriate bits are (un)set on the superblock. 480 */ 481 int hfsplus_compare_dentry(const struct dentry *dentry, 482 unsigned int len, const char *str, const struct qstr *name) 483 { 484 struct super_block *sb = dentry->d_sb; 485 int casefold, decompose, size; 486 int dsize1, dsize2, len1, len2; 487 const u16 *dstr1, *dstr2; 488 const char *astr1, *astr2; 489 u16 c1, c2; 490 wchar_t c; 491 u16 dhangul_1[3], dhangul_2[3]; 492 493 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); 494 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); 495 astr1 = str; 496 len1 = len; 497 astr2 = name->name; 498 len2 = name->len; 499 dsize1 = dsize2 = 0; 500 dstr1 = dstr2 = NULL; 501 502 while (len1 > 0 && len2 > 0) { 503 if (!dsize1) { 504 size = asc2unichar(sb, astr1, len1, &c); 505 astr1 += size; 506 len1 -= size; 507 508 if (decompose) 509 dstr1 = decompose_unichar(c, &dsize1, 510 dhangul_1); 511 if (!decompose || !dstr1) { 512 c1 = c; 513 dstr1 = &c1; 514 dsize1 = 1; 515 } 516 } 517 518 if (!dsize2) { 519 size = asc2unichar(sb, astr2, len2, &c); 520 astr2 += size; 521 len2 -= size; 522 523 if (decompose) 524 dstr2 = decompose_unichar(c, &dsize2, 525 dhangul_2); 526 if (!decompose || !dstr2) { 527 c2 = c; 528 dstr2 = &c2; 529 dsize2 = 1; 530 } 531 } 532 533 c1 = *dstr1; 534 c2 = *dstr2; 535 if (casefold) { 536 c1 = case_fold(c1); 537 if (!c1) { 538 dstr1++; 539 dsize1--; 540 continue; 541 } 542 c2 = case_fold(c2); 543 if (!c2) { 544 dstr2++; 545 dsize2--; 546 continue; 547 } 548 } 549 if (c1 < c2) 550 return -1; 551 else if (c1 > c2) 552 return 1; 553 554 dstr1++; 555 dsize1--; 556 dstr2++; 557 dsize2--; 558 } 559 560 if (len1 < len2) 561 return -1; 562 if (len1 > len2) 563 return 1; 564 return 0; 565 } 566