1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * linux/fs/hfsplus/unicode.c
4 *
5 * Copyright (C) 2001
6 * Brad Boyer (flar@allandria.com)
7 * (C) 2003 Ardis Technologies <roman@ardistech.com>
8 *
9 * Handler routines for unicode strings
10 */
11
12 #include <linux/types.h>
13 #include <linux/nls.h>
14 #include "hfsplus_fs.h"
15 #include "hfsplus_raw.h"
16
17 /* Fold the case of a unicode char, given the 16 bit value */
18 /* Returns folded char, or 0 if ignorable */
case_fold(u16 c)19 static inline u16 case_fold(u16 c)
20 {
21 u16 tmp;
22
23 tmp = hfsplus_case_fold_table[c >> 8];
24 if (tmp)
25 tmp = hfsplus_case_fold_table[tmp + (c & 0xff)];
26 else
27 tmp = c;
28 return tmp;
29 }
30
31 /* Compare unicode strings, return values like normal strcmp */
hfsplus_strcasecmp(const struct hfsplus_unistr * s1,const struct hfsplus_unistr * s2)32 int hfsplus_strcasecmp(const struct hfsplus_unistr *s1,
33 const struct hfsplus_unistr *s2)
34 {
35 u16 len1, len2, c1, c2;
36 const hfsplus_unichr *p1, *p2;
37
38 len1 = be16_to_cpu(s1->length);
39 len2 = be16_to_cpu(s2->length);
40 p1 = s1->unicode;
41 p2 = s2->unicode;
42
43 if (len1 > HFSPLUS_MAX_STRLEN) {
44 len1 = HFSPLUS_MAX_STRLEN;
45 pr_err("invalid length %u has been corrected to %d\n",
46 be16_to_cpu(s1->length), len1);
47 }
48
49 if (len2 > HFSPLUS_MAX_STRLEN) {
50 len2 = HFSPLUS_MAX_STRLEN;
51 pr_err("invalid length %u has been corrected to %d\n",
52 be16_to_cpu(s2->length), len2);
53 }
54
55 while (1) {
56 c1 = c2 = 0;
57
58 while (len1 && !c1) {
59 c1 = case_fold(be16_to_cpu(*p1));
60 p1++;
61 len1--;
62 }
63 while (len2 && !c2) {
64 c2 = case_fold(be16_to_cpu(*p2));
65 p2++;
66 len2--;
67 }
68
69 if (c1 != c2)
70 return (c1 < c2) ? -1 : 1;
71 if (!c1 && !c2)
72 return 0;
73 }
74 }
75
76 /* Compare names as a sequence of 16-bit unsigned integers */
hfsplus_strcmp(const struct hfsplus_unistr * s1,const struct hfsplus_unistr * s2)77 int hfsplus_strcmp(const struct hfsplus_unistr *s1,
78 const struct hfsplus_unistr *s2)
79 {
80 u16 len1, len2, c1, c2;
81 const hfsplus_unichr *p1, *p2;
82 int len;
83
84 len1 = be16_to_cpu(s1->length);
85 len2 = be16_to_cpu(s2->length);
86 p1 = s1->unicode;
87 p2 = s2->unicode;
88
89 if (len1 > HFSPLUS_MAX_STRLEN) {
90 len1 = HFSPLUS_MAX_STRLEN;
91 pr_err("invalid length %u has been corrected to %d\n",
92 be16_to_cpu(s1->length), len1);
93 }
94
95 if (len2 > HFSPLUS_MAX_STRLEN) {
96 len2 = HFSPLUS_MAX_STRLEN;
97 pr_err("invalid length %u has been corrected to %d\n",
98 be16_to_cpu(s2->length), len2);
99 }
100
101 for (len = min(len1, len2); len > 0; len--) {
102 c1 = be16_to_cpu(*p1);
103 c2 = be16_to_cpu(*p2);
104 if (c1 != c2)
105 return c1 < c2 ? -1 : 1;
106 p1++;
107 p2++;
108 }
109
110 return len1 < len2 ? -1 :
111 len1 > len2 ? 1 : 0;
112 }
113
114
/*
 * Constants for the arithmetic Hangul syllable composition/decomposition
 * done in this file.
 *
 * Bases: first precomposed syllable (S) and the jamo ranges the leading
 * (L), vowel (V) and trailing (T) parts are taken from.  Hangul_TBase is
 * one below the first real trailing jamo: a T index of 0 means "no
 * trailing jamo".
 */
#define Hangul_SBase	0xAC00
#define Hangul_LBase	0x1100
#define Hangul_VBase	0x1161
#define Hangul_TBase	0x11A7

/* Number of values in each range; N is the number of V/T combinations. */
#define Hangul_LCount	19
#define Hangul_VCount	21
#define Hangul_TCount	28
#define Hangul_NCount	(Hangul_VCount * Hangul_TCount)
/* Precomposed syllables: 19 * 21 * 28 = 11172 */
#define Hangul_SCount	(Hangul_LCount * Hangul_NCount)
124
125
/*
 * Binary-search one node of the compose table for the code unit @cc.
 *
 * Layout of the node at @p as used here: p[1] holds the number of entries;
 * entry i (1-based) consists of the key p[2 * i] and the value p[2 * i + 1],
 * the latter being an offset into hfsplus_compose_table.  The search relies
 * on the keys being stored in ascending order.
 *
 * Returns a pointer to the node the matching entry refers to, or NULL if
 * the node is empty or contains no entry for @cc.
 */
static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
{
	int lo = 1;
	int hi = p[1];

	if (hi == 0)
		return NULL;

	/* quick reject: @cc lies outside the [first key, last key] range */
	if (cc < p[2 * lo] || cc > p[2 * hi])
		return NULL;

	while (lo <= hi) {
		const int mid = (lo + hi) / 2;
		const u16 key = p[2 * mid];

		if (key == cc)
			return hfsplus_compose_table + p[2 * mid + 1];

		if (key < cc)
			lo = mid + 1;
		else
			hi = mid - 1;
	}

	return NULL;
}
145
/*
 * Convert an on-disk unicode string into the charset of the NLS table
 * attached to the superblock, composing decomposed sequences on the way
 * unless HFSPLUS_SB_NODECOMPOSE is set.
 *
 * @sb:      superblock providing the NLS table and the mount flags
 * @ustr:    source string (big-endian length followed by big-endian units)
 * @max_len: upper bound for the stored length; a larger length is clamped
 *           to this value and reported with pr_err()
 * @astr:    output buffer; no terminating NUL is written by this function
 * @len_p:   on entry the number of bytes available in @astr (handed to the
 *           NLS uni2char() as output bound), on return the number of bytes
 *           actually written
 *
 * On output a 0 unit is written as U+2400 and '/' as ':' (asc2unichar()
 * applies the reverse mapping).  A unit the NLS table cannot convert is
 * replaced by '?'.
 *
 * Returns 0 on success or -ENAMETOOLONG when uni2char() reports that the
 * output does not fit; *len_p is updated in both cases.
 */
static int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr,
		int max_len, char *astr, int *len_p)
{
	const hfsplus_unichr *ip;
	struct nls_table *nls = HFSPLUS_SB(sb)->nls;
	u8 *op;
	u16 cc, c0, c1;
	u16 *ce1, *ce2;
	int i, len, ustrlen, res, compose;

	op = astr;
	ip = ustr->unicode;

	/* never read more units than the caller says the buffer can hold */
	ustrlen = be16_to_cpu(ustr->length);
	if (ustrlen > max_len) {
		ustrlen = max_len;
		pr_err("invalid length %u has been corrected to %d\n",
			be16_to_cpu(ustr->length), ustrlen);
	}

	len = *len_p;
	/* ce1 stays NULL for the whole call when composition is disabled */
	ce1 = NULL;
	compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);

	/*
	 * Invariant inside the loop: c0 is the unit currently being
	 * processed (already consumed), ip points at the unit after it and
	 * ustrlen counts the units from ip onwards.
	 */
	while (ustrlen > 0) {
		c0 = be16_to_cpu(*ip++);
		ustrlen--;
		/* search for single decomposed char */
		if (likely(compose))
			ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
		if (ce1)
			cc = ce1[0];
		else
			cc = 0;
		if (cc) {
			/* start of a possibly decomposed Hangul char */
			/* any value other than 0xffff is emitted as is */
			if (cc != 0xffff)
				goto done;
			if (!ustrlen)
				goto same;
			/* u16 arithmetic: a unit below VBase wraps to a large value */
			c1 = be16_to_cpu(*ip) - Hangul_VBase;
			if (c1 < Hangul_VCount) {
				/* compose the Hangul char */
				cc = (c0 - Hangul_LBase) * Hangul_VCount;
				cc = (cc + c1) * Hangul_TCount;
				cc += Hangul_SBase;
				ip++;
				ustrlen--;
				if (!ustrlen)
					goto done;
				/* optional third unit; index 0 means "none" */
				c1 = be16_to_cpu(*ip) - Hangul_TBase;
				if (c1 > 0 && c1 < Hangul_TCount) {
					cc += c1;
					ip++;
					ustrlen--;
				}
				goto done;
			}
		}
		while (1) {
			/* main loop for common case of not composed chars */
			if (!ustrlen)
				goto same;
			/* peek at the next unit without consuming it */
			c1 = be16_to_cpu(*ip);
			if (likely(compose))
				ce1 = hfsplus_compose_lookup(
					hfsplus_compose_table, c1);
			/* c1 has a compose table entry: try to compose below */
			if (ce1)
				break;
			switch (c0) {
			case 0:
				c0 = 0x2400;
				break;
			case '/':
				c0 = ':';
				break;
			}
			res = nls->uni2char(c0, op, len);
			if (res < 0) {
				if (res == -ENAMETOOLONG)
					goto out;
				*op = '?';
				res = 1;
			}
			op += res;
			len -= res;
			/* c0 has been written; c1 becomes the current unit */
			c0 = c1;
			ip++;
			ustrlen--;
		}
		/* look c0 up in the node found for the following unit c1 */
		ce2 = hfsplus_compose_lookup(ce1, c0);
		if (ce2) {
			/*
			 * ip[0] is c1, which is already part of the match;
			 * keep extending the match with ip[1], ip[2], ...
			 */
			i = 1;
			while (i < ustrlen) {
				ce1 = hfsplus_compose_lookup(ce2,
						be16_to_cpu(ip[i]));
				if (!ce1)
					break;
				i++;
				ce2 = ce1;
			}
			cc = ce2[0];
			if (cc) {
				/* consume the i units that were composed with c0 */
				ip += i;
				ustrlen -= i;
				goto done;
			}
		}
same:
		/* no composition applies: emit c0 itself, with the usual mapping */
		switch (c0) {
		case 0:
			cc = 0x2400;
			break;
		case '/':
			cc = ':';
			break;
		default:
			cc = c0;
		}
done:
		/* write the resulting character cc to the output buffer */
		res = nls->uni2char(cc, op, len);
		if (res < 0) {
			if (res == -ENAMETOOLONG)
				goto out;
			*op = '?';
			res = 1;
		}
		op += res;
		len -= res;
	}
	res = 0;
out:
	/* report how many bytes were produced, also on -ENAMETOOLONG */
	*len_p = (char *)op - astr;
	return res;
}
281
hfsplus_uni2asc_str(struct super_block * sb,const struct hfsplus_unistr * ustr,char * astr,int * len_p)282 inline int hfsplus_uni2asc_str(struct super_block *sb,
283 const struct hfsplus_unistr *ustr, char *astr,
284 int *len_p)
285 {
286 return hfsplus_uni2asc(sb, ustr, HFSPLUS_MAX_STRLEN, astr, len_p);
287 }
288
hfsplus_uni2asc_xattr_str(struct super_block * sb,const struct hfsplus_attr_unistr * ustr,char * astr,int * len_p)289 inline int hfsplus_uni2asc_xattr_str(struct super_block *sb,
290 const struct hfsplus_attr_unistr *ustr,
291 char *astr, int *len_p)
292 {
293 return hfsplus_uni2asc(sb, (const struct hfsplus_unistr *)ustr,
294 HFSPLUS_ATTR_MAX_STRLEN, astr, len_p);
295 }
296
297 /*
298 * Convert one or more ASCII characters into a single unicode character.
299 * Returns the number of ASCII characters corresponding to the unicode char.
300 */
asc2unichar(struct super_block * sb,const char * astr,int len,wchar_t * uc)301 static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
302 wchar_t *uc)
303 {
304 int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
305 if (size <= 0) {
306 *uc = '?';
307 size = 1;
308 }
309 switch (*uc) {
310 case 0x2400:
311 *uc = 0;
312 break;
313 case ':':
314 *uc = '/';
315 break;
316 }
317 return size;
318 }
319
320 /* Decomposes a non-Hangul unicode character. */
hfsplus_decompose_nonhangul(wchar_t uc,int * size)321 static u16 *hfsplus_decompose_nonhangul(wchar_t uc, int *size)
322 {
323 int off;
324
325 off = hfsplus_decompose_table[(uc >> 12) & 0xf];
326 if (off == 0 || off == 0xffff)
327 return NULL;
328
329 off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)];
330 if (!off)
331 return NULL;
332
333 off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)];
334 if (!off)
335 return NULL;
336
337 off = hfsplus_decompose_table[off + (uc & 0xf)];
338 *size = off & 3;
339 if (*size == 0)
340 return NULL;
341 return hfsplus_decompose_table + (off / 4);
342 }
343
344 /*
345 * Try to decompose a unicode character as Hangul. Return 0 if @uc is not
346 * precomposed Hangul, otherwise return the length of the decomposition.
347 *
348 * This function was adapted from sample code from the Unicode Standard
349 * Annex #15: Unicode Normalization Forms, version 3.2.0.
350 *
351 * Copyright (C) 1991-2018 Unicode, Inc. All rights reserved. Distributed
352 * under the Terms of Use in http://www.unicode.org/copyright.html.
353 */
hfsplus_try_decompose_hangul(wchar_t uc,u16 * result)354 static int hfsplus_try_decompose_hangul(wchar_t uc, u16 *result)
355 {
356 int index;
357 int l, v, t;
358
359 index = uc - Hangul_SBase;
360 if (index < 0 || index >= Hangul_SCount)
361 return 0;
362
363 l = Hangul_LBase + index / Hangul_NCount;
364 v = Hangul_VBase + (index % Hangul_NCount) / Hangul_TCount;
365 t = Hangul_TBase + index % Hangul_TCount;
366
367 result[0] = l;
368 result[1] = v;
369 if (t != Hangul_TBase) {
370 result[2] = t;
371 return 3;
372 }
373 return 2;
374 }
375
376 /* Decomposes a single unicode character. */
decompose_unichar(wchar_t uc,int * size,u16 * hangul_buffer)377 static u16 *decompose_unichar(wchar_t uc, int *size, u16 *hangul_buffer)
378 {
379 u16 *result;
380
381 /* Hangul is handled separately */
382 result = hangul_buffer;
383 *size = hfsplus_try_decompose_hangul(uc, result);
384 if (*size == 0)
385 result = hfsplus_decompose_nonhangul(uc, size);
386 return result;
387 }
388
hfsplus_asc2uni(struct super_block * sb,struct hfsplus_unistr * ustr,int max_unistr_len,const char * astr,int len)389 int hfsplus_asc2uni(struct super_block *sb,
390 struct hfsplus_unistr *ustr, int max_unistr_len,
391 const char *astr, int len)
392 {
393 int size, dsize, decompose;
394 u16 *dstr, outlen = 0;
395 wchar_t c;
396 u16 dhangul[3];
397
398 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
399 while (outlen < max_unistr_len && len > 0) {
400 size = asc2unichar(sb, astr, len, &c);
401
402 if (decompose)
403 dstr = decompose_unichar(c, &dsize, dhangul);
404 else
405 dstr = NULL;
406 if (dstr) {
407 if (outlen + dsize > max_unistr_len)
408 break;
409 do {
410 ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
411 } while (--dsize > 0);
412 } else
413 ustr->unicode[outlen++] = cpu_to_be16(c);
414
415 astr += size;
416 len -= size;
417 }
418 ustr->length = cpu_to_be16(outlen);
419 if (len > 0)
420 return -ENAMETOOLONG;
421 return 0;
422 }
423
424 /*
425 * Hash a string to an integer as appropriate for the HFS+ filesystem.
426 * Composed unicode characters are decomposed and case-folding is performed
427 * if the appropriate bits are (un)set on the superblock.
428 */
hfsplus_hash_dentry(const struct dentry * dentry,struct qstr * str)429 int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str)
430 {
431 struct super_block *sb = dentry->d_sb;
432 const char *astr;
433 const u16 *dstr;
434 int casefold, decompose, size, len;
435 unsigned long hash;
436 wchar_t c;
437 u16 c2;
438 u16 dhangul[3];
439
440 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
441 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
442 hash = init_name_hash(dentry);
443 astr = str->name;
444 len = str->len;
445 while (len > 0) {
446 int dsize;
447 size = asc2unichar(sb, astr, len, &c);
448 astr += size;
449 len -= size;
450
451 if (decompose)
452 dstr = decompose_unichar(c, &dsize, dhangul);
453 else
454 dstr = NULL;
455 if (dstr) {
456 do {
457 c2 = *dstr++;
458 if (casefold)
459 c2 = case_fold(c2);
460 if (!casefold || c2)
461 hash = partial_name_hash(c2, hash);
462 } while (--dsize > 0);
463 } else {
464 c2 = c;
465 if (casefold)
466 c2 = case_fold(c2);
467 if (!casefold || c2)
468 hash = partial_name_hash(c2, hash);
469 }
470 }
471 str->hash = end_name_hash(hash);
472
473 return 0;
474 }
475
476 /*
477 * Compare strings with HFS+ filename ordering.
478 * Composed unicode characters are decomposed and case-folding is performed
479 * if the appropriate bits are (un)set on the superblock.
480 */
hfsplus_compare_dentry(const struct dentry * dentry,unsigned int len,const char * str,const struct qstr * name)481 int hfsplus_compare_dentry(const struct dentry *dentry,
482 unsigned int len, const char *str, const struct qstr *name)
483 {
484 struct super_block *sb = dentry->d_sb;
485 int casefold, decompose, size;
486 int dsize1, dsize2, len1, len2;
487 const u16 *dstr1, *dstr2;
488 const char *astr1, *astr2;
489 u16 c1, c2;
490 wchar_t c;
491 u16 dhangul_1[3], dhangul_2[3];
492
493 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
494 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
495 astr1 = str;
496 len1 = len;
497 astr2 = name->name;
498 len2 = name->len;
499 dsize1 = dsize2 = 0;
500 dstr1 = dstr2 = NULL;
501
502 while (len1 > 0 && len2 > 0) {
503 if (!dsize1) {
504 size = asc2unichar(sb, astr1, len1, &c);
505 astr1 += size;
506 len1 -= size;
507
508 if (decompose)
509 dstr1 = decompose_unichar(c, &dsize1,
510 dhangul_1);
511 if (!decompose || !dstr1) {
512 c1 = c;
513 dstr1 = &c1;
514 dsize1 = 1;
515 }
516 }
517
518 if (!dsize2) {
519 size = asc2unichar(sb, astr2, len2, &c);
520 astr2 += size;
521 len2 -= size;
522
523 if (decompose)
524 dstr2 = decompose_unichar(c, &dsize2,
525 dhangul_2);
526 if (!decompose || !dstr2) {
527 c2 = c;
528 dstr2 = &c2;
529 dsize2 = 1;
530 }
531 }
532
533 c1 = *dstr1;
534 c2 = *dstr2;
535 if (casefold) {
536 c1 = case_fold(c1);
537 if (!c1) {
538 dstr1++;
539 dsize1--;
540 continue;
541 }
542 c2 = case_fold(c2);
543 if (!c2) {
544 dstr2++;
545 dsize2--;
546 continue;
547 }
548 }
549 if (c1 < c2)
550 return -1;
551 else if (c1 > c2)
552 return 1;
553
554 dstr1++;
555 dsize1--;
556 dstr2++;
557 dsize2--;
558 }
559
560 if (len1 < len2)
561 return -1;
562 if (len1 > len2)
563 return 1;
564 return 0;
565 }
566