xref: /linux/fs/hfsplus/unicode.c (revision ca010e2ef64ce2a8f3907a5c02f8109012ea5dc6)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  linux/fs/hfsplus/unicode.c
4  *
5  * Copyright (C) 2001
6  * Brad Boyer (flar@allandria.com)
7  * (C) 2003 Ardis Technologies <roman@ardistech.com>
8  *
9  * Handler routines for unicode strings
10  */
11 
12 #include <linux/types.h>
13 #include <linux/nls.h>
14 
15 #include <kunit/visibility.h>
16 
17 #include "hfsplus_fs.h"
18 #include "hfsplus_raw.h"
19 
20 /* Fold the case of a unicode char, given the 16 bit value */
21 /* Returns folded char, or 0 if ignorable */
22 static inline u16 case_fold(u16 c)
23 {
24 	u16 tmp;
25 
26 	tmp = hfsplus_case_fold_table[c >> 8];
27 	if (tmp)
28 		tmp = hfsplus_case_fold_table[tmp + (c & 0xff)];
29 	else
30 		tmp = c;
31 	return tmp;
32 }
33 
34 /* Compare unicode strings, return values like normal strcmp */
35 int hfsplus_strcasecmp(const struct hfsplus_unistr *s1,
36 		       const struct hfsplus_unistr *s2)
37 {
38 	u16 len1, len2, c1, c2;
39 	const hfsplus_unichr *p1, *p2;
40 
41 	len1 = be16_to_cpu(s1->length);
42 	len2 = be16_to_cpu(s2->length);
43 	p1 = s1->unicode;
44 	p2 = s2->unicode;
45 
46 	if (len1 > HFSPLUS_MAX_STRLEN) {
47 		len1 = HFSPLUS_MAX_STRLEN;
48 		pr_err("invalid length %u has been corrected to %d\n",
49 			be16_to_cpu(s1->length), len1);
50 	}
51 
52 	if (len2 > HFSPLUS_MAX_STRLEN) {
53 		len2 = HFSPLUS_MAX_STRLEN;
54 		pr_err("invalid length %u has been corrected to %d\n",
55 			be16_to_cpu(s2->length), len2);
56 	}
57 
58 	while (1) {
59 		c1 = c2 = 0;
60 
61 		while (len1 && !c1) {
62 			c1 = case_fold(be16_to_cpu(*p1));
63 			p1++;
64 			len1--;
65 		}
66 		while (len2 && !c2) {
67 			c2 = case_fold(be16_to_cpu(*p2));
68 			p2++;
69 			len2--;
70 		}
71 
72 		if (c1 != c2)
73 			return (c1 < c2) ? -1 : 1;
74 		if (!c1 && !c2)
75 			return 0;
76 	}
77 }
78 EXPORT_SYMBOL_IF_KUNIT(hfsplus_strcasecmp);
79 
80 /* Compare names as a sequence of 16-bit unsigned integers */
81 int hfsplus_strcmp(const struct hfsplus_unistr *s1,
82 		   const struct hfsplus_unistr *s2)
83 {
84 	u16 len1, len2, c1, c2;
85 	const hfsplus_unichr *p1, *p2;
86 	int len;
87 
88 	len1 = be16_to_cpu(s1->length);
89 	len2 = be16_to_cpu(s2->length);
90 	p1 = s1->unicode;
91 	p2 = s2->unicode;
92 
93 	if (len1 > HFSPLUS_MAX_STRLEN) {
94 		len1 = HFSPLUS_MAX_STRLEN;
95 		pr_err("invalid length %u has been corrected to %d\n",
96 			be16_to_cpu(s1->length), len1);
97 	}
98 
99 	if (len2 > HFSPLUS_MAX_STRLEN) {
100 		len2 = HFSPLUS_MAX_STRLEN;
101 		pr_err("invalid length %u has been corrected to %d\n",
102 			be16_to_cpu(s2->length), len2);
103 	}
104 
105 	for (len = min(len1, len2); len > 0; len--) {
106 		c1 = be16_to_cpu(*p1);
107 		c2 = be16_to_cpu(*p2);
108 		if (c1 != c2)
109 			return c1 < c2 ? -1 : 1;
110 		p1++;
111 		p2++;
112 	}
113 
114 	return len1 < len2 ? -1 :
115 	       len1 > len2 ? 1 : 0;
116 }
117 EXPORT_SYMBOL_IF_KUNIT(hfsplus_strcmp);
118 
/*
 * Constants for arithmetic Hangul syllable (de)composition.
 * Hangul_NCount is the number of syllables per leading consonant.
 */
enum {
	Hangul_SBase	= 0xac00,
	Hangul_LBase	= 0x1100,
	Hangul_VBase	= 0x1161,
	Hangul_TBase	= 0x11a7,
	Hangul_SCount	= 11172,
	Hangul_LCount	= 19,
	Hangul_VCount	= 21,
	Hangul_TCount	= 28,
	Hangul_NCount	= Hangul_VCount * Hangul_TCount,
};
128 
129 
130 static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
131 {
132 	int i, s, e;
133 
134 	s = 1;
135 	e = p[1];
136 	if (!e || cc < p[s * 2] || cc > p[e * 2])
137 		return NULL;
138 	do {
139 		i = (s + e) / 2;
140 		if (cc > p[i * 2])
141 			s = i + 1;
142 		else if (cc < p[i * 2])
143 			e = i - 1;
144 		else
145 			return hfsplus_compose_table + p[i * 2 + 1];
146 	} while (s <= e);
147 	return NULL;
148 }
149 
150 static int hfsplus_uni2asc(struct super_block *sb,
151 			   const struct hfsplus_unistr *ustr,
152 			   int max_len, char *astr, int *len_p)
153 {
154 	const hfsplus_unichr *ip;
155 	struct nls_table *nls = HFSPLUS_SB(sb)->nls;
156 	u8 *op;
157 	u16 cc, c0, c1;
158 	u16 *ce1, *ce2;
159 	int i, len, ustrlen, res, compose;
160 
161 	op = astr;
162 	ip = ustr->unicode;
163 
164 	ustrlen = be16_to_cpu(ustr->length);
165 	if (ustrlen > max_len) {
166 		ustrlen = max_len;
167 		pr_err("invalid length %u has been corrected to %d\n",
168 			be16_to_cpu(ustr->length), ustrlen);
169 	}
170 
171 	len = *len_p;
172 	ce1 = NULL;
173 	compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
174 
175 	while (ustrlen > 0) {
176 		c0 = be16_to_cpu(*ip++);
177 		ustrlen--;
178 		/* search for single decomposed char */
179 		if (likely(compose))
180 			ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
181 		if (ce1)
182 			cc = ce1[0];
183 		else
184 			cc = 0;
185 		if (cc) {
186 			/* start of a possibly decomposed Hangul char */
187 			if (cc != 0xffff)
188 				goto done;
189 			if (!ustrlen)
190 				goto same;
191 			c1 = be16_to_cpu(*ip) - Hangul_VBase;
192 			if (c1 < Hangul_VCount) {
193 				/* compose the Hangul char */
194 				cc = (c0 - Hangul_LBase) * Hangul_VCount;
195 				cc = (cc + c1) * Hangul_TCount;
196 				cc += Hangul_SBase;
197 				ip++;
198 				ustrlen--;
199 				if (!ustrlen)
200 					goto done;
201 				c1 = be16_to_cpu(*ip) - Hangul_TBase;
202 				if (c1 > 0 && c1 < Hangul_TCount) {
203 					cc += c1;
204 					ip++;
205 					ustrlen--;
206 				}
207 				goto done;
208 			}
209 		}
210 		while (1) {
211 			/* main loop for common case of not composed chars */
212 			if (!ustrlen)
213 				goto same;
214 			c1 = be16_to_cpu(*ip);
215 			if (likely(compose))
216 				ce1 = hfsplus_compose_lookup(
217 					hfsplus_compose_table, c1);
218 			if (ce1)
219 				break;
220 			switch (c0) {
221 			case 0:
222 				c0 = 0x2400;
223 				break;
224 			case '/':
225 				c0 = ':';
226 				break;
227 			}
228 			res = nls->uni2char(c0, op, len);
229 			if (res < 0) {
230 				if (res == -ENAMETOOLONG)
231 					goto out;
232 				*op = '?';
233 				res = 1;
234 			}
235 			op += res;
236 			len -= res;
237 			c0 = c1;
238 			ip++;
239 			ustrlen--;
240 		}
241 		ce2 = hfsplus_compose_lookup(ce1, c0);
242 		if (ce2) {
243 			i = 1;
244 			while (i < ustrlen) {
245 				ce1 = hfsplus_compose_lookup(ce2,
246 					be16_to_cpu(ip[i]));
247 				if (!ce1)
248 					break;
249 				i++;
250 				ce2 = ce1;
251 			}
252 			cc = ce2[0];
253 			if (cc) {
254 				ip += i;
255 				ustrlen -= i;
256 				goto done;
257 			}
258 		}
259 same:
260 		switch (c0) {
261 		case 0:
262 			cc = 0x2400;
263 			break;
264 		case '/':
265 			cc = ':';
266 			break;
267 		default:
268 			cc = c0;
269 		}
270 done:
271 		res = nls->uni2char(cc, op, len);
272 		if (res < 0) {
273 			if (res == -ENAMETOOLONG)
274 				goto out;
275 			*op = '?';
276 			res = 1;
277 		}
278 		op += res;
279 		len -= res;
280 	}
281 	res = 0;
282 out:
283 	*len_p = (char *)op - astr;
284 	return res;
285 }
286 
287 inline int hfsplus_uni2asc_str(struct super_block *sb,
288 			       const struct hfsplus_unistr *ustr, char *astr,
289 			       int *len_p)
290 {
291 	return hfsplus_uni2asc(sb, ustr, HFSPLUS_MAX_STRLEN, astr, len_p);
292 }
293 EXPORT_SYMBOL_IF_KUNIT(hfsplus_uni2asc_str);
294 
295 inline int hfsplus_uni2asc_xattr_str(struct super_block *sb,
296 				     const struct hfsplus_attr_unistr *ustr,
297 				     char *astr, int *len_p)
298 {
299 	return hfsplus_uni2asc(sb, (const struct hfsplus_unistr *)ustr,
300 			       HFSPLUS_ATTR_MAX_STRLEN, astr, len_p);
301 }
302 EXPORT_SYMBOL_IF_KUNIT(hfsplus_uni2asc_xattr_str);
303 
304 /*
305  * Convert one or more ASCII characters into a single unicode character.
306  * Returns the number of ASCII characters corresponding to the unicode char.
307  */
308 static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
309 			      wchar_t *uc)
310 {
311 	int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
312 	if (size <= 0) {
313 		*uc = '?';
314 		size = 1;
315 	}
316 	switch (*uc) {
317 	case 0x2400:
318 		*uc = 0;
319 		break;
320 	case ':':
321 		*uc = '/';
322 		break;
323 	}
324 	return size;
325 }
326 
327 /* Decomposes a non-Hangul unicode character. */
328 static u16 *hfsplus_decompose_nonhangul(wchar_t uc, int *size)
329 {
330 	int off;
331 
332 	off = hfsplus_decompose_table[(uc >> 12) & 0xf];
333 	if (off == 0 || off == 0xffff)
334 		return NULL;
335 
336 	off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)];
337 	if (!off)
338 		return NULL;
339 
340 	off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)];
341 	if (!off)
342 		return NULL;
343 
344 	off = hfsplus_decompose_table[off + (uc & 0xf)];
345 	*size = off & 3;
346 	if (*size == 0)
347 		return NULL;
348 	return hfsplus_decompose_table + (off / 4);
349 }
350 
351 /*
352  * Try to decompose a unicode character as Hangul. Return 0 if @uc is not
353  * precomposed Hangul, otherwise return the length of the decomposition.
354  *
355  * This function was adapted from sample code from the Unicode Standard
356  * Annex #15: Unicode Normalization Forms, version 3.2.0.
357  *
358  * Copyright (C) 1991-2018 Unicode, Inc.  All rights reserved.  Distributed
359  * under the Terms of Use in http://www.unicode.org/copyright.html.
360  */
361 static int hfsplus_try_decompose_hangul(wchar_t uc, u16 *result)
362 {
363 	int index;
364 	int l, v, t;
365 
366 	index = uc - Hangul_SBase;
367 	if (index < 0 || index >= Hangul_SCount)
368 		return 0;
369 
370 	l = Hangul_LBase + index / Hangul_NCount;
371 	v = Hangul_VBase + (index % Hangul_NCount) / Hangul_TCount;
372 	t = Hangul_TBase + index % Hangul_TCount;
373 
374 	result[0] = l;
375 	result[1] = v;
376 	if (t != Hangul_TBase) {
377 		result[2] = t;
378 		return 3;
379 	}
380 	return 2;
381 }
382 
383 /* Decomposes a single unicode character. */
384 static u16 *decompose_unichar(wchar_t uc, int *size, u16 *hangul_buffer)
385 {
386 	u16 *result;
387 
388 	/* Hangul is handled separately */
389 	result = hangul_buffer;
390 	*size = hfsplus_try_decompose_hangul(uc, result);
391 	if (*size == 0)
392 		result = hfsplus_decompose_nonhangul(uc, size);
393 	return result;
394 }
395 
396 int hfsplus_asc2uni(struct super_block *sb,
397 		    struct hfsplus_unistr *ustr, int max_unistr_len,
398 		    const char *astr, int len)
399 {
400 	int size, dsize, decompose;
401 	u16 *dstr, outlen = 0;
402 	wchar_t c;
403 	u16 dhangul[3];
404 
405 	decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
406 	while (outlen < max_unistr_len && len > 0) {
407 		size = asc2unichar(sb, astr, len, &c);
408 
409 		if (decompose)
410 			dstr = decompose_unichar(c, &dsize, dhangul);
411 		else
412 			dstr = NULL;
413 		if (dstr) {
414 			if (outlen + dsize > max_unistr_len)
415 				break;
416 			do {
417 				ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
418 			} while (--dsize > 0);
419 		} else
420 			ustr->unicode[outlen++] = cpu_to_be16(c);
421 
422 		astr += size;
423 		len -= size;
424 	}
425 	ustr->length = cpu_to_be16(outlen);
426 	if (len > 0)
427 		return -ENAMETOOLONG;
428 	return 0;
429 }
430 EXPORT_SYMBOL_IF_KUNIT(hfsplus_asc2uni);
431 
432 /*
433  * Hash a string to an integer as appropriate for the HFS+ filesystem.
434  * Composed unicode characters are decomposed and case-folding is performed
435  * if the appropriate bits are (un)set on the superblock.
436  */
437 int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str)
438 {
439 	struct super_block *sb = dentry->d_sb;
440 	const char *astr;
441 	const u16 *dstr;
442 	int casefold, decompose, size, len;
443 	unsigned long hash;
444 	wchar_t c;
445 	u16 c2;
446 	u16 dhangul[3];
447 
448 	casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
449 	decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
450 	hash = init_name_hash(dentry);
451 	astr = str->name;
452 	len = str->len;
453 	while (len > 0) {
454 		int dsize;
455 		size = asc2unichar(sb, astr, len, &c);
456 		astr += size;
457 		len -= size;
458 
459 		if (decompose)
460 			dstr = decompose_unichar(c, &dsize, dhangul);
461 		else
462 			dstr = NULL;
463 		if (dstr) {
464 			do {
465 				c2 = *dstr++;
466 				if (casefold)
467 					c2 = case_fold(c2);
468 				if (!casefold || c2)
469 					hash = partial_name_hash(c2, hash);
470 			} while (--dsize > 0);
471 		} else {
472 			c2 = c;
473 			if (casefold)
474 				c2 = case_fold(c2);
475 			if (!casefold || c2)
476 				hash = partial_name_hash(c2, hash);
477 		}
478 	}
479 	str->hash = end_name_hash(hash);
480 
481 	return 0;
482 }
483 EXPORT_SYMBOL_IF_KUNIT(hfsplus_hash_dentry);
484 
485 /*
486  * Compare strings with HFS+ filename ordering.
487  * Composed unicode characters are decomposed and case-folding is performed
488  * if the appropriate bits are (un)set on the superblock.
489  */
490 int hfsplus_compare_dentry(const struct dentry *dentry,
491 		unsigned int len, const char *str, const struct qstr *name)
492 {
493 	struct super_block *sb = dentry->d_sb;
494 	int casefold, decompose, size;
495 	int dsize1, dsize2, len1, len2;
496 	const u16 *dstr1, *dstr2;
497 	const char *astr1, *astr2;
498 	u16 c1, c2;
499 	wchar_t c;
500 	u16 dhangul_1[3], dhangul_2[3];
501 
502 	casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
503 	decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
504 	astr1 = str;
505 	len1 = len;
506 	astr2 = name->name;
507 	len2 = name->len;
508 	dsize1 = dsize2 = 0;
509 	dstr1 = dstr2 = NULL;
510 
511 	while (len1 > 0 && len2 > 0) {
512 		if (!dsize1) {
513 			size = asc2unichar(sb, astr1, len1, &c);
514 			astr1 += size;
515 			len1 -= size;
516 
517 			if (decompose)
518 				dstr1 = decompose_unichar(c, &dsize1,
519 							  dhangul_1);
520 			if (!decompose || !dstr1) {
521 				c1 = c;
522 				dstr1 = &c1;
523 				dsize1 = 1;
524 			}
525 		}
526 
527 		if (!dsize2) {
528 			size = asc2unichar(sb, astr2, len2, &c);
529 			astr2 += size;
530 			len2 -= size;
531 
532 			if (decompose)
533 				dstr2 = decompose_unichar(c, &dsize2,
534 							  dhangul_2);
535 			if (!decompose || !dstr2) {
536 				c2 = c;
537 				dstr2 = &c2;
538 				dsize2 = 1;
539 			}
540 		}
541 
542 		c1 = *dstr1;
543 		c2 = *dstr2;
544 		if (casefold) {
545 			c1 = case_fold(c1);
546 			if (!c1) {
547 				dstr1++;
548 				dsize1--;
549 				continue;
550 			}
551 			c2 = case_fold(c2);
552 			if (!c2) {
553 				dstr2++;
554 				dsize2--;
555 				continue;
556 			}
557 		}
558 		if (c1 < c2)
559 			return -1;
560 		else if (c1 > c2)
561 			return 1;
562 
563 		dstr1++;
564 		dsize1--;
565 		dstr2++;
566 		dsize2--;
567 	}
568 
569 	if (len1 < len2)
570 		return -1;
571 	if (len1 > len2)
572 		return 1;
573 	return 0;
574 }
575 EXPORT_SYMBOL_IF_KUNIT(hfsplus_compare_dentry);
576