xref: /linux/fs/hfsplus/unicode.c (revision fd639726bf15fca8ee1a00dce8e0096d0ad9bd18)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  linux/fs/hfsplus/unicode.c
4  *
5  * Copyright (C) 2001
6  * Brad Boyer (flar@allandria.com)
7  * (C) 2003 Ardis Technologies <roman@ardistech.com>
8  *
9  * Handler routines for unicode strings
10  */
11 
12 #include <linux/types.h>
13 #include <linux/nls.h>
14 #include "hfsplus_fs.h"
15 #include "hfsplus_raw.h"
16 
17 /* Fold the case of a unicode char, given the 16 bit value */
18 /* Returns folded char, or 0 if ignorable */
19 static inline u16 case_fold(u16 c)
20 {
21 	u16 tmp;
22 
23 	tmp = hfsplus_case_fold_table[c >> 8];
24 	if (tmp)
25 		tmp = hfsplus_case_fold_table[tmp + (c & 0xff)];
26 	else
27 		tmp = c;
28 	return tmp;
29 }
30 
31 /* Compare unicode strings, return values like normal strcmp */
32 int hfsplus_strcasecmp(const struct hfsplus_unistr *s1,
33 		       const struct hfsplus_unistr *s2)
34 {
35 	u16 len1, len2, c1, c2;
36 	const hfsplus_unichr *p1, *p2;
37 
38 	len1 = be16_to_cpu(s1->length);
39 	len2 = be16_to_cpu(s2->length);
40 	p1 = s1->unicode;
41 	p2 = s2->unicode;
42 
43 	while (1) {
44 		c1 = c2 = 0;
45 
46 		while (len1 && !c1) {
47 			c1 = case_fold(be16_to_cpu(*p1));
48 			p1++;
49 			len1--;
50 		}
51 		while (len2 && !c2) {
52 			c2 = case_fold(be16_to_cpu(*p2));
53 			p2++;
54 			len2--;
55 		}
56 
57 		if (c1 != c2)
58 			return (c1 < c2) ? -1 : 1;
59 		if (!c1 && !c2)
60 			return 0;
61 	}
62 }
63 
64 /* Compare names as a sequence of 16-bit unsigned integers */
65 int hfsplus_strcmp(const struct hfsplus_unistr *s1,
66 		   const struct hfsplus_unistr *s2)
67 {
68 	u16 len1, len2, c1, c2;
69 	const hfsplus_unichr *p1, *p2;
70 	int len;
71 
72 	len1 = be16_to_cpu(s1->length);
73 	len2 = be16_to_cpu(s2->length);
74 	p1 = s1->unicode;
75 	p2 = s2->unicode;
76 
77 	for (len = min(len1, len2); len > 0; len--) {
78 		c1 = be16_to_cpu(*p1);
79 		c2 = be16_to_cpu(*p2);
80 		if (c1 != c2)
81 			return c1 < c2 ? -1 : 1;
82 		p1++;
83 		p2++;
84 	}
85 
86 	return len1 < len2 ? -1 :
87 	       len1 > len2 ? 1 : 0;
88 }
89 
90 
/*
 * Constants for arithmetic composition of Hangul syllables.  The names
 * follow the Unicode Standard's Hangul syllable composition algorithm
 * (S = precomposed syllable, L = leading consonant, V = vowel,
 * T = trailing consonant).
 */
enum {
	Hangul_SBase	= 0xac00,
	Hangul_LBase	= 0x1100,
	Hangul_VBase	= 0x1161,
	Hangul_TBase	= 0x11a7,
	Hangul_SCount	= 11172,
	Hangul_LCount	= 19,
	Hangul_VCount	= 21,
	Hangul_TCount	= 28,
	Hangul_NCount	= Hangul_VCount * Hangul_TCount,
};
100 
101 
102 static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
103 {
104 	int i, s, e;
105 
106 	s = 1;
107 	e = p[1];
108 	if (!e || cc < p[s * 2] || cc > p[e * 2])
109 		return NULL;
110 	do {
111 		i = (s + e) / 2;
112 		if (cc > p[i * 2])
113 			s = i + 1;
114 		else if (cc < p[i * 2])
115 			e = i - 1;
116 		else
117 			return hfsplus_compose_table + p[i * 2 + 1];
118 	} while (s <= e);
119 	return NULL;
120 }
121 
122 int hfsplus_uni2asc(struct super_block *sb,
123 		const struct hfsplus_unistr *ustr,
124 		char *astr, int *len_p)
125 {
126 	const hfsplus_unichr *ip;
127 	struct nls_table *nls = HFSPLUS_SB(sb)->nls;
128 	u8 *op;
129 	u16 cc, c0, c1;
130 	u16 *ce1, *ce2;
131 	int i, len, ustrlen, res, compose;
132 
133 	op = astr;
134 	ip = ustr->unicode;
135 	ustrlen = be16_to_cpu(ustr->length);
136 	len = *len_p;
137 	ce1 = NULL;
138 	compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
139 
140 	while (ustrlen > 0) {
141 		c0 = be16_to_cpu(*ip++);
142 		ustrlen--;
143 		/* search for single decomposed char */
144 		if (likely(compose))
145 			ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
146 		if (ce1)
147 			cc = ce1[0];
148 		else
149 			cc = 0;
150 		if (cc) {
151 			/* start of a possibly decomposed Hangul char */
152 			if (cc != 0xffff)
153 				goto done;
154 			if (!ustrlen)
155 				goto same;
156 			c1 = be16_to_cpu(*ip) - Hangul_VBase;
157 			if (c1 < Hangul_VCount) {
158 				/* compose the Hangul char */
159 				cc = (c0 - Hangul_LBase) * Hangul_VCount;
160 				cc = (cc + c1) * Hangul_TCount;
161 				cc += Hangul_SBase;
162 				ip++;
163 				ustrlen--;
164 				if (!ustrlen)
165 					goto done;
166 				c1 = be16_to_cpu(*ip) - Hangul_TBase;
167 				if (c1 > 0 && c1 < Hangul_TCount) {
168 					cc += c1;
169 					ip++;
170 					ustrlen--;
171 				}
172 				goto done;
173 			}
174 		}
175 		while (1) {
176 			/* main loop for common case of not composed chars */
177 			if (!ustrlen)
178 				goto same;
179 			c1 = be16_to_cpu(*ip);
180 			if (likely(compose))
181 				ce1 = hfsplus_compose_lookup(
182 					hfsplus_compose_table, c1);
183 			if (ce1)
184 				break;
185 			switch (c0) {
186 			case 0:
187 				c0 = 0x2400;
188 				break;
189 			case '/':
190 				c0 = ':';
191 				break;
192 			}
193 			res = nls->uni2char(c0, op, len);
194 			if (res < 0) {
195 				if (res == -ENAMETOOLONG)
196 					goto out;
197 				*op = '?';
198 				res = 1;
199 			}
200 			op += res;
201 			len -= res;
202 			c0 = c1;
203 			ip++;
204 			ustrlen--;
205 		}
206 		ce2 = hfsplus_compose_lookup(ce1, c0);
207 		if (ce2) {
208 			i = 1;
209 			while (i < ustrlen) {
210 				ce1 = hfsplus_compose_lookup(ce2,
211 					be16_to_cpu(ip[i]));
212 				if (!ce1)
213 					break;
214 				i++;
215 				ce2 = ce1;
216 			}
217 			cc = ce2[0];
218 			if (cc) {
219 				ip += i;
220 				ustrlen -= i;
221 				goto done;
222 			}
223 		}
224 same:
225 		switch (c0) {
226 		case 0:
227 			cc = 0x2400;
228 			break;
229 		case '/':
230 			cc = ':';
231 			break;
232 		default:
233 			cc = c0;
234 		}
235 done:
236 		res = nls->uni2char(cc, op, len);
237 		if (res < 0) {
238 			if (res == -ENAMETOOLONG)
239 				goto out;
240 			*op = '?';
241 			res = 1;
242 		}
243 		op += res;
244 		len -= res;
245 	}
246 	res = 0;
247 out:
248 	*len_p = (char *)op - astr;
249 	return res;
250 }
251 
252 /*
253  * Convert one or more ASCII characters into a single unicode character.
254  * Returns the number of ASCII characters corresponding to the unicode char.
255  */
256 static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
257 			      wchar_t *uc)
258 {
259 	int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
260 	if (size <= 0) {
261 		*uc = '?';
262 		size = 1;
263 	}
264 	switch (*uc) {
265 	case 0x2400:
266 		*uc = 0;
267 		break;
268 	case ':':
269 		*uc = '/';
270 		break;
271 	}
272 	return size;
273 }
274 
275 /* Decomposes a single unicode character. */
276 static inline u16 *decompose_unichar(wchar_t uc, int *size)
277 {
278 	int off;
279 
280 	off = hfsplus_decompose_table[(uc >> 12) & 0xf];
281 	if (off == 0 || off == 0xffff)
282 		return NULL;
283 
284 	off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)];
285 	if (!off)
286 		return NULL;
287 
288 	off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)];
289 	if (!off)
290 		return NULL;
291 
292 	off = hfsplus_decompose_table[off + (uc & 0xf)];
293 	*size = off & 3;
294 	if (*size == 0)
295 		return NULL;
296 	return hfsplus_decompose_table + (off / 4);
297 }
298 
299 int hfsplus_asc2uni(struct super_block *sb,
300 		    struct hfsplus_unistr *ustr, int max_unistr_len,
301 		    const char *astr, int len)
302 {
303 	int size, dsize, decompose;
304 	u16 *dstr, outlen = 0;
305 	wchar_t c;
306 
307 	decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
308 	while (outlen < max_unistr_len && len > 0) {
309 		size = asc2unichar(sb, astr, len, &c);
310 
311 		if (decompose)
312 			dstr = decompose_unichar(c, &dsize);
313 		else
314 			dstr = NULL;
315 		if (dstr) {
316 			if (outlen + dsize > max_unistr_len)
317 				break;
318 			do {
319 				ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
320 			} while (--dsize > 0);
321 		} else
322 			ustr->unicode[outlen++] = cpu_to_be16(c);
323 
324 		astr += size;
325 		len -= size;
326 	}
327 	ustr->length = cpu_to_be16(outlen);
328 	if (len > 0)
329 		return -ENAMETOOLONG;
330 	return 0;
331 }
332 
333 /*
334  * Hash a string to an integer as appropriate for the HFS+ filesystem.
335  * Composed unicode characters are decomposed and case-folding is performed
336  * if the appropriate bits are (un)set on the superblock.
337  */
338 int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str)
339 {
340 	struct super_block *sb = dentry->d_sb;
341 	const char *astr;
342 	const u16 *dstr;
343 	int casefold, decompose, size, len;
344 	unsigned long hash;
345 	wchar_t c;
346 	u16 c2;
347 
348 	casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
349 	decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
350 	hash = init_name_hash(dentry);
351 	astr = str->name;
352 	len = str->len;
353 	while (len > 0) {
354 		int uninitialized_var(dsize);
355 		size = asc2unichar(sb, astr, len, &c);
356 		astr += size;
357 		len -= size;
358 
359 		if (decompose)
360 			dstr = decompose_unichar(c, &dsize);
361 		else
362 			dstr = NULL;
363 		if (dstr) {
364 			do {
365 				c2 = *dstr++;
366 				if (casefold)
367 					c2 = case_fold(c2);
368 				if (!casefold || c2)
369 					hash = partial_name_hash(c2, hash);
370 			} while (--dsize > 0);
371 		} else {
372 			c2 = c;
373 			if (casefold)
374 				c2 = case_fold(c2);
375 			if (!casefold || c2)
376 				hash = partial_name_hash(c2, hash);
377 		}
378 	}
379 	str->hash = end_name_hash(hash);
380 
381 	return 0;
382 }
383 
384 /*
385  * Compare strings with HFS+ filename ordering.
386  * Composed unicode characters are decomposed and case-folding is performed
387  * if the appropriate bits are (un)set on the superblock.
388  */
389 int hfsplus_compare_dentry(const struct dentry *dentry,
390 		unsigned int len, const char *str, const struct qstr *name)
391 {
392 	struct super_block *sb = dentry->d_sb;
393 	int casefold, decompose, size;
394 	int dsize1, dsize2, len1, len2;
395 	const u16 *dstr1, *dstr2;
396 	const char *astr1, *astr2;
397 	u16 c1, c2;
398 	wchar_t c;
399 
400 	casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
401 	decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
402 	astr1 = str;
403 	len1 = len;
404 	astr2 = name->name;
405 	len2 = name->len;
406 	dsize1 = dsize2 = 0;
407 	dstr1 = dstr2 = NULL;
408 
409 	while (len1 > 0 && len2 > 0) {
410 		if (!dsize1) {
411 			size = asc2unichar(sb, astr1, len1, &c);
412 			astr1 += size;
413 			len1 -= size;
414 
415 			if (decompose)
416 				dstr1 = decompose_unichar(c, &dsize1);
417 			if (!decompose || !dstr1) {
418 				c1 = c;
419 				dstr1 = &c1;
420 				dsize1 = 1;
421 			}
422 		}
423 
424 		if (!dsize2) {
425 			size = asc2unichar(sb, astr2, len2, &c);
426 			astr2 += size;
427 			len2 -= size;
428 
429 			if (decompose)
430 				dstr2 = decompose_unichar(c, &dsize2);
431 			if (!decompose || !dstr2) {
432 				c2 = c;
433 				dstr2 = &c2;
434 				dsize2 = 1;
435 			}
436 		}
437 
438 		c1 = *dstr1;
439 		c2 = *dstr2;
440 		if (casefold) {
441 			c1 = case_fold(c1);
442 			if (!c1) {
443 				dstr1++;
444 				dsize1--;
445 				continue;
446 			}
447 			c2 = case_fold(c2);
448 			if (!c2) {
449 				dstr2++;
450 				dsize2--;
451 				continue;
452 			}
453 		}
454 		if (c1 < c2)
455 			return -1;
456 		else if (c1 > c2)
457 			return 1;
458 
459 		dstr1++;
460 		dsize1--;
461 		dstr2++;
462 		dsize2--;
463 	}
464 
465 	if (len1 < len2)
466 		return -1;
467 	if (len1 > len2)
468 		return 1;
469 	return 0;
470 }
471