xref: /linux/fs/unicode/utf8-core.c (revision 2330437da0994321020777c605a2a8cb0ecb7001)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #include <linux/module.h>
3 #include <linux/kernel.h>
4 #include <linux/string.h>
5 #include <linux/slab.h>
6 #include <linux/parser.h>
7 #include <linux/errno.h>
8 #include <linux/stringhash.h>
9 
10 #include "utf8n.h"
11 
12 int utf8_validate(const struct unicode_map *um, const struct qstr *str)
13 {
14 	if (utf8nlen(um, UTF8_NFDI, str->name, str->len) < 0)
15 		return -1;
16 	return 0;
17 }
18 EXPORT_SYMBOL(utf8_validate);
19 
20 int utf8_strncmp(const struct unicode_map *um,
21 		 const struct qstr *s1, const struct qstr *s2)
22 {
23 	struct utf8cursor cur1, cur2;
24 	int c1, c2;
25 
26 	if (utf8ncursor(&cur1, um, UTF8_NFDI, s1->name, s1->len) < 0)
27 		return -EINVAL;
28 
29 	if (utf8ncursor(&cur2, um, UTF8_NFDI, s2->name, s2->len) < 0)
30 		return -EINVAL;
31 
32 	do {
33 		c1 = utf8byte(&cur1);
34 		c2 = utf8byte(&cur2);
35 
36 		if (c1 < 0 || c2 < 0)
37 			return -EINVAL;
38 		if (c1 != c2)
39 			return 1;
40 	} while (c1);
41 
42 	return 0;
43 }
44 EXPORT_SYMBOL(utf8_strncmp);
45 
46 int utf8_strncasecmp(const struct unicode_map *um,
47 		     const struct qstr *s1, const struct qstr *s2)
48 {
49 	struct utf8cursor cur1, cur2;
50 	int c1, c2;
51 
52 	if (utf8ncursor(&cur1, um, UTF8_NFDICF, s1->name, s1->len) < 0)
53 		return -EINVAL;
54 
55 	if (utf8ncursor(&cur2, um, UTF8_NFDICF, s2->name, s2->len) < 0)
56 		return -EINVAL;
57 
58 	do {
59 		c1 = utf8byte(&cur1);
60 		c2 = utf8byte(&cur2);
61 
62 		if (c1 < 0 || c2 < 0)
63 			return -EINVAL;
64 		if (c1 != c2)
65 			return 1;
66 	} while (c1);
67 
68 	return 0;
69 }
70 EXPORT_SYMBOL(utf8_strncasecmp);
71 
72 /* String cf is expected to be a valid UTF-8 casefolded
73  * string.
74  */
75 int utf8_strncasecmp_folded(const struct unicode_map *um,
76 			    const struct qstr *cf,
77 			    const struct qstr *s1)
78 {
79 	struct utf8cursor cur1;
80 	int c1, c2;
81 	int i = 0;
82 
83 	if (utf8ncursor(&cur1, um, UTF8_NFDICF, s1->name, s1->len) < 0)
84 		return -EINVAL;
85 
86 	do {
87 		c1 = utf8byte(&cur1);
88 		c2 = cf->name[i++];
89 		if (c1 < 0)
90 			return -EINVAL;
91 		if (c1 != c2)
92 			return 1;
93 	} while (c1);
94 
95 	return 0;
96 }
97 EXPORT_SYMBOL(utf8_strncasecmp_folded);
98 
99 int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
100 		  unsigned char *dest, size_t dlen)
101 {
102 	struct utf8cursor cur;
103 	size_t nlen = 0;
104 
105 	if (utf8ncursor(&cur, um, UTF8_NFDICF, str->name, str->len) < 0)
106 		return -EINVAL;
107 
108 	for (nlen = 0; nlen < dlen; nlen++) {
109 		int c = utf8byte(&cur);
110 
111 		dest[nlen] = c;
112 		if (!c)
113 			return nlen;
114 		if (c == -1)
115 			break;
116 	}
117 	return -EINVAL;
118 }
119 EXPORT_SYMBOL(utf8_casefold);
120 
121 int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
122 		       struct qstr *str)
123 {
124 	struct utf8cursor cur;
125 	int c;
126 	unsigned long hash = init_name_hash(salt);
127 
128 	if (utf8ncursor(&cur, um, UTF8_NFDICF, str->name, str->len) < 0)
129 		return -EINVAL;
130 
131 	while ((c = utf8byte(&cur))) {
132 		if (c < 0)
133 			return -EINVAL;
134 		hash = partial_name_hash((unsigned char)c, hash);
135 	}
136 	str->hash = end_name_hash(hash);
137 	return 0;
138 }
139 EXPORT_SYMBOL(utf8_casefold_hash);
140 
141 int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
142 		   unsigned char *dest, size_t dlen)
143 {
144 	struct utf8cursor cur;
145 	ssize_t nlen = 0;
146 
147 	if (utf8ncursor(&cur, um, UTF8_NFDI, str->name, str->len) < 0)
148 		return -EINVAL;
149 
150 	for (nlen = 0; nlen < dlen; nlen++) {
151 		int c = utf8byte(&cur);
152 
153 		dest[nlen] = c;
154 		if (!c)
155 			return nlen;
156 		if (c == -1)
157 			break;
158 	}
159 	return -EINVAL;
160 }
161 EXPORT_SYMBOL(utf8_normalize);
162 
163 static const struct utf8data *find_table_version(const struct utf8data *table,
164 		size_t nr_entries, unsigned int version)
165 {
166 	size_t i = nr_entries - 1;
167 
168 	while (version < table[i].maxage)
169 		i--;
170 	if (version > table[i].maxage)
171 		return NULL;
172 	return &table[i];
173 }
174 
175 struct unicode_map *utf8_load(unsigned int version)
176 {
177 	struct unicode_map *um;
178 
179 	um = kzalloc(sizeof(struct unicode_map), GFP_KERNEL);
180 	if (!um)
181 		return ERR_PTR(-ENOMEM);
182 	um->version = version;
183 
184 	um->tables = symbol_request(utf8_data_table);
185 	if (!um->tables)
186 		goto out_free_um;
187 
188 	if (!utf8version_is_supported(um, version))
189 		goto out_symbol_put;
190 	um->ntab[UTF8_NFDI] = find_table_version(um->tables->utf8nfdidata,
191 			um->tables->utf8nfdidata_size, um->version);
192 	if (!um->ntab[UTF8_NFDI])
193 		goto out_symbol_put;
194 	um->ntab[UTF8_NFDICF] = find_table_version(um->tables->utf8nfdicfdata,
195 			um->tables->utf8nfdicfdata_size, um->version);
196 	if (!um->ntab[UTF8_NFDICF])
197 		goto out_symbol_put;
198 	return um;
199 
200 out_symbol_put:
201 	symbol_put(utf8_data_table);
202 out_free_um:
203 	kfree(um);
204 	return ERR_PTR(-EINVAL);
205 }
206 EXPORT_SYMBOL(utf8_load);
207 
208 void utf8_unload(struct unicode_map *um)
209 {
210 	if (um) {
211 		symbol_put(utf8_data_table);
212 		kfree(um);
213 	}
214 }
215 EXPORT_SYMBOL(utf8_unload);
216 
217 /**
218  * utf8_parse_version - Parse a UTF-8 version number from a string
219  *
220  * @version: input string
221  *
222  * Returns the parsed version on success, negative code on error
223  */
224 int utf8_parse_version(char *version)
225 {
226 	substring_t args[3];
227 	unsigned int maj, min, rev;
228 	static const struct match_token token[] = {
229 		{1, "%d.%d.%d"},
230 		{0, NULL}
231 	};
232 
233 	if (match_token(version, token, args) != 1)
234 		return -EINVAL;
235 
236 	if (match_int(&args[0], &maj) || match_int(&args[1], &min) ||
237 	    match_int(&args[2], &rev))
238 		return -EINVAL;
239 
240 	return UNICODE_AGE(maj, min, rev);
241 }
242 EXPORT_SYMBOL(utf8_parse_version);
243