xref: /linux/fs/unicode/utf8-core.c (revision ec8a42e7343234802b9054874fe01810880289ce)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #include <linux/module.h>
3 #include <linux/kernel.h>
4 #include <linux/string.h>
5 #include <linux/slab.h>
6 #include <linux/parser.h>
7 #include <linux/errno.h>
8 #include <linux/unicode.h>
9 #include <linux/stringhash.h>
10 
11 #include "utf8n.h"
12 
13 int utf8_validate(const struct unicode_map *um, const struct qstr *str)
14 {
15 	const struct utf8data *data = utf8nfdi(um->version);
16 
17 	if (utf8nlen(data, str->name, str->len) < 0)
18 		return -1;
19 	return 0;
20 }
21 EXPORT_SYMBOL(utf8_validate);
22 
23 int utf8_strncmp(const struct unicode_map *um,
24 		 const struct qstr *s1, const struct qstr *s2)
25 {
26 	const struct utf8data *data = utf8nfdi(um->version);
27 	struct utf8cursor cur1, cur2;
28 	int c1, c2;
29 
30 	if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
31 		return -EINVAL;
32 
33 	if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
34 		return -EINVAL;
35 
36 	do {
37 		c1 = utf8byte(&cur1);
38 		c2 = utf8byte(&cur2);
39 
40 		if (c1 < 0 || c2 < 0)
41 			return -EINVAL;
42 		if (c1 != c2)
43 			return 1;
44 	} while (c1);
45 
46 	return 0;
47 }
48 EXPORT_SYMBOL(utf8_strncmp);
49 
50 int utf8_strncasecmp(const struct unicode_map *um,
51 		     const struct qstr *s1, const struct qstr *s2)
52 {
53 	const struct utf8data *data = utf8nfdicf(um->version);
54 	struct utf8cursor cur1, cur2;
55 	int c1, c2;
56 
57 	if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
58 		return -EINVAL;
59 
60 	if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
61 		return -EINVAL;
62 
63 	do {
64 		c1 = utf8byte(&cur1);
65 		c2 = utf8byte(&cur2);
66 
67 		if (c1 < 0 || c2 < 0)
68 			return -EINVAL;
69 		if (c1 != c2)
70 			return 1;
71 	} while (c1);
72 
73 	return 0;
74 }
75 EXPORT_SYMBOL(utf8_strncasecmp);
76 
77 /* String cf is expected to be a valid UTF-8 casefolded
78  * string.
79  */
80 int utf8_strncasecmp_folded(const struct unicode_map *um,
81 			    const struct qstr *cf,
82 			    const struct qstr *s1)
83 {
84 	const struct utf8data *data = utf8nfdicf(um->version);
85 	struct utf8cursor cur1;
86 	int c1, c2;
87 	int i = 0;
88 
89 	if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
90 		return -EINVAL;
91 
92 	do {
93 		c1 = utf8byte(&cur1);
94 		c2 = cf->name[i++];
95 		if (c1 < 0)
96 			return -EINVAL;
97 		if (c1 != c2)
98 			return 1;
99 	} while (c1);
100 
101 	return 0;
102 }
103 EXPORT_SYMBOL(utf8_strncasecmp_folded);
104 
105 int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
106 		  unsigned char *dest, size_t dlen)
107 {
108 	const struct utf8data *data = utf8nfdicf(um->version);
109 	struct utf8cursor cur;
110 	size_t nlen = 0;
111 
112 	if (utf8ncursor(&cur, data, str->name, str->len) < 0)
113 		return -EINVAL;
114 
115 	for (nlen = 0; nlen < dlen; nlen++) {
116 		int c = utf8byte(&cur);
117 
118 		dest[nlen] = c;
119 		if (!c)
120 			return nlen;
121 		if (c == -1)
122 			break;
123 	}
124 	return -EINVAL;
125 }
126 EXPORT_SYMBOL(utf8_casefold);
127 
128 int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
129 		       struct qstr *str)
130 {
131 	const struct utf8data *data = utf8nfdicf(um->version);
132 	struct utf8cursor cur;
133 	int c;
134 	unsigned long hash = init_name_hash(salt);
135 
136 	if (utf8ncursor(&cur, data, str->name, str->len) < 0)
137 		return -EINVAL;
138 
139 	while ((c = utf8byte(&cur))) {
140 		if (c < 0)
141 			return -EINVAL;
142 		hash = partial_name_hash((unsigned char)c, hash);
143 	}
144 	str->hash = end_name_hash(hash);
145 	return 0;
146 }
147 EXPORT_SYMBOL(utf8_casefold_hash);
148 
149 int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
150 		   unsigned char *dest, size_t dlen)
151 {
152 	const struct utf8data *data = utf8nfdi(um->version);
153 	struct utf8cursor cur;
154 	ssize_t nlen = 0;
155 
156 	if (utf8ncursor(&cur, data, str->name, str->len) < 0)
157 		return -EINVAL;
158 
159 	for (nlen = 0; nlen < dlen; nlen++) {
160 		int c = utf8byte(&cur);
161 
162 		dest[nlen] = c;
163 		if (!c)
164 			return nlen;
165 		if (c == -1)
166 			break;
167 	}
168 	return -EINVAL;
169 }
170 
171 EXPORT_SYMBOL(utf8_normalize);
172 
173 static int utf8_parse_version(const char *version, unsigned int *maj,
174 			      unsigned int *min, unsigned int *rev)
175 {
176 	substring_t args[3];
177 	char version_string[12];
178 	static const struct match_token token[] = {
179 		{1, "%d.%d.%d"},
180 		{0, NULL}
181 	};
182 
183 	strncpy(version_string, version, sizeof(version_string));
184 
185 	if (match_token(version_string, token, args) != 1)
186 		return -EINVAL;
187 
188 	if (match_int(&args[0], maj) || match_int(&args[1], min) ||
189 	    match_int(&args[2], rev))
190 		return -EINVAL;
191 
192 	return 0;
193 }
194 
195 struct unicode_map *utf8_load(const char *version)
196 {
197 	struct unicode_map *um = NULL;
198 	int unicode_version;
199 
200 	if (version) {
201 		unsigned int maj, min, rev;
202 
203 		if (utf8_parse_version(version, &maj, &min, &rev) < 0)
204 			return ERR_PTR(-EINVAL);
205 
206 		if (!utf8version_is_supported(maj, min, rev))
207 			return ERR_PTR(-EINVAL);
208 
209 		unicode_version = UNICODE_AGE(maj, min, rev);
210 	} else {
211 		unicode_version = utf8version_latest();
212 		printk(KERN_WARNING"UTF-8 version not specified. "
213 		       "Assuming latest supported version (%d.%d.%d).",
214 		       (unicode_version >> 16) & 0xff,
215 		       (unicode_version >> 8) & 0xff,
216 		       (unicode_version & 0xff));
217 	}
218 
219 	um = kzalloc(sizeof(struct unicode_map), GFP_KERNEL);
220 	if (!um)
221 		return ERR_PTR(-ENOMEM);
222 
223 	um->charset = "UTF-8";
224 	um->version = unicode_version;
225 
226 	return um;
227 }
228 EXPORT_SYMBOL(utf8_load);
229 
/*
 * utf8_unload - release a map obtained from utf8_load().
 *
 * The map is handed straight to kfree(); nothing else is torn down here.
 */
void utf8_unload(struct unicode_map *um)
{
	kfree(um);
}
EXPORT_SYMBOL(utf8_unload);
235 
236 MODULE_LICENSE("GPL v2");
237