xref: /linux/fs/nls/nls_ucs2_utils.h (revision 3ba84ac69b53e6ee07c31d54554e00793d7b144f)
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
3  * Some of the source code in this file came from fs/cifs/cifs_unicode.c
4  * and then via server/unicode.c
5  * cifs_unicode:  Unicode kernel case support
6  *
7  * Function:
8  *     Convert a unicode character to upper or lower case using
9  *     compressed tables.
10  *
11  *   Copyright (c) International Business Machines  Corp., 2000,2009
12  *
13  *
14  * Notes:
15  *     These APIs are based on the C library functions.  The semantics
16  *     should match the C functions but with expanded size operands.
17  *
18  *     The upper/lower functions are based on a table created by mkupr.
19  *     This is a compressed table of upper and lower case conversion.
20  *
21  */
22 #ifndef _NLS_UCS2_UTILS_H
23 #define _NLS_UCS2_UTILS_H
24 
25 #include <asm/byteorder.h>
26 #include <linux/types.h>
27 #include <linux/nls.h>
28 #include <linux/unicode.h>
29 #include "nls_ucs2_data.h"
30 
31 /*
32  * Windows maps these to the user defined 16 bit Unicode range since they are
33  * reserved symbols (along with \ and /), otherwise illegal to store
34  * in filenames in NTFS
35  */
/* Each constant is the reserved character's code plus 0xF000, cast to __u16 */
#define UNI_ASTERISK    ((__u16)('*' + 0xF000))
#define UNI_QUESTION    ((__u16)('?' + 0xF000))
#define UNI_COLON       ((__u16)(':' + 0xF000))
#define UNI_GRTRTHAN    ((__u16)('>' + 0xF000))
#define UNI_LESSTHAN    ((__u16)('<' + 0xF000))
#define UNI_PIPE        ((__u16)('|' + 0xF000))
/* NOTE: despite its name, this one remaps the backslash, not '/' */
#define UNI_SLASH       ((__u16)('\\' + 0xF000))
43 
/*
 * UniStrcat:  Append ucs2, terminator included, to the end of ucs1
 *
 * No bounds checking is performed; ucs1 must already have room for the
 * combined string.
 *
 * Returns:
 *     Address of the first string
 */
static inline wchar_t *UniStrcat(wchar_t *ucs1, const wchar_t *ucs2)
{
	wchar_t *dst = ucs1;

	/* Walk to the terminator of the destination string */
	while (*dst)
		dst++;

	/* Copy the source; stop once its terminator has been stored */
	do {
		*dst = *ucs2++;
	} while (*dst++);

	return ucs1;
}
61 
/*
 * UniStrchr:  Locate a character in a string
 *
 * The terminator itself is searchable: asking for 0 yields the address of
 * the terminating null.
 *
 * Returns:
 *     Address of the first element equal to uc,
 *     or NULL when the string ends before uc is seen
 */
static inline wchar_t *UniStrchr(const wchar_t *ucs, wchar_t uc)
{
	for (;; ucs++) {
		if (*ucs == uc)
			return (wchar_t *)ucs;
		if (!*ucs)	/* hit the end without a match */
			return NULL;
	}
}
78 
/*
 * UniStrcmp:  Compare two null-terminated strings element by element
 *
 * Returns the difference between the first pair of elements that differ
 * (or 0 if both strings end together):
 *     < 0:  First string is less than second
 *     = 0:  Strings are equal
 *     > 0:  First string is greater than second
 */
static inline int UniStrcmp(const wchar_t *ucs1, const wchar_t *ucs2)
{
	/* Skip the common prefix */
	for (; *ucs1 && *ucs1 == *ucs2; ucs1++, ucs2++)
		;

	return (int)*ucs1 - (int)*ucs2;
}
95 
/*
 * UniStrcpy:  Copy ucs2, terminator included, into ucs1
 *
 * No bounds checking is performed on the destination.
 *
 * Returns:
 *     Address of the destination string
 */
static inline wchar_t *UniStrcpy(wchar_t *ucs1, const wchar_t *ucs2)
{
	size_t i;

	/* The loop ends right after the terminating null has been stored */
	for (i = 0; (ucs1[i] = ucs2[i]) != 0; i++)
		;

	return ucs1;
}
107 
/*
 * UniStrlen:  Return the length of a string (in 16 bit Unicode chars not bytes)
 *
 * The terminating null is not counted.  The length is computed as a
 * pointer difference so that it has the full range of the size_t return
 * type instead of being limited by a signed int counter.
 */
static inline size_t UniStrlen(const wchar_t *ucs1)
{
	const wchar_t *end = ucs1;

	while (*end)
		end++;

	return (size_t)(end - ucs1);
}
119 
/*
 * UniStrnlen:  Return the length (in 16 bit Unicode chars not bytes) of a
 *		string (length limited)
 *
 * At most maxlen elements of ucs1 are examined, and the result never
 * exceeds maxlen.  A maxlen of zero or less yields 0 without touching the
 * buffer.
 */
static inline size_t UniStrnlen(const wchar_t *ucs1, int maxlen)
{
	int len = 0;

	/* Check the limit before reading so nothing at or past it is touched */
	while (len < maxlen && ucs1[len])
		len++;

	return len;
}
135 
/*
 * UniStrncat:  Append at most n elements of ucs2 to the end of ucs1
 *
 * The result is always null terminated, so ucs1 needs room for its
 * current contents, up to n appended elements, and one terminator.
 *
 * Returns:
 *     Address of the first string
 */
static inline wchar_t *UniStrncat(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	wchar_t *dst = ucs1;
	size_t i;

	/* Find the terminator of the destination */
	while (*dst)
		dst++;

	for (i = 0; i < n; i++) {
		wchar_t c = ucs2[i];

		*dst = c;
		if (!c)		/* source ended before the limit */
			break;
		dst++;
	}

	*dst = 0;		/* terminate whichever way the loop ended */
	return ucs1;
}
153 
/*
 * UniStrncmp:  Compare at most n elements of two strings
 *
 * Returns 0 when n is 0; otherwise the difference between the elements at
 * the first position where the strings differ, where ucs1 ends, or at
 * index n - 1, whichever comes first.
 */
static inline int UniStrncmp(const wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	size_t i;

	if (n == 0)
		return 0;	/* nothing to compare */

	/* Advance over the common prefix, but never past index n - 1 */
	for (i = 0; i + 1 < n && ucs1[i] && ucs1[i] == ucs2[i]; i++)
		;

	return (int)ucs1[i] - (int)ucs2[i];
}
167 
/*
 * UniStrncmp_le:  Compare length limited string - native to little-endian
 *
 * Elements of ucs1 are used as they are; each element of ucs2 is passed
 * through __le16_to_cpu() before it is compared.  Returns 0 when n is 0,
 * otherwise the difference at the position where the comparison stopped.
 */
static inline int
UniStrncmp_le(const wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	if (n == 0)
		return 0;	/* nothing to compare */

	/* Step forward while elements remain, ucs1 has not ended, and both agree */
	while (--n && *ucs1 && *ucs1 == __le16_to_cpu(*ucs2)) {
		ucs1++;
		ucs2++;
	}

	return (int)*ucs1 - (int)__le16_to_cpu(*ucs2);
}
182 
/*
 * UniStrncpy:  Copy length limited string with pad
 *
 * Exactly n elements of ucs1 are written: the leading elements of ucs2
 * (up to its terminator or n, whichever is first) followed by nulls.  When
 * ucs2 holds n or more elements the result is NOT null terminated.
 *
 * Returns:
 *     Address of the destination string
 */
static inline wchar_t *UniStrncpy(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	size_t i = 0;

	/* Copy phase */
	for (; i < n && ucs2[i]; i++)
		ucs1[i] = ucs2[i];

	/* Pad phase: fill whatever is left of the n elements with nulls */
	for (; i < n; i++)
		ucs1[i] = 0;

	return ucs1;
}
198 
/*
 * UniStrncpy_le:  Copy length limited string with pad to little-endian
 *
 * Exactly n elements of ucs1 are written: converted elements of ucs2 (up
 * to its terminator or n, whichever is first) followed by nulls.  When
 * ucs2 holds n or more elements the result is NOT null terminated.
 *
 * NOTE(review): the description says "to little-endian" while the
 * conversion applied is __le16_to_cpu(); presumably both directions are the
 * same byte operation, but confirm which one is intended.
 *
 * Returns:
 *     Address of the destination string
 */
static inline wchar_t *UniStrncpy_le(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	wchar_t *dst = ucs1;
	wchar_t *const limit = ucs1 + n;

	/* Copy phase, converting each element on the way */
	while (dst < limit && *ucs2) {
		*dst++ = __le16_to_cpu(*ucs2);
		ucs2++;
	}

	/* Pad phase: fill whatever is left of the n elements with nulls */
	while (dst < limit)
		*dst++ = 0;

	return ucs1;
}
214 
/*
 * UniStrstr:  Find the first occurrence of ucs2 inside ucs1
 *
 * An empty ucs2 matches at the very start of ucs1.
 *
 * Returns:
 *     Address of first match found
 *     NULL if no matching string is found
 */
static inline wchar_t *UniStrstr(const wchar_t *ucs1, const wchar_t *ucs2)
{
	const wchar_t *start;

	/* Try every starting position in turn */
	for (start = ucs1; ; start++) {
		const wchar_t *hay = start;
		const wchar_t *needle = ucs2;

		while (*needle && *hay == *needle) {
			hay++;
			needle++;
		}

		if (!*needle)	/* whole needle consumed: match */
			return (wchar_t *)start;
		if (!*hay)	/* haystack ran out first: no later start can match */
			return NULL;
	}
}
244 
245 #ifndef UNIUPR_NOUPPER
246 /*
247  * UniToupper:  Convert a unicode character to upper case
248  */
249 static inline wchar_t UniToupper(register wchar_t uc)
250 {
251 	register const struct UniCaseRange *rp;
252 
253 	if (uc < sizeof(NlsUniUpperTable)) {
254 		/* Latin characters */
255 		return uc + NlsUniUpperTable[uc];	/* Use base tables */
256 	}
257 
258 	rp = NlsUniUpperRange;	/* Use range tables */
259 	while (rp->start) {
260 		if (uc < rp->start)	/* Before start of range */
261 			return uc;	/* Uppercase = input */
262 		if (uc <= rp->end)	/* In range */
263 			return uc + rp->table[uc - rp->start];
264 		rp++;	/* Try next range */
265 	}
266 	return uc;		/* Past last range */
267 }
268 
269 /*
270  * UniStrupr:  Upper case a unicode string
271  */
272 static inline __le16 *UniStrupr(register __le16 *upin)
273 {
274 	register __le16 *up;
275 
276 	up = upin;
277 	while (*up) {		/* For all characters */
278 		*up = cpu_to_le16(UniToupper(le16_to_cpu(*up)));
279 		up++;
280 	}
281 	return upin;		/* Return input pointer */
282 }
283 #endif				/* UNIUPR_NOUPPER */
284 
285 #endif /* _NLS_UCS2_UTILS_H */
286