xref: /illumos-gate/usr/src/common/smbsrv/smb_string.c (revision df3cd224ef765c29101e4110546062199562f757)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #ifdef _KERNEL
27 #include <sys/types.h>
28 #include <sys/sunddi.h>
29 #else
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <strings.h>
34 #endif
35 #include <sys/u8_textprep.h>
36 #include <smbsrv/alloc.h>
37 #include <smbsrv/string.h>
38 #include <smbsrv/cp_usascii.h>
39 #include <smbsrv/cp_unicode.h>
40 
/* Number of elements (not bytes) in the a_unicode table. */
#define	UNICODE_N_ENTRIES	(sizeof (a_unicode) / sizeof (a_unicode[0]))

/*
 * File-scope pointer to the current codepage table, which defaults to
 * the US-ASCII table, and a flag indicating whether the current table
 * is the Unicode one.  Both are updated by smb_codepage_init().
 *
 * The flag also selects how smb_isupper(), smb_islower(), smb_toupper()
 * and smb_tolower() mask their argument before indexing the table:
 * 16 bits when it is set, 8 bits otherwise.
 */
static smb_codepage_t *current_codepage = usascii_codepage;
static boolean_t is_unicode = B_FALSE;

/* Builds the Unicode codepage table; returns NULL on failure. */
static smb_codepage_t *smb_unicode_init(void);
51 
/*
 * strsubst
 *
 * Walk the null-terminated string s and overwrite every character that
 * is equal to orgchar with newchar.  The string is modified in place.
 *
 * Returns a pointer to s, or null if s is null.
 */
char *
strsubst(char *s, char orgchar, char newchar)
{
	char *cur;

	if (s == NULL)
		return (NULL);

	for (cur = s; *cur != '\0'; cur++) {
		if (*cur == orgchar)
			*cur = newchar;
	}

	return (s);
}
74 
/*
 * strcanon
 *
 * Normalize a string by collapsing every run of a repeated character
 * into a single instance of that character, for each character listed
 * in class.  For example:
 *
 *		char *buf = strdup("/d1//d2//d3\\\\d4\\\\f1.txt");
 *		strcanon(buf, "/\\");
 *
 * would result in buf containing the following string:
 *
 *		/d1/d2/d3\d4\f1.txt
 *
 * Only runs of the same character are collapsed; adjacent but different
 * members of class (e.g. "/\") are both kept.
 *
 * This function modifies the contents of buf in place and returns a
 * pointer to buf.  If buf is null, null is returned.  If class is null,
 * buf is returned unmodified.
 */
char *
strcanon(char *buf, const char *class)
{
	char *src;
	char *dst;
	char ch;

	if (buf == NULL)
		return (NULL);

	if (class == NULL)
		return (buf);

	src = buf;
	dst = buf;

	while ((ch = *src) != '\0') {
		/* dst never gets ahead of src, so this is safe in place. */
		*dst++ = ch;

		if (strchr(class, ch) != NULL) {
			/* Skip the entire run, including the one just copied. */
			while (*src == ch)
				++src;
		} else {
			++src;
		}
	}

	*dst = '\0';
	return (buf);
}
111 
112 void
113 smb_codepage_init(void)
114 {
115 	smb_codepage_t *cp;
116 
117 	if (is_unicode)
118 		return;
119 
120 	if ((cp = smb_unicode_init()) != NULL) {
121 		current_codepage = cp;
122 		is_unicode = B_TRUE;
123 	} else {
124 		current_codepage = usascii_codepage;
125 		is_unicode = B_FALSE;
126 	}
127 }
128 
129 /*
130  * Determine whether or not a character is an uppercase character.
131  * This function operates on the current codepage table. Returns
132  * non-zero if the character is uppercase. Otherwise returns zero.
133  */
134 int
135 smb_isupper(int c)
136 {
137 	uint16_t mask = is_unicode ? 0xffff : 0xff;
138 
139 	return (current_codepage[c & mask].ctype & CODEPAGE_ISUPPER);
140 }
141 
142 /*
143  * Determine whether or not a character is an lowercase character.
144  * This function operates on the current codepage table. Returns
145  * non-zero if the character is lowercase. Otherwise returns zero.
146  */
147 int
148 smb_islower(int c)
149 {
150 	uint16_t mask = is_unicode ? 0xffff : 0xff;
151 
152 	return (current_codepage[c & mask].ctype & CODEPAGE_ISLOWER);
153 }
154 
155 /*
156  * Convert individual characters to their uppercase equivalent value.
157  * If the specified character is lowercase, the uppercase value will
158  * be returned. Otherwise the original value will be returned.
159  */
160 int
161 smb_toupper(int c)
162 {
163 	uint16_t mask = is_unicode ? 0xffff : 0xff;
164 
165 	return (current_codepage[c & mask].upper);
166 }
167 
168 /*
169  * Convert individual characters to their lowercase equivalent value.
170  * If the specified character is uppercase, the lowercase value will
171  * be returned. Otherwise the original value will be returned.
172  */
173 int
174 smb_tolower(int c)
175 {
176 	uint16_t mask = is_unicode ? 0xffff : 0xff;
177 
178 	return (current_codepage[c & mask].lower);
179 }
180 
181 /*
182  * Convert a string to uppercase using the appropriate codepage. The
183  * string is converted in place. A pointer to the string is returned.
184  * There is an assumption here that uppercase and lowercase values
185  * always result encode to the same length.
186  */
187 char *
188 smb_strupr(char *s)
189 {
190 	smb_wchar_t c;
191 	char *p = s;
192 
193 	while (*p) {
194 		if (smb_isascii(*p)) {
195 			*p = smb_toupper(*p);
196 			p++;
197 		} else {
198 			if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0)
199 				return (0);
200 
201 			if (c == 0)
202 				break;
203 
204 			c = smb_toupper(c);
205 			p += smb_wctomb(p, c);
206 		}
207 	}
208 
209 	return (s);
210 }
211 
212 /*
213  * Convert a string to lowercase using the appropriate codepage. The
214  * string is converted in place. A pointer to the string is returned.
215  * There is an assumption here that uppercase and lowercase values
216  * always result encode to the same length.
217  */
218 char *
219 smb_strlwr(char *s)
220 {
221 	smb_wchar_t c;
222 	char *p = s;
223 
224 	while (*p) {
225 		if (smb_isascii(*p)) {
226 			*p = smb_tolower(*p);
227 			p++;
228 		} else {
229 			if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0)
230 				return (0);
231 
232 			if (c == 0)
233 				break;
234 
235 			c = smb_tolower(c);
236 			p += smb_wctomb(p, c);
237 		}
238 	}
239 
240 	return (s);
241 }
242 
243 /*
244  * Returns 1 if string contains NO uppercase chars 0 otherwise. However,
245  * -1 is returned if "s" is not a valid multi-byte string.
246  */
247 int
248 smb_isstrlwr(const char *s)
249 {
250 	smb_wchar_t c;
251 	int n;
252 	const char *p = s;
253 
254 	while (*p) {
255 		if (smb_isascii(*p) && smb_isupper(*p))
256 			return (0);
257 		else {
258 			if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0)
259 				return (-1);
260 
261 			if (c == 0)
262 				break;
263 
264 			if (smb_isupper(c))
265 				return (0);
266 
267 			p += n;
268 		}
269 	}
270 
271 	return (1);
272 }
273 
274 /*
275  * Returns 1 if string contains NO lowercase chars 0 otherwise. However,
276  * -1 is returned if "s" is not a valid multi-byte string.
277  */
278 int
279 smb_isstrupr(const char *s)
280 {
281 	smb_wchar_t c;
282 	int n;
283 	const char *p = s;
284 
285 	while (*p) {
286 		if (smb_isascii(*p) && smb_islower(*p))
287 			return (0);
288 		else {
289 			if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0)
290 				return (-1);
291 
292 			if (c == 0)
293 				break;
294 
295 			if (smb_islower(c))
296 				return (0);
297 
298 			p += n;
299 		}
300 	}
301 
302 	return (1);
303 }
304 
305 /*
306  * Compare the null-terminated strings s1 and s2 and return an integer
307  * greater than, equal to or less than 0 dependent on whether s1 is
308  * lexicographically greater than, equal to or less than s2 after
309  * translation of each character to lowercase.  The original strings
310  * are not modified.
311  *
312  * If n is non-zero, at most n bytes are compared.  Otherwise, the strings
313  * are compared until a null terminator is encountered.
314  *
315  * Out:    0 if strings are equal
316  *       < 0 if first string < second string
317  *       > 0 if first string > second string
318  */
319 int
320 smb_strcasecmp(const char *s1, const char *s2, size_t n)
321 {
322 	int	err = 0;
323 	int	rc;
324 
325 	rc = u8_strcmp(s1, s2, n, U8_STRCMP_CI_LOWER, U8_UNICODE_LATEST, &err);
326 	if (err != 0)
327 		return (-1);
328 	return (rc);
329 }
330 
331 /*
332  * First build a codepage based on cp_unicode.h.  Then build the unicode
333  * codepage from this interim codepage by copying the entries over while
334  * fixing them and filling in the gaps.
335  */
336 static smb_codepage_t *
337 smb_unicode_init(void)
338 {
339 	smb_codepage_t	*unicode;
340 	uint32_t	a = 0;
341 	uint32_t	b = 0;
342 
343 	unicode = MEM_ZALLOC("unicode", sizeof (smb_codepage_t) << 16);
344 	if (unicode == NULL)
345 		return (NULL);
346 
347 	while (b != 0xffff) {
348 		/*
349 		 * If there is a gap in the standard,
350 		 * fill in the gap with no-case entries.
351 		 */
352 		if (UNICODE_N_ENTRIES <= a || a_unicode[a].val > b) {
353 			unicode[b].ctype = CODEPAGE_ISNONE;
354 			unicode[b].upper = (smb_wchar_t)b;
355 			unicode[b].lower = (smb_wchar_t)b;
356 			b++;
357 			continue;
358 		}
359 
360 		/*
361 		 * Copy the entry and fixup as required.
362 		 */
363 		switch (a_unicode[a].ctype) {
364 		case CODEPAGE_ISNONE:
365 			/*
366 			 * Replace 0xffff in upper/lower fields with its val.
367 			 */
368 			unicode[b].ctype = CODEPAGE_ISNONE;
369 			unicode[b].upper = (smb_wchar_t)b;
370 			unicode[b].lower = (smb_wchar_t)b;
371 			break;
372 		case CODEPAGE_ISUPPER:
373 			/*
374 			 * Some characters may have case yet not have
375 			 * case conversion.  Treat them as no-case.
376 			 */
377 			if (a_unicode[a].lower == 0xffff) {
378 				unicode[b].ctype = CODEPAGE_ISNONE;
379 				unicode[b].upper = (smb_wchar_t)b;
380 				unicode[b].lower = (smb_wchar_t)b;
381 			} else {
382 				unicode[b].ctype = CODEPAGE_ISUPPER;
383 				unicode[b].upper = (smb_wchar_t)b;
384 				unicode[b].lower = a_unicode[a].lower;
385 			}
386 			break;
387 		case CODEPAGE_ISLOWER:
388 			/*
389 			 * Some characters may have case yet not have
390 			 * case conversion.  Treat them as no-case.
391 			 */
392 			if (a_unicode[a].upper == 0xffff) {
393 				unicode[b].ctype = CODEPAGE_ISNONE;
394 				unicode[b].upper = (smb_wchar_t)b;
395 				unicode[b].lower = (smb_wchar_t)b;
396 			} else {
397 				unicode[b].ctype = CODEPAGE_ISLOWER;
398 				unicode[b].upper = a_unicode[a].upper;
399 				unicode[b].lower = (smb_wchar_t)b;
400 			}
401 			break;
402 		default:
403 			MEM_FREE("unicode", unicode);
404 			return (NULL);
405 		}
406 
407 		a++;
408 		b++;
409 	};
410 
411 	return (unicode);
412 }
413