xref: /illumos-gate/usr/src/common/smbsrv/smb_string.c (revision 48bbca816818409505a6e214d0911fda44e622e3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  *
25  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
26  */
27 
28 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
29 #include <sys/types.h>
30 #include <sys/sunddi.h>
31 #else
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <strings.h>
36 #endif
37 #include <sys/u8_textprep.h>
38 #include <smbsrv/alloc.h>
39 #include <sys/errno.h>
40 #include <smbsrv/string.h>
41 #include <smbsrv/cp_usascii.h>
42 #include <smbsrv/cp_unicode.h>
43 
/*
 * Number of elements in the a_unicode table (presumably supplied by
 * cp_unicode.h -- confirm).
 */
#define	UNICODE_N_ENTRIES	(sizeof (a_unicode) / sizeof (a_unicode[0]))

/*
 * current_codepage points at the table consulted by the smb_is*() and
 * smb_to*() routines in this file; it starts out as the ASCII table.
 *
 * is_unicode records which table is active: B_TRUE once
 * smb_codepage_init() has installed the table built by
 * smb_unicode_init(), B_FALSE while the ASCII table is in use.  The
 * lookup routines use it to choose the mask applied to the table index.
 */
static const smb_codepage_t *current_codepage = usascii_codepage;
static boolean_t is_unicode = B_FALSE;

/* Builds the Unicode codepage table; returns NULL on failure. */
static smb_codepage_t *smb_unicode_init(void);
54 
/*
 * strsubst
 *
 * Replace every occurrence of orgchar in the null-terminated string s
 * with newchar.  The string is modified in place.
 *
 * Returns s, or NULL if s is NULL.
 */
char *
strsubst(char *s, char orgchar, char newchar)
{
	char *cur;

	if (s == NULL)
		return (NULL);

	for (cur = s; *cur != '\0'; cur++) {
		if (*cur == orgchar)
			*cur = newchar;
	}

	return (s);
}
77 
/*
 * strcanon
 *
 * Normalize a string by collapsing each run of a repeated character
 * into a single character, for every character listed in class.  Only
 * runs of the same character are collapsed; two different class
 * characters next to each other are both kept.  For example:
 *
 *		char *buf = strdup("/d1//d2//d3\\\\d4\\\\f1.txt");
 *		strcanon(buf, "/\\");
 *
 * leaves buf containing:
 *
 *		/d1/d2/d3\d4\f1.txt
 *
 * buf is modified in place; the result is never longer than the input.
 *
 * Returns buf.  If buf is NULL, NULL is returned; if class is NULL,
 * buf is returned unmodified.
 */
char *
strcanon(char *buf, const char *class)
{
	char *src;
	char *dst;
	char ch;

	/* Tolerate NULL arguments, as strsubst() does. */
	if (buf == NULL || class == NULL)
		return (buf);

	src = buf;
	dst = buf;

	while ((ch = *src) != '\0') {
		*dst++ = ch;

		if (strchr(class, ch) != NULL) {
			/* Skip the whole run of this class character. */
			while (*src == ch)
				src++;
		} else {
			src++;
		}
	}

	*dst = '\0';
	return (buf);
}
114 
115 void
116 smb_codepage_init(void)
117 {
118 	const smb_codepage_t *cp;
119 
120 	if (is_unicode)
121 		return;
122 
123 	if ((cp = smb_unicode_init()) != NULL) {
124 		current_codepage = cp;
125 		is_unicode = B_TRUE;
126 	} else {
127 		current_codepage = usascii_codepage;
128 		is_unicode = B_FALSE;
129 	}
130 }
131 
132 /*
133  * Determine whether or not a character is an uppercase character.
134  * This function operates on the current codepage table. Returns
135  * non-zero if the character is uppercase. Otherwise returns zero.
136  */
137 int
138 smb_isupper(int c)
139 {
140 	uint16_t mask = is_unicode ? 0xffff : 0xff;
141 
142 	return (current_codepage[c & mask].ctype & CODEPAGE_ISUPPER);
143 }
144 
145 /*
146  * Determine whether or not a character is an lowercase character.
147  * This function operates on the current codepage table. Returns
148  * non-zero if the character is lowercase. Otherwise returns zero.
149  */
150 int
151 smb_islower(int c)
152 {
153 	uint16_t mask = is_unicode ? 0xffff : 0xff;
154 
155 	return (current_codepage[c & mask].ctype & CODEPAGE_ISLOWER);
156 }
157 
158 /*
159  * Convert individual characters to their uppercase equivalent value.
160  * If the specified character is lowercase, the uppercase value will
161  * be returned. Otherwise the original value will be returned.
162  */
163 int
164 smb_toupper(int c)
165 {
166 	uint16_t mask = is_unicode ? 0xffff : 0xff;
167 
168 	return (current_codepage[c & mask].upper);
169 }
170 
171 /*
172  * Convert individual characters to their lowercase equivalent value.
173  * If the specified character is uppercase, the lowercase value will
174  * be returned. Otherwise the original value will be returned.
175  */
176 int
177 smb_tolower(int c)
178 {
179 	uint16_t mask = is_unicode ? 0xffff : 0xff;
180 
181 	return (current_codepage[c & mask].lower);
182 }
183 
184 /*
185  * Convert a string to uppercase using the appropriate codepage. The
186  * string is converted in place. A pointer to the string is returned.
187  * There is an assumption here that uppercase and lowercase values
188  * always result encode to the same length.
189  */
190 char *
191 smb_strupr(char *s)
192 {
193 	smb_wchar_t c;
194 	char *p = s;
195 
196 	while (*p) {
197 		if (smb_isascii(*p)) {
198 			*p = smb_toupper(*p);
199 			p++;
200 		} else {
201 			if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0)
202 				return (0);
203 
204 			if (c == 0)
205 				break;
206 
207 			c = smb_toupper(c);
208 			p += smb_wctomb(p, c);
209 		}
210 	}
211 
212 	return (s);
213 }
214 
215 /*
216  * Convert a string to lowercase using the appropriate codepage. The
217  * string is converted in place. A pointer to the string is returned.
218  * There is an assumption here that uppercase and lowercase values
219  * always result encode to the same length.
220  */
221 char *
222 smb_strlwr(char *s)
223 {
224 	smb_wchar_t c;
225 	char *p = s;
226 
227 	while (*p) {
228 		if (smb_isascii(*p)) {
229 			*p = smb_tolower(*p);
230 			p++;
231 		} else {
232 			if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0)
233 				return (0);
234 
235 			if (c == 0)
236 				break;
237 
238 			c = smb_tolower(c);
239 			p += smb_wctomb(p, c);
240 		}
241 	}
242 
243 	return (s);
244 }
245 
246 /*
247  * Returns 1 if string contains NO uppercase chars 0 otherwise. However,
248  * -1 is returned if "s" is not a valid multi-byte string.
249  */
250 int
251 smb_isstrlwr(const char *s)
252 {
253 	smb_wchar_t c;
254 	int n;
255 	const char *p = s;
256 
257 	while (*p) {
258 		if (smb_isascii(*p) && smb_isupper(*p))
259 			return (0);
260 		else {
261 			if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0)
262 				return (-1);
263 
264 			if (c == 0)
265 				break;
266 
267 			if (smb_isupper(c))
268 				return (0);
269 
270 			p += n;
271 		}
272 	}
273 
274 	return (1);
275 }
276 
277 /*
278  * Returns 1 if string contains NO lowercase chars 0 otherwise. However,
279  * -1 is returned if "s" is not a valid multi-byte string.
280  */
281 int
282 smb_isstrupr(const char *s)
283 {
284 	smb_wchar_t c;
285 	int n;
286 	const char *p = s;
287 
288 	while (*p) {
289 		if (smb_isascii(*p) && smb_islower(*p))
290 			return (0);
291 		else {
292 			if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0)
293 				return (-1);
294 
295 			if (c == 0)
296 				break;
297 
298 			if (smb_islower(c))
299 				return (0);
300 
301 			p += n;
302 		}
303 	}
304 
305 	return (1);
306 }
307 
308 /*
309  * Compare the null-terminated strings s1 and s2 and return an integer
310  * greater than, equal to or less than 0 dependent on whether s1 is
311  * lexicographically greater than, equal to or less than s2 after
312  * translation of each character to lowercase.  The original strings
313  * are not modified.
314  *
315  * If n is non-zero, at most n bytes are compared.  Otherwise, the strings
316  * are compared until a null terminator is encountered.
317  *
318  * Out:    0 if strings are equal
319  *       < 0 if first string < second string
320  *       > 0 if first string > second string
321  */
322 int
323 smb_strcasecmp(const char *s1, const char *s2, size_t n)
324 {
325 	int	err = 0;
326 	int	rc;
327 
328 	rc = u8_strcmp(s1, s2, n, U8_STRCMP_CI_LOWER, U8_UNICODE_LATEST, &err);
329 	if (err != 0)
330 		return (-1);
331 	return (rc);
332 }
333 
334 /*
335  * First build a codepage based on cp_unicode.h.  Then build the unicode
336  * codepage from this interim codepage by copying the entries over while
337  * fixing them and filling in the gaps.
338  */
339 static smb_codepage_t *
340 smb_unicode_init(void)
341 {
342 	smb_codepage_t	*unicode;
343 	uint32_t	a = 0;
344 	uint32_t	b = 0;
345 
346 	unicode = MEM_ZALLOC("unicode", sizeof (smb_codepage_t) << 16);
347 	if (unicode == NULL)
348 		return (NULL);
349 
350 	while (b != 0xffff) {
351 		/*
352 		 * If there is a gap in the standard,
353 		 * fill in the gap with no-case entries.
354 		 */
355 		if (UNICODE_N_ENTRIES <= a || a_unicode[a].val > b) {
356 			unicode[b].ctype = CODEPAGE_ISNONE;
357 			unicode[b].upper = (smb_wchar_t)b;
358 			unicode[b].lower = (smb_wchar_t)b;
359 			b++;
360 			continue;
361 		}
362 
363 		/*
364 		 * Copy the entry and fixup as required.
365 		 */
366 		switch (a_unicode[a].ctype) {
367 		case CODEPAGE_ISNONE:
368 			/*
369 			 * Replace 0xffff in upper/lower fields with its val.
370 			 */
371 			unicode[b].ctype = CODEPAGE_ISNONE;
372 			unicode[b].upper = (smb_wchar_t)b;
373 			unicode[b].lower = (smb_wchar_t)b;
374 			break;
375 		case CODEPAGE_ISUPPER:
376 			/*
377 			 * Some characters may have case yet not have
378 			 * case conversion.  Treat them as no-case.
379 			 */
380 			if (a_unicode[a].lower == 0xffff) {
381 				unicode[b].ctype = CODEPAGE_ISNONE;
382 				unicode[b].upper = (smb_wchar_t)b;
383 				unicode[b].lower = (smb_wchar_t)b;
384 			} else {
385 				unicode[b].ctype = CODEPAGE_ISUPPER;
386 				unicode[b].upper = (smb_wchar_t)b;
387 				unicode[b].lower = a_unicode[a].lower;
388 			}
389 			break;
390 		case CODEPAGE_ISLOWER:
391 			/*
392 			 * Some characters may have case yet not have
393 			 * case conversion.  Treat them as no-case.
394 			 */
395 			if (a_unicode[a].upper == 0xffff) {
396 				unicode[b].ctype = CODEPAGE_ISNONE;
397 				unicode[b].upper = (smb_wchar_t)b;
398 				unicode[b].lower = (smb_wchar_t)b;
399 			} else {
400 				unicode[b].ctype = CODEPAGE_ISLOWER;
401 				unicode[b].upper = a_unicode[a].upper;
402 				unicode[b].lower = (smb_wchar_t)b;
403 			}
404 			break;
405 		default:
406 			MEM_FREE("unicode", unicode);
407 			return (NULL);
408 		}
409 
410 		a++;
411 		b++;
412 	};
413 
414 	return (unicode);
415 }
416 
417 /*
418  * Parse a UNC path (\\server\share\path) into its components.
419  * Although a standard UNC path starts with two '\', in DFS
420  * all UNC paths start with one '\'. So, this function only
421  * checks for one.
422  *
423  * A valid UNC must at least contain two components i.e. server
424  * and share. The path is parsed to:
425  *
426  * unc_server	server or domain name with no leading/trailing '\'
427  * unc_share	share name with no leading/trailing '\'
428  * unc_path	relative path to the share with no leading/trailing '\'
429  * 		it is valid for unc_path to be NULL.
430  *
431  * Upon successful return of this function, smb_unc_free()
432  * MUST be called when returned 'unc' is no longer needed.
433  *
434  * Returns 0 on success, otherwise returns an errno code.
435  */
436 int
437 smb_unc_init(const char *path, smb_unc_t *unc)
438 {
439 	char *p;
440 
441 	if (path == NULL || unc == NULL || (*path != '\\' && *path != '/'))
442 		return (EINVAL);
443 
444 	bzero(unc, sizeof (smb_unc_t));
445 
446 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
447 	unc->unc_buf = smb_mem_strdup(path);
448 #else
449 	if ((unc->unc_buf = strdup(path)) == NULL)
450 		return (ENOMEM);
451 #endif
452 
453 	(void) strsubst(unc->unc_buf, '\\', '/');
454 	(void) strcanon(unc->unc_buf, "/");
455 
456 	unc->unc_server = unc->unc_buf + 1;
457 	if (*unc->unc_server == '\0') {
458 		smb_unc_free(unc);
459 		return (EINVAL);
460 	}
461 
462 	if ((p = strchr(unc->unc_server, '/')) == NULL) {
463 		smb_unc_free(unc);
464 		return (EINVAL);
465 	}
466 
467 	*p++ = '\0';
468 	unc->unc_share = p;
469 
470 	if (*unc->unc_share == '\0') {
471 		smb_unc_free(unc);
472 		return (EINVAL);
473 	}
474 
475 	unc->unc_path = strchr(unc->unc_share, '/');
476 	if ((p = unc->unc_path) == NULL)
477 		return (0);
478 
479 	unc->unc_path++;
480 	*p = '\0';
481 
482 	/* remove the last '/' if any */
483 	if ((p = strchr(unc->unc_path, '\0')) != NULL) {
484 		if (*(--p) == '/')
485 			*p = '\0';
486 	}
487 
488 	return (0);
489 }
490 
491 void
492 smb_unc_free(smb_unc_t *unc)
493 {
494 	if (unc == NULL)
495 		return;
496 
497 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
498 	smb_mem_free(unc->unc_buf);
499 #else
500 	free(unc->unc_buf);
501 #endif
502 	unc->unc_buf = NULL;
503 }
504