xref: /illumos-gate/usr/src/common/smbsrv/smb_string.c (revision 8412fdadc46d5bd0355a53fda7bda83e60803108)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #ifdef _KERNEL
28 #include <sys/types.h>
29 #include <sys/sunddi.h>
30 #else
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <strings.h>
35 #endif
36 #include <sys/u8_textprep.h>
37 #include <smbsrv/alloc.h>
38 #include <sys/errno.h>
39 #include <smbsrv/string.h>
40 #include <smbsrv/cp_usascii.h>
41 #include <smbsrv/cp_unicode.h>
42 
/*
 * Number of elements in the a_unicode source table that
 * smb_unicode_init() walks when building the Unicode codepage.
 */
#define	UNICODE_N_ENTRIES	(sizeof (a_unicode) / sizeof (a_unicode[0]))

/*
 * Global pointer to the current codepage: defaults to ASCII,
 * and a flag indicating whether the codepage is Unicode or ASCII.
 *
 * Both are updated by smb_codepage_init().  The character-level case
 * helpers in this file use the flag to choose how many low bits of a
 * character index the table (16 bits for Unicode, 8 bits for ASCII).
 */
static const smb_codepage_t *current_codepage = usascii_codepage;
static boolean_t is_unicode = B_FALSE;

/* Builds the Unicode codepage table; defined later in this file. */
static smb_codepage_t *smb_unicode_init(void);
53 
/*
 * strsubst
 *
 * Walk the NUL-terminated string s and overwrite every byte equal to
 * orgchar with newchar.  The string is modified in place.
 *
 * Returns s, or NULL if s is NULL.
 */
char *
strsubst(char *s, char orgchar, char newchar)
{
	char *cur;

	if (s == NULL)
		return (NULL);

	for (cur = s; *cur != '\0'; cur++) {
		if (*cur == orgchar)
			*cur = newchar;
	}

	return (s);
}
76 
/*
 * strcanon
 *
 * Normalize a string by collapsing every run of a repeated character
 * into a single occurrence, for each character listed in class.
 * For example;
 *
 *		char *buf = strdup("/d1//d2//d3\\\\d4\\\\f1.txt");
 *		strcanon(buf, "/\\");
 *
 * Would result in buf containing the following string:
 *
 *		/d1/d2/d3\d4\f1.txt
 *
 * Characters not listed in class are copied unchanged, even when
 * repeated.  The contents of buf are modified in place; the result is
 * never longer than the input.
 *
 * Returns buf.  If buf is NULL, NULL is returned; if class is NULL
 * there is nothing to collapse and buf is returned untouched.
 */
char *
strcanon(char *buf, const char *class)
{
	char *src;
	char *dst;
	char ch;

	if (buf == NULL || class == NULL)
		return (buf);

	src = buf;
	dst = buf;

	while ((ch = *src) != '\0') {
		*dst++ = ch;
		++src;

		/* For a class member, skip the rest of the run. */
		if (strchr(class, ch) != NULL) {
			while (*src == ch)
				++src;
		}
	}

	*dst = '\0';
	return (buf);
}
113 
114 void
115 smb_codepage_init(void)
116 {
117 	const smb_codepage_t *cp;
118 
119 	if (is_unicode)
120 		return;
121 
122 	if ((cp = smb_unicode_init()) != NULL) {
123 		current_codepage = cp;
124 		is_unicode = B_TRUE;
125 	} else {
126 		current_codepage = usascii_codepage;
127 		is_unicode = B_FALSE;
128 	}
129 }
130 
131 /*
132  * Determine whether or not a character is an uppercase character.
133  * This function operates on the current codepage table. Returns
134  * non-zero if the character is uppercase. Otherwise returns zero.
135  */
136 int
137 smb_isupper(int c)
138 {
139 	uint16_t mask = is_unicode ? 0xffff : 0xff;
140 
141 	return (current_codepage[c & mask].ctype & CODEPAGE_ISUPPER);
142 }
143 
144 /*
145  * Determine whether or not a character is an lowercase character.
146  * This function operates on the current codepage table. Returns
147  * non-zero if the character is lowercase. Otherwise returns zero.
148  */
149 int
150 smb_islower(int c)
151 {
152 	uint16_t mask = is_unicode ? 0xffff : 0xff;
153 
154 	return (current_codepage[c & mask].ctype & CODEPAGE_ISLOWER);
155 }
156 
157 /*
158  * Convert individual characters to their uppercase equivalent value.
159  * If the specified character is lowercase, the uppercase value will
160  * be returned. Otherwise the original value will be returned.
161  */
162 int
163 smb_toupper(int c)
164 {
165 	uint16_t mask = is_unicode ? 0xffff : 0xff;
166 
167 	return (current_codepage[c & mask].upper);
168 }
169 
170 /*
171  * Convert individual characters to their lowercase equivalent value.
172  * If the specified character is uppercase, the lowercase value will
173  * be returned. Otherwise the original value will be returned.
174  */
175 int
176 smb_tolower(int c)
177 {
178 	uint16_t mask = is_unicode ? 0xffff : 0xff;
179 
180 	return (current_codepage[c & mask].lower);
181 }
182 
183 /*
184  * Convert a string to uppercase using the appropriate codepage. The
185  * string is converted in place. A pointer to the string is returned.
186  * There is an assumption here that uppercase and lowercase values
187  * always result encode to the same length.
188  */
189 char *
190 smb_strupr(char *s)
191 {
192 	smb_wchar_t c;
193 	char *p = s;
194 
195 	while (*p) {
196 		if (smb_isascii(*p)) {
197 			*p = smb_toupper(*p);
198 			p++;
199 		} else {
200 			if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0)
201 				return (0);
202 
203 			if (c == 0)
204 				break;
205 
206 			c = smb_toupper(c);
207 			p += smb_wctomb(p, c);
208 		}
209 	}
210 
211 	return (s);
212 }
213 
214 /*
215  * Convert a string to lowercase using the appropriate codepage. The
216  * string is converted in place. A pointer to the string is returned.
217  * There is an assumption here that uppercase and lowercase values
218  * always result encode to the same length.
219  */
220 char *
221 smb_strlwr(char *s)
222 {
223 	smb_wchar_t c;
224 	char *p = s;
225 
226 	while (*p) {
227 		if (smb_isascii(*p)) {
228 			*p = smb_tolower(*p);
229 			p++;
230 		} else {
231 			if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0)
232 				return (0);
233 
234 			if (c == 0)
235 				break;
236 
237 			c = smb_tolower(c);
238 			p += smb_wctomb(p, c);
239 		}
240 	}
241 
242 	return (s);
243 }
244 
245 /*
246  * Returns 1 if string contains NO uppercase chars 0 otherwise. However,
247  * -1 is returned if "s" is not a valid multi-byte string.
248  */
249 int
250 smb_isstrlwr(const char *s)
251 {
252 	smb_wchar_t c;
253 	int n;
254 	const char *p = s;
255 
256 	while (*p) {
257 		if (smb_isascii(*p) && smb_isupper(*p))
258 			return (0);
259 		else {
260 			if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0)
261 				return (-1);
262 
263 			if (c == 0)
264 				break;
265 
266 			if (smb_isupper(c))
267 				return (0);
268 
269 			p += n;
270 		}
271 	}
272 
273 	return (1);
274 }
275 
276 /*
277  * Returns 1 if string contains NO lowercase chars 0 otherwise. However,
278  * -1 is returned if "s" is not a valid multi-byte string.
279  */
280 int
281 smb_isstrupr(const char *s)
282 {
283 	smb_wchar_t c;
284 	int n;
285 	const char *p = s;
286 
287 	while (*p) {
288 		if (smb_isascii(*p) && smb_islower(*p))
289 			return (0);
290 		else {
291 			if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0)
292 				return (-1);
293 
294 			if (c == 0)
295 				break;
296 
297 			if (smb_islower(c))
298 				return (0);
299 
300 			p += n;
301 		}
302 	}
303 
304 	return (1);
305 }
306 
307 /*
308  * Compare the null-terminated strings s1 and s2 and return an integer
309  * greater than, equal to or less than 0 dependent on whether s1 is
310  * lexicographically greater than, equal to or less than s2 after
311  * translation of each character to lowercase.  The original strings
312  * are not modified.
313  *
314  * If n is non-zero, at most n bytes are compared.  Otherwise, the strings
315  * are compared until a null terminator is encountered.
316  *
317  * Out:    0 if strings are equal
318  *       < 0 if first string < second string
319  *       > 0 if first string > second string
320  */
321 int
322 smb_strcasecmp(const char *s1, const char *s2, size_t n)
323 {
324 	int	err = 0;
325 	int	rc;
326 
327 	rc = u8_strcmp(s1, s2, n, U8_STRCMP_CI_LOWER, U8_UNICODE_LATEST, &err);
328 	if (err != 0)
329 		return (-1);
330 	return (rc);
331 }
332 
333 /*
334  * First build a codepage based on cp_unicode.h.  Then build the unicode
335  * codepage from this interim codepage by copying the entries over while
336  * fixing them and filling in the gaps.
337  */
338 static smb_codepage_t *
339 smb_unicode_init(void)
340 {
341 	smb_codepage_t	*unicode;
342 	uint32_t	a = 0;
343 	uint32_t	b = 0;
344 
345 	unicode = MEM_ZALLOC("unicode", sizeof (smb_codepage_t) << 16);
346 	if (unicode == NULL)
347 		return (NULL);
348 
349 	while (b != 0xffff) {
350 		/*
351 		 * If there is a gap in the standard,
352 		 * fill in the gap with no-case entries.
353 		 */
354 		if (UNICODE_N_ENTRIES <= a || a_unicode[a].val > b) {
355 			unicode[b].ctype = CODEPAGE_ISNONE;
356 			unicode[b].upper = (smb_wchar_t)b;
357 			unicode[b].lower = (smb_wchar_t)b;
358 			b++;
359 			continue;
360 		}
361 
362 		/*
363 		 * Copy the entry and fixup as required.
364 		 */
365 		switch (a_unicode[a].ctype) {
366 		case CODEPAGE_ISNONE:
367 			/*
368 			 * Replace 0xffff in upper/lower fields with its val.
369 			 */
370 			unicode[b].ctype = CODEPAGE_ISNONE;
371 			unicode[b].upper = (smb_wchar_t)b;
372 			unicode[b].lower = (smb_wchar_t)b;
373 			break;
374 		case CODEPAGE_ISUPPER:
375 			/*
376 			 * Some characters may have case yet not have
377 			 * case conversion.  Treat them as no-case.
378 			 */
379 			if (a_unicode[a].lower == 0xffff) {
380 				unicode[b].ctype = CODEPAGE_ISNONE;
381 				unicode[b].upper = (smb_wchar_t)b;
382 				unicode[b].lower = (smb_wchar_t)b;
383 			} else {
384 				unicode[b].ctype = CODEPAGE_ISUPPER;
385 				unicode[b].upper = (smb_wchar_t)b;
386 				unicode[b].lower = a_unicode[a].lower;
387 			}
388 			break;
389 		case CODEPAGE_ISLOWER:
390 			/*
391 			 * Some characters may have case yet not have
392 			 * case conversion.  Treat them as no-case.
393 			 */
394 			if (a_unicode[a].upper == 0xffff) {
395 				unicode[b].ctype = CODEPAGE_ISNONE;
396 				unicode[b].upper = (smb_wchar_t)b;
397 				unicode[b].lower = (smb_wchar_t)b;
398 			} else {
399 				unicode[b].ctype = CODEPAGE_ISLOWER;
400 				unicode[b].upper = a_unicode[a].upper;
401 				unicode[b].lower = (smb_wchar_t)b;
402 			}
403 			break;
404 		default:
405 			MEM_FREE("unicode", unicode);
406 			return (NULL);
407 		}
408 
409 		a++;
410 		b++;
411 	};
412 
413 	return (unicode);
414 }
415 
416 /*
417  * Parse a UNC path (\\server\share\path) into its components.
418  * Although a standard UNC path starts with two '\', in DFS
419  * all UNC paths start with one '\'. So, this function only
420  * checks for one.
421  *
422  * A valid UNC must at least contain two components i.e. server
423  * and share. The path is parsed to:
424  *
425  * unc_server	server or domain name with no leading/trailing '\'
426  * unc_share	share name with no leading/trailing '\'
427  * unc_path	relative path to the share with no leading/trailing '\'
428  * 		it is valid for unc_path to be NULL.
429  *
430  * Upon successful return of this function, smb_unc_free()
431  * MUST be called when returned 'unc' is no longer needed.
432  *
433  * Returns 0 on success, otherwise returns an errno code.
434  */
435 int
436 smb_unc_init(const char *path, smb_unc_t *unc)
437 {
438 	char *p;
439 
440 	if (path == NULL || unc == NULL || (*path != '\\' && *path != '/'))
441 		return (EINVAL);
442 
443 	bzero(unc, sizeof (smb_unc_t));
444 
445 #ifdef _KERNEL
446 	unc->unc_buf = smb_mem_strdup(path);
447 #else
448 	if ((unc->unc_buf = strdup(path)) == NULL)
449 		return (ENOMEM);
450 #endif
451 
452 	(void) strsubst(unc->unc_buf, '\\', '/');
453 	(void) strcanon(unc->unc_buf, "/");
454 
455 	unc->unc_server = unc->unc_buf + 1;
456 	if (*unc->unc_server == '\0') {
457 		smb_unc_free(unc);
458 		return (EINVAL);
459 	}
460 
461 	if ((p = strchr(unc->unc_server, '/')) == NULL) {
462 		smb_unc_free(unc);
463 		return (EINVAL);
464 	}
465 
466 	*p++ = '\0';
467 	unc->unc_share = p;
468 
469 	if (*unc->unc_share == '\0') {
470 		smb_unc_free(unc);
471 		return (EINVAL);
472 	}
473 
474 	unc->unc_path = strchr(unc->unc_share, '/');
475 	if ((p = unc->unc_path) == NULL)
476 		return (0);
477 
478 	unc->unc_path++;
479 	*p = '\0';
480 
481 	/* remove the last '/' if any */
482 	if ((p = strchr(unc->unc_path, '\0')) != NULL) {
483 		if (*(--p) == '/')
484 			*p = '\0';
485 	}
486 
487 	return (0);
488 }
489 
490 void
491 smb_unc_free(smb_unc_t *unc)
492 {
493 	if (unc == NULL)
494 		return;
495 
496 #ifdef _KERNEL
497 	smb_mem_free(unc->unc_buf);
498 #else
499 	free(unc->unc_buf);
500 #endif
501 	unc->unc_buf = NULL;
502 }
503