xref: /illumos-gate/usr/src/common/smbsrv/smb_string.c (revision 9e88c82d66b3fb22f1b1f25cbc4632977358de62)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  *
25  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
26  * Copyright (c) 2017 by Delphix. All rights reserved.
27  */
28 
29 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
30 #include <sys/types.h>
31 #include <sys/sunddi.h>
32 #else
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <strings.h>
37 #endif
38 #include <sys/u8_textprep.h>
39 #include <smbsrv/alloc.h>
40 #include <sys/errno.h>
41 #include <smbsrv/string.h>
42 #include <smbsrv/cp_usascii.h>
43 #include <smbsrv/cp_unicode.h>
44 
/* Number of entries in the a_unicode source table. */
#define	UNICODE_N_ENTRIES	(sizeof (a_unicode) / sizeof (a_unicode[0]))

/*
 * Global pointer to the current codepage: defaults to ASCII,
 * and a flag indicating whether the codepage is Unicode or ASCII.
 * The flag also selects the index mask used by the character
 * lookup routines below (0xffff for Unicode, 0xff for ASCII).
 */
static const smb_codepage_t *current_codepage = usascii_codepage;
static boolean_t is_unicode = B_FALSE;

/*
 * The table built by smb_unicode_init().  NULL until
 * smb_codepage_init() succeeds; released by smb_codepage_fini().
 */
static smb_codepage_t *unicode_codepage = NULL;

static smb_codepage_t *smb_unicode_init(void);
57 
/*
 * strsubst
 *
 * Walk the string s and overwrite every byte equal to orgchar with
 * newchar.  The string is modified in place.
 *
 * Returns s, or NULL if s is NULL.
 */
char *
strsubst(char *s, char orgchar, char newchar)
{
	char *cur;

	if (s == NULL)
		return (NULL);

	for (cur = s; *cur != '\0'; cur++) {
		if (*cur == orgchar)
			*cur = newchar;
	}

	return (s);
}
80 
/*
 * strcanon
 *
 * Collapse runs of a repeated character into a single occurrence,
 * for every character that appears in class.  Characters that are
 * not in class are copied through untouched.  For example:
 *
 *		char *buf = strdup("/d1//d2//d3\\\\d4\\\\f1.txt");
 *		strcanon(buf, "/\\");
 *
 * leaves buf holding:
 *
 *		/d1/d2/d3\d4\f1.txt
 *
 * Only runs of the same character are collapsed; two different
 * class characters next to each other are both kept.
 *
 * The contents of buf are rewritten in place and buf is returned.
 */
char *
strcanon(char *buf, const char *class)
{
	char *src = buf;
	char *dst = buf;

	while (*src != '\0') {
		char ch = *src;

		/* Always keep the first character of a run. */
		*dst++ = ch;
		++src;

		/* Drop the remainder of the run if ch is in class. */
		if (strchr(class, ch) != NULL) {
			while (*src == ch)
				++src;
		}
	}

	*dst = '\0';
	return (buf);
}
117 
118 void
119 smb_codepage_init(void)
120 {
121 	smb_codepage_t *cp;
122 
123 	if (is_unicode)
124 		return;
125 
126 	if ((cp = smb_unicode_init()) != NULL) {
127 		current_codepage = cp;
128 		unicode_codepage = cp;
129 		is_unicode = B_TRUE;
130 	} else {
131 		current_codepage = usascii_codepage;
132 		is_unicode = B_FALSE;
133 	}
134 }
135 
136 void
137 smb_codepage_fini(void)
138 {
139 	if (unicode_codepage != NULL) {
140 		MEM_FREE("unicode", unicode_codepage);
141 		unicode_codepage = NULL;
142 		current_codepage = NULL;
143 	}
144 }
145 
146 /*
147  * Determine whether or not a character is an uppercase character.
148  * This function operates on the current codepage table. Returns
149  * non-zero if the character is uppercase. Otherwise returns zero.
150  */
151 int
152 smb_isupper(int c)
153 {
154 	uint16_t mask = is_unicode ? 0xffff : 0xff;
155 
156 	return (current_codepage[c & mask].ctype & CODEPAGE_ISUPPER);
157 }
158 
159 /*
160  * Determine whether or not a character is an lowercase character.
161  * This function operates on the current codepage table. Returns
162  * non-zero if the character is lowercase. Otherwise returns zero.
163  */
164 int
165 smb_islower(int c)
166 {
167 	uint16_t mask = is_unicode ? 0xffff : 0xff;
168 
169 	return (current_codepage[c & mask].ctype & CODEPAGE_ISLOWER);
170 }
171 
172 /*
173  * Convert individual characters to their uppercase equivalent value.
174  * If the specified character is lowercase, the uppercase value will
175  * be returned. Otherwise the original value will be returned.
176  */
177 uint32_t
178 smb_toupper(uint32_t c)
179 {
180 	uint16_t mask = is_unicode ? 0xffff : 0xff;
181 
182 	return (current_codepage[c & mask].upper);
183 }
184 
185 /*
186  * Convert individual characters to their lowercase equivalent value.
187  * If the specified character is uppercase, the lowercase value will
188  * be returned. Otherwise the original value will be returned.
189  */
190 uint32_t
191 smb_tolower(uint32_t c)
192 {
193 	uint16_t mask = is_unicode ? 0xffff : 0xff;
194 
195 	return (current_codepage[c & mask].lower);
196 }
197 
198 /*
199  * Convert a string to uppercase using the appropriate codepage. The
200  * string is converted in place. A pointer to the string is returned.
201  * There is an assumption here that uppercase and lowercase values
202  * always result encode to the same length.
203  */
204 char *
205 smb_strupr(char *s)
206 {
207 	uint32_t c;
208 	char *p = s;
209 
210 	while (*p) {
211 		if (smb_isascii(*p)) {
212 			*p = smb_toupper(*p);
213 			p++;
214 		} else {
215 			if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0)
216 				return (0);
217 
218 			if (c == 0)
219 				break;
220 
221 			c = smb_toupper(c);
222 			p += smb_wctomb(p, c);
223 		}
224 	}
225 
226 	return (s);
227 }
228 
229 /*
230  * Convert a string to lowercase using the appropriate codepage. The
231  * string is converted in place. A pointer to the string is returned.
232  * There is an assumption here that uppercase and lowercase values
233  * always result encode to the same length.
234  */
235 char *
236 smb_strlwr(char *s)
237 {
238 	uint32_t c;
239 	char *p = s;
240 
241 	while (*p) {
242 		if (smb_isascii(*p)) {
243 			*p = smb_tolower(*p);
244 			p++;
245 		} else {
246 			if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0)
247 				return (0);
248 
249 			if (c == 0)
250 				break;
251 
252 			c = smb_tolower(c);
253 			p += smb_wctomb(p, c);
254 		}
255 	}
256 
257 	return (s);
258 }
259 
260 /*
261  * Returns 1 if string contains NO uppercase chars 0 otherwise. However,
262  * -1 is returned if "s" is not a valid multi-byte string.
263  */
264 int
265 smb_isstrlwr(const char *s)
266 {
267 	uint32_t c;
268 	int n;
269 	const char *p = s;
270 
271 	while (*p) {
272 		if (smb_isascii(*p) && smb_isupper(*p))
273 			return (0);
274 		else {
275 			if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0)
276 				return (-1);
277 
278 			if (c == 0)
279 				break;
280 
281 			if (smb_isupper(c))
282 				return (0);
283 
284 			p += n;
285 		}
286 	}
287 
288 	return (1);
289 }
290 
291 /*
292  * Returns 1 if string contains NO lowercase chars 0 otherwise. However,
293  * -1 is returned if "s" is not a valid multi-byte string.
294  */
295 int
296 smb_isstrupr(const char *s)
297 {
298 	uint32_t c;
299 	int n;
300 	const char *p = s;
301 
302 	while (*p) {
303 		if (smb_isascii(*p) && smb_islower(*p))
304 			return (0);
305 		else {
306 			if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0)
307 				return (-1);
308 
309 			if (c == 0)
310 				break;
311 
312 			if (smb_islower(c))
313 				return (0);
314 
315 			p += n;
316 		}
317 	}
318 
319 	return (1);
320 }
321 
322 /*
323  * Compare the null-terminated strings s1 and s2 and return an integer
324  * greater than, equal to or less than 0 dependent on whether s1 is
325  * lexicographically greater than, equal to or less than s2 after
326  * translation of each character to lowercase.  The original strings
327  * are not modified.
328  *
329  * If n is non-zero, at most n bytes are compared.  Otherwise, the strings
330  * are compared until a null terminator is encountered.
331  *
332  * Out:    0 if strings are equal
333  *       < 0 if first string < second string
334  *       > 0 if first string > second string
335  */
336 int
337 smb_strcasecmp(const char *s1, const char *s2, size_t n)
338 {
339 	int	err = 0;
340 	int	rc;
341 
342 	rc = u8_strcmp(s1, s2, n, U8_STRCMP_CI_LOWER, U8_UNICODE_LATEST, &err);
343 	if (err != 0)
344 		return (-1);
345 	return (rc);
346 }
347 
348 /*
349  * First build a codepage based on cp_unicode.h.  Then build the unicode
350  * codepage from this interim codepage by copying the entries over while
351  * fixing them and filling in the gaps.
352  */
353 static smb_codepage_t *
354 smb_unicode_init(void)
355 {
356 	smb_codepage_t	*unicode;
357 	uint32_t	a = 0;
358 	uint32_t	b = 0;
359 
360 	unicode = MEM_ZALLOC("unicode", sizeof (smb_codepage_t) << 16);
361 	if (unicode == NULL)
362 		return (NULL);
363 
364 	while (b != 0xffff) {
365 		/*
366 		 * If there is a gap in the standard,
367 		 * fill in the gap with no-case entries.
368 		 */
369 		if (UNICODE_N_ENTRIES <= a || a_unicode[a].val > b) {
370 			unicode[b].ctype = CODEPAGE_ISNONE;
371 			unicode[b].upper = (smb_wchar_t)b;
372 			unicode[b].lower = (smb_wchar_t)b;
373 			b++;
374 			continue;
375 		}
376 
377 		/*
378 		 * Copy the entry and fixup as required.
379 		 */
380 		switch (a_unicode[a].ctype) {
381 		case CODEPAGE_ISNONE:
382 			/*
383 			 * Replace 0xffff in upper/lower fields with its val.
384 			 */
385 			unicode[b].ctype = CODEPAGE_ISNONE;
386 			unicode[b].upper = (smb_wchar_t)b;
387 			unicode[b].lower = (smb_wchar_t)b;
388 			break;
389 		case CODEPAGE_ISUPPER:
390 			/*
391 			 * Some characters may have case yet not have
392 			 * case conversion.  Treat them as no-case.
393 			 */
394 			if (a_unicode[a].lower == 0xffff) {
395 				unicode[b].ctype = CODEPAGE_ISNONE;
396 				unicode[b].upper = (smb_wchar_t)b;
397 				unicode[b].lower = (smb_wchar_t)b;
398 			} else {
399 				unicode[b].ctype = CODEPAGE_ISUPPER;
400 				unicode[b].upper = (smb_wchar_t)b;
401 				unicode[b].lower = a_unicode[a].lower;
402 			}
403 			break;
404 		case CODEPAGE_ISLOWER:
405 			/*
406 			 * Some characters may have case yet not have
407 			 * case conversion.  Treat them as no-case.
408 			 */
409 			if (a_unicode[a].upper == 0xffff) {
410 				unicode[b].ctype = CODEPAGE_ISNONE;
411 				unicode[b].upper = (smb_wchar_t)b;
412 				unicode[b].lower = (smb_wchar_t)b;
413 			} else {
414 				unicode[b].ctype = CODEPAGE_ISLOWER;
415 				unicode[b].upper = a_unicode[a].upper;
416 				unicode[b].lower = (smb_wchar_t)b;
417 			}
418 			break;
419 		default:
420 			MEM_FREE("unicode", unicode);
421 			return (NULL);
422 		}
423 
424 		a++;
425 		b++;
426 	};
427 
428 	return (unicode);
429 }
430 
431 /*
432  * Parse a UNC path (\\server\share\path) into its components.
433  * Although a standard UNC path starts with two '\', in DFS
434  * all UNC paths start with one '\'. So, this function only
435  * checks for one.
436  *
437  * A valid UNC must at least contain two components i.e. server
438  * and share. The path is parsed to:
439  *
440  * unc_server	server or domain name with no leading/trailing '\'
441  * unc_share	share name with no leading/trailing '\'
442  * unc_path	relative path to the share with no leading/trailing '\'
443  *		it is valid for unc_path to be NULL.
444  *
445  * Upon successful return of this function, smb_unc_free()
446  * MUST be called when returned 'unc' is no longer needed.
447  *
448  * Returns 0 on success, otherwise returns an errno code.
449  */
450 int
451 smb_unc_init(const char *path, smb_unc_t *unc)
452 {
453 	char *p;
454 
455 	if (path == NULL || unc == NULL || (*path != '\\' && *path != '/'))
456 		return (EINVAL);
457 
458 	bzero(unc, sizeof (smb_unc_t));
459 
460 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
461 	unc->unc_buf = smb_mem_strdup(path);
462 #else
463 	if ((unc->unc_buf = strdup(path)) == NULL)
464 		return (ENOMEM);
465 #endif
466 
467 	(void) strsubst(unc->unc_buf, '\\', '/');
468 	(void) strcanon(unc->unc_buf, "/");
469 
470 	unc->unc_server = unc->unc_buf + 1;
471 	if (*unc->unc_server == '\0') {
472 		smb_unc_free(unc);
473 		return (EINVAL);
474 	}
475 
476 	if ((p = strchr(unc->unc_server, '/')) == NULL) {
477 		smb_unc_free(unc);
478 		return (EINVAL);
479 	}
480 
481 	*p++ = '\0';
482 	unc->unc_share = p;
483 
484 	if (*unc->unc_share == '\0') {
485 		smb_unc_free(unc);
486 		return (EINVAL);
487 	}
488 
489 	unc->unc_path = strchr(unc->unc_share, '/');
490 	if ((p = unc->unc_path) == NULL)
491 		return (0);
492 
493 	unc->unc_path++;
494 	*p = '\0';
495 
496 	/* remove the last '/' if any */
497 	if ((p = strchr(unc->unc_path, '\0')) != NULL) {
498 		if (*(--p) == '/')
499 			*p = '\0';
500 	}
501 
502 	return (0);
503 }
504 
505 void
506 smb_unc_free(smb_unc_t *unc)
507 {
508 	if (unc == NULL)
509 		return;
510 
511 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
512 	smb_mem_free(unc->unc_buf);
513 #else
514 	free(unc->unc_buf);
515 #endif
516 	unc->unc_buf = NULL;
517 }
518