1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 *
25 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
26 * Copyright (c) 2017 by Delphix. All rights reserved.
27 */
28
29 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
30 #include <sys/types.h>
31 #include <sys/sunddi.h>
32 #else
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <strings.h>
37 #endif
38 #include <sys/u8_textprep.h>
39 #include <smbsrv/alloc.h>
40 #include <sys/errno.h>
41 #include <smbsrv/string.h>
42 #include <smbsrv/cp_usascii.h>
43 #include <smbsrv/cp_unicode.h>
44
/* Number of entries in the a_unicode source table. */
#define UNICODE_N_ENTRIES (sizeof (a_unicode) / sizeof (a_unicode[0]))

/*
 * Global pointer to the current codepage: defaults to ASCII,
 * and a flag indicating whether the codepage is Unicode or ASCII.
 *
 * The character classification and conversion routines in this file
 * index current_codepage directly and consult is_unicode to decide
 * how wide an index (0xff or 0xffff) the table supports.
 */
static const smb_codepage_t *current_codepage = usascii_codepage;
static boolean_t is_unicode = B_FALSE;

/*
 * The table built by smb_unicode_init(), kept so that
 * smb_codepage_fini() can release it.  NULL when no table is held.
 */
static smb_codepage_t *unicode_codepage = NULL;

/* Defined further down; builds the full Unicode codepage table. */
static smb_codepage_t *smb_unicode_init(void);
57
/*
 * strsubst
 *
 * Replace every occurrence of orgchar in the string s with newchar.
 * The string is modified in place.
 *
 * Returns s, or NULL if s is NULL.
 */
char *
strsubst(char *s, char orgchar, char newchar)
{
	char *cur;

	if (s == NULL)
		return (NULL);

	for (cur = s; *cur != '\0'; cur++) {
		if (*cur == orgchar)
			*cur = newchar;
	}

	return (s);
}
80
/*
 * strcanon
 *
 * Collapse runs of a repeated character into a single instance, for
 * every character that appears in class.  Characters not listed in
 * class are copied through untouched, repeated or not.  For example:
 *
 *	char *buf = strdup("/d1//d2//d3\\\\d4\\\\f1.txt");
 *	strcanon(buf, "/\\");
 *
 * leaves buf containing:
 *
 *	/d1/d2/d3\d4\f1.txt
 *
 * Only runs of the *same* character are collapsed; two different
 * class characters next to each other are both kept.
 *
 * The contents of buf are rewritten in place; buf is returned.
 */
char *
strcanon(char *buf, const char *class)
{
	char *src = buf;
	char *dst = buf;
	char ch;

	while ((ch = *src++) != '\0') {
		*dst++ = ch;

		if (strchr(class, ch) == NULL)
			continue;

		/* Swallow the rest of this run. */
		while (*src == ch)
			src++;
	}

	*dst = '\0';
	return (buf);
}
117
118 void
smb_codepage_init(void)119 smb_codepage_init(void)
120 {
121 smb_codepage_t *cp;
122
123 if (is_unicode)
124 return;
125
126 if ((cp = smb_unicode_init()) != NULL) {
127 current_codepage = cp;
128 unicode_codepage = cp;
129 is_unicode = B_TRUE;
130 } else {
131 current_codepage = usascii_codepage;
132 is_unicode = B_FALSE;
133 }
134 }
135
136 void
smb_codepage_fini(void)137 smb_codepage_fini(void)
138 {
139 if (unicode_codepage != NULL) {
140 MEM_FREE("unicode", unicode_codepage);
141 unicode_codepage = NULL;
142 current_codepage = NULL;
143 }
144 }
145
146 /*
147 * Determine whether or not a character is an uppercase character.
148 * This function operates on the current codepage table. Returns
149 * non-zero if the character is uppercase. Otherwise returns zero.
150 */
151 int
smb_isupper(int c)152 smb_isupper(int c)
153 {
154 uint16_t mask = is_unicode ? 0xffff : 0xff;
155
156 return (current_codepage[c & mask].ctype & CODEPAGE_ISUPPER);
157 }
158
159 /*
160 * Determine whether or not a character is an lowercase character.
161 * This function operates on the current codepage table. Returns
162 * non-zero if the character is lowercase. Otherwise returns zero.
163 */
164 int
smb_islower(int c)165 smb_islower(int c)
166 {
167 uint16_t mask = is_unicode ? 0xffff : 0xff;
168
169 return (current_codepage[c & mask].ctype & CODEPAGE_ISLOWER);
170 }
171
172 /*
173 * Convert individual characters to their uppercase equivalent value.
174 * If the specified character is lowercase, the uppercase value will
175 * be returned. Otherwise the original value will be returned.
176 */
177 uint32_t
smb_toupper(uint32_t c)178 smb_toupper(uint32_t c)
179 {
180 uint16_t mask = is_unicode ? 0xffff : 0xff;
181
182 return (current_codepage[c & mask].upper);
183 }
184
185 /*
186 * Convert individual characters to their lowercase equivalent value.
187 * If the specified character is uppercase, the lowercase value will
188 * be returned. Otherwise the original value will be returned.
189 */
190 uint32_t
smb_tolower(uint32_t c)191 smb_tolower(uint32_t c)
192 {
193 uint16_t mask = is_unicode ? 0xffff : 0xff;
194
195 return (current_codepage[c & mask].lower);
196 }
197
198 /*
199 * Convert a string to uppercase using the appropriate codepage. The
200 * string is converted in place. A pointer to the string is returned.
201 * There is an assumption here that uppercase and lowercase values
202 * always result encode to the same length.
203 */
204 char *
smb_strupr(char * s)205 smb_strupr(char *s)
206 {
207 uint32_t c;
208 char *p = s;
209
210 while (*p) {
211 if (smb_isascii(*p)) {
212 *p = smb_toupper(*p);
213 p++;
214 } else {
215 if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0)
216 return (0);
217
218 if (c == 0)
219 break;
220
221 c = smb_toupper(c);
222 p += smb_wctomb(p, c);
223 }
224 }
225
226 return (s);
227 }
228
229 /*
230 * Convert a string to lowercase using the appropriate codepage. The
231 * string is converted in place. A pointer to the string is returned.
232 * There is an assumption here that uppercase and lowercase values
233 * always result encode to the same length.
234 */
235 char *
smb_strlwr(char * s)236 smb_strlwr(char *s)
237 {
238 uint32_t c;
239 char *p = s;
240
241 while (*p) {
242 if (smb_isascii(*p)) {
243 *p = smb_tolower(*p);
244 p++;
245 } else {
246 if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0)
247 return (0);
248
249 if (c == 0)
250 break;
251
252 c = smb_tolower(c);
253 p += smb_wctomb(p, c);
254 }
255 }
256
257 return (s);
258 }
259
260 /*
261 * Returns 1 if string contains NO uppercase chars 0 otherwise. However,
262 * -1 is returned if "s" is not a valid multi-byte string.
263 */
264 int
smb_isstrlwr(const char * s)265 smb_isstrlwr(const char *s)
266 {
267 uint32_t c;
268 int n;
269 const char *p = s;
270
271 while (*p) {
272 if (smb_isascii(*p) && smb_isupper(*p))
273 return (0);
274 else {
275 if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0)
276 return (-1);
277
278 if (c == 0)
279 break;
280
281 if (smb_isupper(c))
282 return (0);
283
284 p += n;
285 }
286 }
287
288 return (1);
289 }
290
291 /*
292 * Returns 1 if string contains NO lowercase chars 0 otherwise. However,
293 * -1 is returned if "s" is not a valid multi-byte string.
294 */
295 int
smb_isstrupr(const char * s)296 smb_isstrupr(const char *s)
297 {
298 uint32_t c;
299 int n;
300 const char *p = s;
301
302 while (*p) {
303 if (smb_isascii(*p) && smb_islower(*p))
304 return (0);
305 else {
306 if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0)
307 return (-1);
308
309 if (c == 0)
310 break;
311
312 if (smb_islower(c))
313 return (0);
314
315 p += n;
316 }
317 }
318
319 return (1);
320 }
321
322 /*
323 * Compare the null-terminated strings s1 and s2 and return an integer
324 * greater than, equal to or less than 0 dependent on whether s1 is
325 * lexicographically greater than, equal to or less than s2 after
326 * translation of each character to lowercase. The original strings
327 * are not modified.
328 *
329 * If n is non-zero, at most n bytes are compared. Otherwise, the strings
330 * are compared until a null terminator is encountered.
331 *
332 * Out: 0 if strings are equal
333 * < 0 if first string < second string
334 * > 0 if first string > second string
335 */
336 int
smb_strcasecmp(const char * s1,const char * s2,size_t n)337 smb_strcasecmp(const char *s1, const char *s2, size_t n)
338 {
339 int err = 0;
340 int rc;
341
342 rc = u8_strcmp(s1, s2, n, U8_STRCMP_CI_LOWER, U8_UNICODE_LATEST, &err);
343 if (err != 0)
344 return (-1);
345 return (rc);
346 }
347
348 /*
349 * First build a codepage based on cp_unicode.h. Then build the unicode
350 * codepage from this interim codepage by copying the entries over while
351 * fixing them and filling in the gaps.
352 */
353 static smb_codepage_t *
smb_unicode_init(void)354 smb_unicode_init(void)
355 {
356 smb_codepage_t *unicode;
357 uint32_t a = 0;
358 uint32_t b = 0;
359
360 unicode = MEM_ZALLOC("unicode", sizeof (smb_codepage_t) << 16);
361 if (unicode == NULL)
362 return (NULL);
363
364 while (b != 0xffff) {
365 /*
366 * If there is a gap in the standard,
367 * fill in the gap with no-case entries.
368 */
369 if (UNICODE_N_ENTRIES <= a || a_unicode[a].val > b) {
370 unicode[b].ctype = CODEPAGE_ISNONE;
371 unicode[b].upper = (smb_wchar_t)b;
372 unicode[b].lower = (smb_wchar_t)b;
373 b++;
374 continue;
375 }
376
377 /*
378 * Copy the entry and fixup as required.
379 */
380 switch (a_unicode[a].ctype) {
381 case CODEPAGE_ISNONE:
382 /*
383 * Replace 0xffff in upper/lower fields with its val.
384 */
385 unicode[b].ctype = CODEPAGE_ISNONE;
386 unicode[b].upper = (smb_wchar_t)b;
387 unicode[b].lower = (smb_wchar_t)b;
388 break;
389 case CODEPAGE_ISUPPER:
390 /*
391 * Some characters may have case yet not have
392 * case conversion. Treat them as no-case.
393 */
394 if (a_unicode[a].lower == 0xffff) {
395 unicode[b].ctype = CODEPAGE_ISNONE;
396 unicode[b].upper = (smb_wchar_t)b;
397 unicode[b].lower = (smb_wchar_t)b;
398 } else {
399 unicode[b].ctype = CODEPAGE_ISUPPER;
400 unicode[b].upper = (smb_wchar_t)b;
401 unicode[b].lower = a_unicode[a].lower;
402 }
403 break;
404 case CODEPAGE_ISLOWER:
405 /*
406 * Some characters may have case yet not have
407 * case conversion. Treat them as no-case.
408 */
409 if (a_unicode[a].upper == 0xffff) {
410 unicode[b].ctype = CODEPAGE_ISNONE;
411 unicode[b].upper = (smb_wchar_t)b;
412 unicode[b].lower = (smb_wchar_t)b;
413 } else {
414 unicode[b].ctype = CODEPAGE_ISLOWER;
415 unicode[b].upper = a_unicode[a].upper;
416 unicode[b].lower = (smb_wchar_t)b;
417 }
418 break;
419 default:
420 MEM_FREE("unicode", unicode);
421 return (NULL);
422 }
423
424 a++;
425 b++;
426 };
427
428 return (unicode);
429 }
430
431 /*
432 * Parse a UNC path (\\server\share\path) into its components.
433 * Although a standard UNC path starts with two '\', in DFS
434 * all UNC paths start with one '\'. So, this function only
435 * checks for one.
436 *
437 * A valid UNC must at least contain two components i.e. server
438 * and share. The path is parsed to:
439 *
440 * unc_server server or domain name with no leading/trailing '\'
441 * unc_share share name with no leading/trailing '\'
442 * unc_path relative path to the share with no leading/trailing '\'
443 * it is valid for unc_path to be NULL.
444 *
445 * Upon successful return of this function, smb_unc_free()
446 * MUST be called when returned 'unc' is no longer needed.
447 *
448 * Returns 0 on success, otherwise returns an errno code.
449 */
450 int
smb_unc_init(const char * path,smb_unc_t * unc)451 smb_unc_init(const char *path, smb_unc_t *unc)
452 {
453 char *p;
454
455 if (path == NULL || unc == NULL || (*path != '\\' && *path != '/'))
456 return (EINVAL);
457
458 bzero(unc, sizeof (smb_unc_t));
459
460 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
461 unc->unc_buf = smb_mem_strdup(path);
462 #else
463 if ((unc->unc_buf = strdup(path)) == NULL)
464 return (ENOMEM);
465 #endif
466
467 (void) strsubst(unc->unc_buf, '\\', '/');
468 (void) strcanon(unc->unc_buf, "/");
469
470 unc->unc_server = unc->unc_buf + 1;
471 if (*unc->unc_server == '\0') {
472 smb_unc_free(unc);
473 return (EINVAL);
474 }
475
476 if ((p = strchr(unc->unc_server, '/')) == NULL) {
477 smb_unc_free(unc);
478 return (EINVAL);
479 }
480
481 *p++ = '\0';
482 unc->unc_share = p;
483
484 if (*unc->unc_share == '\0') {
485 smb_unc_free(unc);
486 return (EINVAL);
487 }
488
489 unc->unc_path = strchr(unc->unc_share, '/');
490 if ((p = unc->unc_path) == NULL)
491 return (0);
492
493 unc->unc_path++;
494 *p = '\0';
495
496 /* remove the last '/' if any */
497 if ((p = strchr(unc->unc_path, '\0')) != NULL) {
498 if (*(--p) == '/')
499 *p = '\0';
500 }
501
502 return (0);
503 }
504
505 void
smb_unc_free(smb_unc_t * unc)506 smb_unc_free(smb_unc_t *unc)
507 {
508 if (unc == NULL)
509 return;
510
511 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
512 smb_mem_free(unc->unc_buf);
513 #else
514 free(unc->unc_buf);
515 #endif
516 unc->unc_buf = NULL;
517 }
518