1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 *
25 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
26 */
27
28 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
29 #include <sys/types.h>
30 #include <sys/sunddi.h>
31 #else
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <strings.h>
36 #endif
37 #include <sys/u8_textprep.h>
38 #include <smbsrv/alloc.h>
39 #include <sys/errno.h>
40 #include <smbsrv/string.h>
41 #include <smbsrv/cp_usascii.h>
42 #include <smbsrv/cp_unicode.h>
43
44 #define UNICODE_N_ENTRIES (sizeof (a_unicode) / sizeof (a_unicode[0]))
45
46 /*
47 * Global pointer to the current codepage: defaults to ASCII,
48 * and a flag indicating whether the codepage is Unicode or ASCII.
49 */
50 static const smb_codepage_t *current_codepage = usascii_codepage;
51 static boolean_t is_unicode = B_FALSE;
52
53 static smb_codepage_t *smb_unicode_init(void);
54
/*
 * strsubst
 *
 * Walk the string s and overwrite every character equal to orgchar
 * with newchar.  The string is modified in place.
 *
 * Returns s, or NULL if s is NULL.
 */
char *
strsubst(char *s, char orgchar, char newchar)
{
	char *cur;

	if (s == NULL)
		return (NULL);

	for (cur = s; *cur != '\0'; cur++) {
		if (*cur == orgchar)
			*cur = newchar;
	}

	return (s);
}
77
/*
 * strcanon
 *
 * Collapse runs of a repeated character in buf down to a single
 * occurrence, for every character that appears in class.  Characters
 * not listed in class are copied through untouched, and a run is only
 * collapsed when it consists of the same character.  For example:
 *
 *	char *buf = strdup("/d1//d2//d3\\\\d4\\\\f1.txt");
 *	strcanon(buf, "/\\");
 *
 * leaves buf holding:
 *
 *	/d1/d2/d3\d4\f1.txt
 *
 * The string is rewritten in place (it can only get shorter) and
 * buf is returned.
 */
char *
strcanon(char *buf, const char *class)
{
	const char *src = buf;
	char *dst = buf;

	while (*src != '\0') {
		char ch = *src;

		*dst++ = ch;

		if (strchr(class, ch) == NULL) {
			src++;
			continue;
		}

		/* ch is a class member: skip the rest of its run. */
		while (*src == ch)
			src++;
	}

	*dst = '\0';
	return (buf);
}
114
115 void
smb_codepage_init(void)116 smb_codepage_init(void)
117 {
118 const smb_codepage_t *cp;
119
120 if (is_unicode)
121 return;
122
123 if ((cp = smb_unicode_init()) != NULL) {
124 current_codepage = cp;
125 is_unicode = B_TRUE;
126 } else {
127 current_codepage = usascii_codepage;
128 is_unicode = B_FALSE;
129 }
130 }
131
132 /*
133 * Determine whether or not a character is an uppercase character.
134 * This function operates on the current codepage table. Returns
135 * non-zero if the character is uppercase. Otherwise returns zero.
136 */
137 int
smb_isupper(int c)138 smb_isupper(int c)
139 {
140 uint16_t mask = is_unicode ? 0xffff : 0xff;
141
142 return (current_codepage[c & mask].ctype & CODEPAGE_ISUPPER);
143 }
144
145 /*
146 * Determine whether or not a character is an lowercase character.
147 * This function operates on the current codepage table. Returns
148 * non-zero if the character is lowercase. Otherwise returns zero.
149 */
150 int
smb_islower(int c)151 smb_islower(int c)
152 {
153 uint16_t mask = is_unicode ? 0xffff : 0xff;
154
155 return (current_codepage[c & mask].ctype & CODEPAGE_ISLOWER);
156 }
157
158 /*
159 * Convert individual characters to their uppercase equivalent value.
160 * If the specified character is lowercase, the uppercase value will
161 * be returned. Otherwise the original value will be returned.
162 */
163 int
smb_toupper(int c)164 smb_toupper(int c)
165 {
166 uint16_t mask = is_unicode ? 0xffff : 0xff;
167
168 return (current_codepage[c & mask].upper);
169 }
170
171 /*
172 * Convert individual characters to their lowercase equivalent value.
173 * If the specified character is uppercase, the lowercase value will
174 * be returned. Otherwise the original value will be returned.
175 */
176 int
smb_tolower(int c)177 smb_tolower(int c)
178 {
179 uint16_t mask = is_unicode ? 0xffff : 0xff;
180
181 return (current_codepage[c & mask].lower);
182 }
183
184 /*
185 * Convert a string to uppercase using the appropriate codepage. The
186 * string is converted in place. A pointer to the string is returned.
187 * There is an assumption here that uppercase and lowercase values
188 * always result encode to the same length.
189 */
190 char *
smb_strupr(char * s)191 smb_strupr(char *s)
192 {
193 smb_wchar_t c;
194 char *p = s;
195
196 while (*p) {
197 if (smb_isascii(*p)) {
198 *p = smb_toupper(*p);
199 p++;
200 } else {
201 if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0)
202 return (0);
203
204 if (c == 0)
205 break;
206
207 c = smb_toupper(c);
208 p += smb_wctomb(p, c);
209 }
210 }
211
212 return (s);
213 }
214
215 /*
216 * Convert a string to lowercase using the appropriate codepage. The
217 * string is converted in place. A pointer to the string is returned.
218 * There is an assumption here that uppercase and lowercase values
219 * always result encode to the same length.
220 */
221 char *
smb_strlwr(char * s)222 smb_strlwr(char *s)
223 {
224 smb_wchar_t c;
225 char *p = s;
226
227 while (*p) {
228 if (smb_isascii(*p)) {
229 *p = smb_tolower(*p);
230 p++;
231 } else {
232 if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0)
233 return (0);
234
235 if (c == 0)
236 break;
237
238 c = smb_tolower(c);
239 p += smb_wctomb(p, c);
240 }
241 }
242
243 return (s);
244 }
245
246 /*
247 * Returns 1 if string contains NO uppercase chars 0 otherwise. However,
248 * -1 is returned if "s" is not a valid multi-byte string.
249 */
250 int
smb_isstrlwr(const char * s)251 smb_isstrlwr(const char *s)
252 {
253 smb_wchar_t c;
254 int n;
255 const char *p = s;
256
257 while (*p) {
258 if (smb_isascii(*p) && smb_isupper(*p))
259 return (0);
260 else {
261 if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0)
262 return (-1);
263
264 if (c == 0)
265 break;
266
267 if (smb_isupper(c))
268 return (0);
269
270 p += n;
271 }
272 }
273
274 return (1);
275 }
276
277 /*
278 * Returns 1 if string contains NO lowercase chars 0 otherwise. However,
279 * -1 is returned if "s" is not a valid multi-byte string.
280 */
281 int
smb_isstrupr(const char * s)282 smb_isstrupr(const char *s)
283 {
284 smb_wchar_t c;
285 int n;
286 const char *p = s;
287
288 while (*p) {
289 if (smb_isascii(*p) && smb_islower(*p))
290 return (0);
291 else {
292 if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0)
293 return (-1);
294
295 if (c == 0)
296 break;
297
298 if (smb_islower(c))
299 return (0);
300
301 p += n;
302 }
303 }
304
305 return (1);
306 }
307
308 /*
309 * Compare the null-terminated strings s1 and s2 and return an integer
310 * greater than, equal to or less than 0 dependent on whether s1 is
311 * lexicographically greater than, equal to or less than s2 after
312 * translation of each character to lowercase. The original strings
313 * are not modified.
314 *
315 * If n is non-zero, at most n bytes are compared. Otherwise, the strings
316 * are compared until a null terminator is encountered.
317 *
318 * Out: 0 if strings are equal
319 * < 0 if first string < second string
320 * > 0 if first string > second string
321 */
322 int
smb_strcasecmp(const char * s1,const char * s2,size_t n)323 smb_strcasecmp(const char *s1, const char *s2, size_t n)
324 {
325 int err = 0;
326 int rc;
327
328 rc = u8_strcmp(s1, s2, n, U8_STRCMP_CI_LOWER, U8_UNICODE_LATEST, &err);
329 if (err != 0)
330 return (-1);
331 return (rc);
332 }
333
334 /*
335 * First build a codepage based on cp_unicode.h. Then build the unicode
336 * codepage from this interim codepage by copying the entries over while
337 * fixing them and filling in the gaps.
338 */
339 static smb_codepage_t *
smb_unicode_init(void)340 smb_unicode_init(void)
341 {
342 smb_codepage_t *unicode;
343 uint32_t a = 0;
344 uint32_t b = 0;
345
346 unicode = MEM_ZALLOC("unicode", sizeof (smb_codepage_t) << 16);
347 if (unicode == NULL)
348 return (NULL);
349
350 while (b != 0xffff) {
351 /*
352 * If there is a gap in the standard,
353 * fill in the gap with no-case entries.
354 */
355 if (UNICODE_N_ENTRIES <= a || a_unicode[a].val > b) {
356 unicode[b].ctype = CODEPAGE_ISNONE;
357 unicode[b].upper = (smb_wchar_t)b;
358 unicode[b].lower = (smb_wchar_t)b;
359 b++;
360 continue;
361 }
362
363 /*
364 * Copy the entry and fixup as required.
365 */
366 switch (a_unicode[a].ctype) {
367 case CODEPAGE_ISNONE:
368 /*
369 * Replace 0xffff in upper/lower fields with its val.
370 */
371 unicode[b].ctype = CODEPAGE_ISNONE;
372 unicode[b].upper = (smb_wchar_t)b;
373 unicode[b].lower = (smb_wchar_t)b;
374 break;
375 case CODEPAGE_ISUPPER:
376 /*
377 * Some characters may have case yet not have
378 * case conversion. Treat them as no-case.
379 */
380 if (a_unicode[a].lower == 0xffff) {
381 unicode[b].ctype = CODEPAGE_ISNONE;
382 unicode[b].upper = (smb_wchar_t)b;
383 unicode[b].lower = (smb_wchar_t)b;
384 } else {
385 unicode[b].ctype = CODEPAGE_ISUPPER;
386 unicode[b].upper = (smb_wchar_t)b;
387 unicode[b].lower = a_unicode[a].lower;
388 }
389 break;
390 case CODEPAGE_ISLOWER:
391 /*
392 * Some characters may have case yet not have
393 * case conversion. Treat them as no-case.
394 */
395 if (a_unicode[a].upper == 0xffff) {
396 unicode[b].ctype = CODEPAGE_ISNONE;
397 unicode[b].upper = (smb_wchar_t)b;
398 unicode[b].lower = (smb_wchar_t)b;
399 } else {
400 unicode[b].ctype = CODEPAGE_ISLOWER;
401 unicode[b].upper = a_unicode[a].upper;
402 unicode[b].lower = (smb_wchar_t)b;
403 }
404 break;
405 default:
406 MEM_FREE("unicode", unicode);
407 return (NULL);
408 }
409
410 a++;
411 b++;
412 };
413
414 return (unicode);
415 }
416
417 /*
418 * Parse a UNC path (\\server\share\path) into its components.
419 * Although a standard UNC path starts with two '\', in DFS
420 * all UNC paths start with one '\'. So, this function only
421 * checks for one.
422 *
423 * A valid UNC must at least contain two components i.e. server
424 * and share. The path is parsed to:
425 *
426 * unc_server server or domain name with no leading/trailing '\'
427 * unc_share share name with no leading/trailing '\'
428 * unc_path relative path to the share with no leading/trailing '\'
429 * it is valid for unc_path to be NULL.
430 *
431 * Upon successful return of this function, smb_unc_free()
432 * MUST be called when returned 'unc' is no longer needed.
433 *
434 * Returns 0 on success, otherwise returns an errno code.
435 */
436 int
smb_unc_init(const char * path,smb_unc_t * unc)437 smb_unc_init(const char *path, smb_unc_t *unc)
438 {
439 char *p;
440
441 if (path == NULL || unc == NULL || (*path != '\\' && *path != '/'))
442 return (EINVAL);
443
444 bzero(unc, sizeof (smb_unc_t));
445
446 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
447 unc->unc_buf = smb_mem_strdup(path);
448 #else
449 if ((unc->unc_buf = strdup(path)) == NULL)
450 return (ENOMEM);
451 #endif
452
453 (void) strsubst(unc->unc_buf, '\\', '/');
454 (void) strcanon(unc->unc_buf, "/");
455
456 unc->unc_server = unc->unc_buf + 1;
457 if (*unc->unc_server == '\0') {
458 smb_unc_free(unc);
459 return (EINVAL);
460 }
461
462 if ((p = strchr(unc->unc_server, '/')) == NULL) {
463 smb_unc_free(unc);
464 return (EINVAL);
465 }
466
467 *p++ = '\0';
468 unc->unc_share = p;
469
470 if (*unc->unc_share == '\0') {
471 smb_unc_free(unc);
472 return (EINVAL);
473 }
474
475 unc->unc_path = strchr(unc->unc_share, '/');
476 if ((p = unc->unc_path) == NULL)
477 return (0);
478
479 unc->unc_path++;
480 *p = '\0';
481
482 /* remove the last '/' if any */
483 if ((p = strchr(unc->unc_path, '\0')) != NULL) {
484 if (*(--p) == '/')
485 *p = '\0';
486 }
487
488 return (0);
489 }
490
491 void
smb_unc_free(smb_unc_t * unc)492 smb_unc_free(smb_unc_t *unc)
493 {
494 if (unc == NULL)
495 return;
496
497 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
498 smb_mem_free(unc->unc_buf);
499 #else
500 free(unc->unc_buf);
501 #endif
502 unc->unc_buf = NULL;
503 }
504