xref: /illumos-gate/usr/src/uts/common/fs/smbsrv/smb_mangle_name.c (revision 48215d30bccaf4a9d58050835b3eb6ed630a2fde)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"@(#)smb_mangle_name.c	1.3	08/08/07 SMI"
27 
28 #include <sys/types.h>
29 #include <sys/sunddi.h>
30 #include <sys/errno.h>
31 #include <smbsrv/string.h>
32 #include <smbsrv/ctype.h>
33 #include <smbsrv/smb_i18n.h>
34 #include <smbsrv/smb_vops.h>
35 #include <smbsrv/smb_incl.h>
36 #include <smbsrv/smb_fsops.h>
37 
38 static int smb_match_unknown(char *name, char *pattern);
39 static int smb_is_reserved_dos_name(char *name);
40 static int smb_match_reserved(char *name, char *rsrv);
41 
42 /*
43  * smb_match_name
44  *
45  * This function will mangle the "name" field and save the resulted
46  * shortname to the "shortname" field and 8.3 name to "name83" field.
47  * The three fields, "name", "shortname" and "name83" will then be
48  * sent for pattern match with "pattern" field.
49  *
50  * The 0 is returned when the name is a reserved dos name, no match
51  * for the pattern or any type of failure. The 1 is returned when
52  * there is a match.
53  */
54 int
55 smb_match_name(ino64_t fileid, char *name, char *shortname,
56     char *name83, char *pattern, boolean_t ignore_case)
57 {
58 	int rc = 0;
59 	int force;
60 
61 	/* Leading or trailing dots are disallowed */
62 	if (smb_is_reserved_dos_name(name))
63 		return (0);
64 
65 	for (force = 0; (force < 2 && rc == 0); force++) {
66 		(void) smb_mangle_name(fileid, name, shortname, name83, force);
67 
68 		rc = smb_match_ci(pattern, name);
69 
70 		/* If no match, check for shortname (if any) */
71 
72 		if (rc == 0 && strchr(pattern, '~'))
73 			if (*shortname != 0)
74 				rc = smb_match_ci(pattern, shortname);
75 
76 		/*
77 		 * Sigh... DOS Shells use short name
78 		 * interchangeably with long case sensitive
79 		 * names. So check that too...
80 		 */
81 		if ((rc == 0) && !ignore_case)
82 			rc = smb_match83(pattern, name83);
83 
84 		/*
85 		 * Still not found and potentially a premangled name...
86 		 * Check to see if the butt-head programmer is
87 		 * assuming that we mangle names in the same manner
88 		 * as NT...
89 		 */
90 		if (rc == 0)
91 			rc = smb_match_unknown(name, pattern);
92 	}
93 
94 	return (rc);
95 }
96 
/*
 * smb_match_unknown
 *
 * Last-resort match of a file name against a pattern that looks like a
 * mangled name of the form PREFIX~digits[.EXT].
 *
 * As implemented:
 *   - the pattern is only considered when utf8_isstrupr() reports a
 *     positive result for it;
 *   - the characters of name (spaces skipped, each one upper-cased)
 *     are compared with the pattern until they differ or name ends;
 *     the last pattern character examined must be a '~' that is not
 *     the first character of the pattern;
 *   - any digits following the '~' are skipped;
 *   - if the pattern then continues with '.', the text after the next
 *     '.' in name is upper-cased and compared with what is left of the
 *     pattern;
 *   - the result is 1 only if the last pattern character read was the
 *     terminating NUL.
 *
 * The scan never advances past the NUL terminator of name.  (Earlier
 * versions post-incremented the name pointer in the loop conditions
 * and could read beyond the end of name once it had been exhausted,
 * e.g. for a name without a '.' and a pattern with an extension.)
 *
 * NOTE(review): when the extension of name is consumed completely
 * before the pattern ends (including an exact-length extension match)
 * the result is 0; only a pattern extension that is shorter than the
 * name's extension can match.  Confirm whether that is intended.
 *
 * Returns 1 on match, 0 otherwise.
 */
static int
smb_match_unknown(char *name, char *pattern)
{
	char nc, pc;
	char *np = name;
	char *pp = pattern;

	if (utf8_isstrupr(pattern) <= 0)
		return (0);

	/* Compare the leading part of name with the pattern prefix */
	pc = *pattern;
	while ((nc = *np) != 0) {
		np++;
		if (nc == ' ')
			continue;

		nc = mts_toupper(nc);
		if ((pc = *pp++) != nc)
			break;
	}

	/*
	 * The comparison must have stopped on a '~' which is not the
	 * first pattern character and which is followed by something.
	 */
	if ((pc != '~') || (pp == (pattern + 1)) || ((pc = *pp++) == 0))
		return (0);

	/* Skip the numeric part of the mangle */
	while (mts_isdigit(pc))
		pc = *pp++;

	if (pc == '.') {
		/* Move to the character following the next '.' in name */
		while ((nc = *np) != 0) {
			np++;
			if (nc == '.')
				break;
		}

		/* Compare the extensions */
		while ((nc = *np) != 0) {
			np++;
			nc = mts_toupper(nc);
			if ((pc = *pp++) != nc)
				break;
		}
	}

	/* Match only if the whole pattern has been consumed */
	return (pc == 0);
}
156 
/*
 * smb_match_reserved
 *
 * Tests whether "name" is the reserved DOS name "rsrv", optionally
 * followed by an extension: the first strlen(rsrv) characters must
 * compare equal (utf8_strncasecmp) and the character that follows
 * them in name must be either the end of the string or a '.'.
 *
 * Returns 1 if match, 0 otherwise
 */
static int
smb_match_reserved(char *name, char *rsrv)
{
	int rsrv_len = strlen(rsrv);
	char next;

	if (utf8_strncasecmp(rsrv, name, rsrv_len) != 0)
		return (0);

	/* The prefix matched, so name has at least rsrv_len characters */
	next = name[rsrv_len];

	return (next == 0 || next == '.');
}
174 
/*
 * smb_is_reserved_dos_name
 *
 * This function checks if the name is a reserved dos name, i.e. one
 * of AUX, CLOCK$, COM1-COM4, CON, LPT1-LPT3, NUL or PRN, either on its
 * own or followed by an extension (see smb_match_reserved()).
 *
 * The LPT names are checked the same way as all the others, so that
 * an ordinary file which merely starts with "LPT1" (e.g. "lpt10.txt")
 * is not treated as a device name.
 *
 * When CATIA_SUPPORT is compiled in and the NFCAPS_CATIA capability
 * is not enabled, a name that contains a backslash is reported as
 * reserved as well.
 *
 * The function returns 1 when the name is a reserved dos name;
 * otherwise, it returns 0.
 */
static int
smb_is_reserved_dos_name(char *name)
{
	int reserved = 0;

	/*
	 * Eliminate all names reserved by DOS and Windows.  The first
	 * character selects the candidates worth comparing against.
	 */
	switch (mts_toupper(*name)) {
	case 'A':
		reserved = smb_match_reserved(name, "AUX");
		break;

	case 'C':
		reserved = smb_match_reserved(name, "CLOCK$") ||
		    smb_match_reserved(name, "COM1") ||
		    smb_match_reserved(name, "COM2") ||
		    smb_match_reserved(name, "COM3") ||
		    smb_match_reserved(name, "COM4") ||
		    smb_match_reserved(name, "CON");
		break;

	case 'L':
		reserved = smb_match_reserved(name, "LPT1") ||
		    smb_match_reserved(name, "LPT2") ||
		    smb_match_reserved(name, "LPT3");
		break;

	case 'N':
		reserved = smb_match_reserved(name, "NUL");
		break;

	case 'P':
		reserved = smb_match_reserved(name, "PRN");
		break;

	default:
		break;
	}

	if (reserved)
		return (1);

	/*
	 * If the server is configured to support Catia Version 5
	 * deployments, any filename that contains backslash will
	 * have already been translated to the UTF-8 encoding of
	 * Latin Small Letter Y with Diaeresis. Thus, the check
	 * for backslash in the filename is not necessary.
	 */
#ifdef CATIA_SUPPORT
	/* XXX Catia support */
	if ((get_caps() & NFCAPS_CATIA) == 0) {
		while (*name != 0) {
			if (*name == '\\')
				return (1);
			name++;
		}
	}
#endif /* CATIA_SUPPORT */

	return (0);
}
248 
/*
 * Characters we don't allow in DOS file names.
 * If a filename contains any of these chars, it should
 * get mangled.
 *
 * '.' is also an invalid DOS char but since it's a special
 * case it doesn't appear in the list.
 */
static const char *invalid_dos_chars =
	"\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017"
	"\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037"
	" \"/\\:|<>*?";

/*
 * According to MSKB article #142982, Windows deletes invalid chars and
 * spaces from file name in mangling process; and invalid chars include:
 * ."/\[]:;=,
 *
 * But some of these chars and some other chars (e.g. +) are replaced
 * with underscore (_). They are introduced here as special chars.
 */
static const char *special_chars = "[];=,+";

/*
 * isinvalid(c) is non-zero when c is one of invalid_dos_chars or has
 * its high bit set (i.e. is not a 7-bit ASCII character).
 *
 * The argument is fully parenthesized so that an expression can be
 * passed safely, but it is still evaluated twice: do not pass an
 * expression with side effects.  Because strchr() also matches the
 * terminating NUL of the table, isinvalid(0) is non-zero.
 */
#define	isinvalid(c)	(strchr(invalid_dos_chars, (c)) || ((c) & 0x80))
273 
274 /*
275  * smb_needs_mangle
276  *
277  * Determines whether the given name needs to get mangled.
278  *
279  * Here are the (known) rules:
280  *
281  *	1st char is dot (.)
282  *	name length > 12 chars
283  *	# dots > 1
284  *	# dots == 0 and length > 8
285  *	# dots == 1 and name isn't 8.3
286  *	contains illegal chars
287  */
288 int
289 smb_needs_mangle(char *name, char **dot_pos)
290 {
291 	int len, ndots;
292 	char *namep;
293 	char *last_dot;
294 
295 	/*
296 	 * Returning (1) for these cases forces consistency with how
297 	 * these names are treated (smb_mangle_name() will produce an 8.3 name
298 	 * for these)
299 	 */
300 	if ((strcmp(name, ".") == 0) || (strcmp(name, "..") == 0))
301 		return (1);
302 
303 	/* skip the leading dots (if any) */
304 	for (namep = name; *namep == '.'; namep++)
305 		;
306 
307 	len = ndots = 0;
308 	last_dot = 0;
309 	for (; *namep; namep++) {
310 		len++;
311 		if (*namep == '.') {
312 			/* keep the position of last dot */
313 			last_dot = namep;
314 			ndots++;
315 		}
316 	}
317 	*dot_pos = last_dot;
318 
319 	/* Windows mangles names like .a, .abc, or .abcd */
320 	if (*name == '.')
321 		return (1);
322 
323 	if (len > 12)
324 		return (1);
325 
326 	switch (ndots) {
327 	case 0:
328 		/* no dot */
329 		if (len > 8)
330 			return (1);
331 		break;
332 
333 	case 1:
334 		/* just one dot */
335 		/*LINTED E_PTR_DIFF_OVERFLOW*/
336 		if (((last_dot - name) > 8) ||		/* name length > 8 */
337 		    (strlen(last_dot + 1) > 3))		/* extention > 3 */
338 			return (1);
339 		break;
340 
341 	default:
342 		/* more than one dot */
343 		return (1);
344 	}
345 
346 	for (namep = name; *namep; namep++) {
347 		if (!mts_isascii(*namep) ||
348 		    strchr(special_chars, *namep) ||
349 		    strchr(invalid_dos_chars, *namep))
350 			return (1);
351 	}
352 
353 	return (0);
354 }
355 
/*
 * smb_needs_shortname
 *
 * Determine whether a shortname should be generated for a file name that is
 * already in 8.3 format.
 *
 * Parameters:
 *   name - original file name; expected to be in 8.3 format already
 *          (at most 8 characters before the last dot).
 *
 * Return:
 *   1 - Shortname is required to be generated.
 *   0 - No shortname needs to be generated.
 *
 * The part of the name in front of the last dot is examined first and
 * the extension is examined only if the name part is not mixed case.
 * A name whose name part is empty never needs a shortname.
 *
 * The name part is copied into a local 9 byte buffer.  The copy is
 * bounded by the size of that buffer, so a name which (contrary to the
 * expectation above) has more than 8 characters in front of the dot is
 * truncated for the purpose of the check instead of overrunning the
 * buffer.
 *
 * Note
 * =======
 * Windows NT server:       shortname is created only if either
 *                          the filename or extension portion of
 *                          a file is made up of mixed case.
 * Windows 2000 server:     shortname is not created regardless
 *                          of the case.
 * Windows 2003 server:     [Same as Windows NT server.]
 *
 * StorEdge will conform to the rule used by Windows NT/2003 server.
 *
 * For instance:
 *    File      | Create shortname?
 * ================================
 *  nf.txt      | N
 *  NF.TXT      | N
 *  NF.txt      | N
 *  nf          | N
 *  NF          | N
 *  nF.txt      | Y
 *  nf.TxT      | Y
 *  Nf          | Y
 *  nF          | Y
 *
 */
static int
smb_needs_shortname(char *name)
{
	char buf[9];
	size_t len;
	const char *dot_pos;

	dot_pos = strrchr(name, '.');
	/*LINTED E_PTRDIFF_OVERFLOW*/
	len = (dot_pos == NULL) ? strlen(name) : (size_t)(dot_pos - name);
	if (len == 0)
		return (0);

	/* Never let the copy below write more than buf can hold */
	if (len > sizeof (buf) - 1)
		len = sizeof (buf) - 1;

	/* First, examine the name portion of the file */
	(void) snprintf(buf, len + 1, "%s", name);
	/* if the name contains both lower and upper cases */
	if (utf8_isstrupr(buf) == 0 && utf8_isstrlwr(buf) == 0)
		return (1);

	if (dot_pos != NULL) {
		/* Next, examine the extension portion of the file */
		(void) snprintf(buf, sizeof (buf), "%s", dot_pos + 1);
		/* if the extension contains both lower and upper cases */
		if (utf8_isstrupr(buf) == 0 && utf8_isstrlwr(buf) == 0)
			return (1);
	}

	return (0);
}
427 
428 /*
429  * smb_mangle_char
430  *
431  * If given char is an invalid DOS character or it's not an
432  * ascii char, it should be deleted from mangled and 8.3 name.
433  *
434  * If given char is one of special chars, it should be replaced
435  * with '_'.
436  *
437  * Otherwise just make it upper case.
438  */
439 static unsigned char
440 smb_mangle_char(unsigned char ch)
441 {
442 	if (isinvalid(ch))
443 		return (0);
444 
445 	if (strchr(special_chars, ch))
446 		return ('_');
447 
448 	return (mts_toupper(ch));
449 }
450 
/*
 * Maximum number of base-36 digits placed after the '~'.  Together
 * with the '~' and the terminating NUL this fits the 8 byte buffer
 * that callers provide.
 */
#define	SMB_MANGLE_MAXDIGITS	6

/*
 * smb_generate_mangle
 *
 * Generates a mangle string in mangle_buf which consists of a '~'
 * followed by the base-36 digits of fileid, least significant digit
 * first, and a terminating NUL.  The string contains at least 2
 * (considering fileid cannot be 0) and at most 7 chars, so mangle_buf
 * must have room for at least 8 bytes.
 *
 * 36**6 = 2176782336 covers every 31-bit inode number, but ino64_t can
 * hold larger values which would need up to 13 digits.  For those only
 * the SMB_MANGLE_MAXDIGITS low order digits are used, so that neither
 * the caller's buffer nor the 8 character base of the 8.3 name can be
 * overrun.
 *
 * Returns the number of chars in the generated mangle (the NUL is not
 * counted).
 */
static int
smb_generate_mangle(ino64_t fileid, unsigned char *mangle_buf)
{
	static const char base36[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
	unsigned char *manglep = mangle_buf;
	int ndigits;

	*manglep++ = '~';
	for (ndigits = 0; fileid > 0 && ndigits < SMB_MANGLE_MAXDIGITS;
	    ndigits++) {
		*manglep++ = base36[fileid % 36];
		fileid /= 36;
	}
	*manglep = 0;

	/*LINTED E_PTRDIFF_OVERFLOW*/
	return ((int)(manglep - mangle_buf));
}
477 
/*
 * smb_maybe_mangled_name
 *
 * Returns true (1) if the passed name can possibly be a mangled name,
 * false (0) otherwise.
 *
 * A mangled name is a valid dos file name, so to qualify the name must
 *   - be at most 12 characters long,
 *   - contain a tilde among its first 8 characters, and
 *   - not contain a dot in front of that tilde.
 *
 * Note that this function can be further enhanced to check for invalid
 * dos characters/character patterns (such as "file..1.c") but this version
 * should be sufficient in most cases.
 */
int
smb_maybe_mangled_name(char *name)
{
	const char *cp = name;
	int seen_tilde = 0;
	int pos = 0;

	while (*cp != '\0' && pos < 12) {
		switch (*cp) {
		case '~':
			/* only a tilde within the 8 char base counts */
			if (pos < 8)
				seen_tilde = 1;
			break;

		case '.':
			/* the extension started before any tilde showed up */
			if (!seen_tilde)
				return (0);
			break;

		default:
			break;
		}

		cp++;
		pos++;
	}

	/* anything left over means the name is longer than 12 chars */
	return (*cp == '\0' && seen_tilde);
}
504 
505 /*
506  * smb_mangle_name
507  *
508  * Microsoft knowledge base article #142982 describes how Windows
509  * generates 8.3 filenames from long file names. Some other details
510  * can be found in article #114816.
511  *
512  * The function first checks to see whether the given name needs mangling.
513  * If not, and the force parameter is not set, then no mangling is done,
514  * but both the shortname (if needed) and the 8.3 name are produced and
515  * returned.
516  *
517  * If the "force" parameter is set (as will be the case for case-insensitive
518  * collisions), then the name will be mangled.
519  *
520  * Whenever mangling is needed, both the shortname and the 8.3 names are
521  * produced and returned.
522  *
523  * For example, the xxx.xy in 8.3 format will be "xxx     .xy ".
524  */
525 
526 int smb_mangle_name(
527 	ino64_t fileid,		/* inode number to generate unique mangle */
528 	char *name,		/* original file name */
529 	char *shortname,	/* mangled name (if applicable) */
530 	char *name83,		/* (mangled) name in 8.3 format */
531 	int force)		/* force mangling even if mangling is not */
532 				/* needed according to standard algorithm */
533 {
534 	int avail;
535 	unsigned char ch;
536 	unsigned char mangle_buf[8];
537 	unsigned char *namep;
538 	unsigned char *manglep;
539 	unsigned char *out_short;
540 	unsigned char *out_83;
541 	char *dot_pos = NULL;
542 
543 	/*
544 	 * NOTE:
545 	 * This function used to consider filename case
546 	 * in order to mangle. I removed those checks.
547 	 */
548 
549 	*shortname = *name83 = 0;
550 
551 	/* Allow dot and dot dot up front */
552 	if (strcmp(name, ".") == 0) {
553 		/* no shortname */
554 		(void) strcpy(name83, ".       .   ");
555 		return (1);
556 	}
557 
558 	if (strcmp(name, "..") == 0) {
559 		/* no shortname */
560 		(void) strcpy(name83, "..      .   ");
561 		return (1);
562 	}
563 
564 	out_short = (unsigned char *)shortname;
565 	out_83 = (unsigned char *)name83;
566 
567 	if ((smb_needs_mangle(name, &dot_pos) == 0) && (force == 0)) {
568 		/* no mangle */
569 
570 		/* check if shortname is required or not */
571 		if (smb_needs_shortname(name)) {
572 			namep = (unsigned char *)name;
573 			while (*namep)
574 				*out_short++ = mts_toupper(*namep++);
575 			*out_short = '\0';
576 		}
577 
578 		out_83 = (unsigned char *)name83;
579 		(void) strcpy((char *)out_83, "        .   ");
580 		while (*name && *name != '.')
581 			*out_83++ = mts_toupper(*name++);
582 
583 		if (*name == '.') {
584 			/* copy extension */
585 			name++;
586 			out_83 = (unsigned char *)name83 + 9;
587 			while (*name)
588 				*out_83++ = mts_toupper(*name++);
589 		}
590 		return (1);
591 	}
592 
593 	avail = 8 - smb_generate_mangle(fileid, mangle_buf);
594 
595 	/*
596 	 * generated mangle part has always less than 8 chars, so
597 	 * use the chars before the first dot in filename
598 	 * and try to generate a full 8 char name.
599 	 */
600 
601 	/* skip the leading dots (if any) */
602 	for (namep = (unsigned char *)name; *namep == '.'; namep++)
603 		;
604 
605 	for (; avail && *namep && (*namep != '.'); namep++) {
606 		ch = smb_mangle_char(*namep);
607 		if (ch == 0)
608 			continue;
609 		*out_short++ = *out_83++ = ch;
610 		avail--;
611 	}
612 
613 	/* Copy in mangled part */
614 	manglep = mangle_buf;
615 
616 	while (*manglep)
617 		*out_short++ = *out_83++ = *(manglep++);
618 
619 	/* Pad any leftover in 8.3 name with spaces */
620 	while (avail--)
621 		*out_83++ = ' ';
622 
623 	/* Work on extension now */
624 	avail = 3;
625 	*out_83++ = '.';
626 	if (dot_pos) {
627 		namep = (unsigned char *)dot_pos + 1;
628 		if (*namep != 0) {
629 			*out_short++ = '.';
630 			for (; avail && *namep; namep++) {
631 				ch = smb_mangle_char(*namep);
632 				if (ch == 0)
633 					continue;
634 
635 				*out_short++ = *out_83++ = ch;
636 				avail--;
637 			}
638 		}
639 	}
640 
641 	while (avail--)
642 		*out_83++ = ' ';
643 
644 	*out_short = *out_83 = '\0';
645 
646 	return (1);
647 }
648 
649 /*
650  * smb_unmangle_name
651  *
652  * Given a mangled name, try to find the real file name as it appears
653  * in the directory entry. If the name does not contain a ~, it is most
654  * likely not a mangled name but the caller can still try to get the
655  * actual on-disk name by setting the "od" parameter.
656  *
657  * Returns 0 if a name has been returned in real_name. There are three
658  * possible scenarios:
659  *  1. Name did not contain a ~ and "od" was not set, in which
660  *     case, real_name contains name.
661  *  2. Name did not contain a ~ and "od" was set, in which
662  *     case, real_name contains the actual directory entry name.
663  *  3. Name did contain a ~, in which case, name was mangled and
664  *     real_name contains the actual directory entry name.
665  *
666  * EINVAL: a parameter was invalid.
667  * ENOENT: an unmangled name could not be found.
668  */
669 
670 int
671 smb_unmangle_name(struct smb_request *sr, cred_t *cred, smb_node_t *dir_node,
672 	char *name, char *real_name, int realname_size, char *shortname,
673 	char *name83, int od)
674 {
675 	int err;
676 	int len;
677 	int force = 0;
678 	ino64_t inode;
679 	uint32_t cookie;
680 	struct smb_node *snode = NULL;
681 	smb_attr_t ret_attr;
682 	char *dot_pos = NULL;
683 	char *readdir_name;
684 	char *shortp;
685 	char namebuf[SMB_SHORTNAMELEN];
686 
687 	if (dir_node == NULL || name == NULL || real_name == NULL ||
688 	    realname_size == 0)
689 		return (EINVAL);
690 
691 	*real_name = '\0';
692 	snode = NULL;
693 
694 	if (smb_maybe_mangled_name(name) == 0) {
695 		if (od == 0) {
696 			(void) strlcpy(real_name, name, realname_size);
697 			return (0);
698 		}
699 
700 		err = smb_fsop_lookup(sr, cred, 0, sr->tid_tree->t_snode,
701 		    dir_node, name, &snode, &ret_attr, NULL, NULL);
702 
703 		if (err != 0)
704 			return (err);
705 
706 		(void) strlcpy(real_name, snode->od_name, realname_size);
707 		smb_node_release(snode);
708 		return (0);
709 	}
710 
711 	if (shortname == 0)
712 		shortname = namebuf;
713 	if (name83 == 0)
714 		name83 = namebuf;
715 
716 	cookie = 0;
717 
718 	readdir_name = kmem_alloc(MAXNAMELEN, KM_SLEEP);
719 
720 	snode = NULL;
721 	while (cookie != 0x7FFFFFFF) {
722 
723 		len = realname_size - 1;
724 
725 		err = smb_fsop_readdir(sr, cred, dir_node, &cookie,
726 		    readdir_name, &len, &inode, NULL, &snode, &ret_attr);
727 
728 		if (err || (cookie == 0x7FFFFFFF))
729 			break;
730 
731 		readdir_name[len] = 0;
732 
733 		/*
734 		 * smb_fsop_readdir() may return a mangled name if the
735 		 * name has a case collision.
736 		 *
737 		 * If readdir_name is not a mangled name, we mangle
738 		 * readdir_name to see if it will match the name the
739 		 * client passed in.
740 		 *
741 		 * If smb_needs_mangle() does not succeed, we try again
742 		 * using the force flag.  It is possible that the client
743 		 * is using a mangled name that resulted from a prior
744 		 * case collision which no longer exists in the directory.
745 		 * smb_needs_mangle(), with the force flag, will produce
746 		 * a mangled name regardless of whether the name passed in
747 		 * meets standard DOS criteria for name mangling.
748 		 */
749 
750 		if (smb_maybe_mangled_name(readdir_name)) {
751 			shortp = readdir_name;
752 		} else {
753 			if (smb_needs_mangle(readdir_name, &dot_pos) == 0)
754 				force = 1;
755 			(void) smb_mangle_name(inode, readdir_name, shortname,
756 			    name83, force);
757 			shortp = shortname;
758 		}
759 
760 		if (utf8_strcasecmp(name, shortp) == 0) {
761 			kmem_free(readdir_name, MAXNAMELEN);
762 			(void) strlcpy(real_name, snode->od_name,
763 			    realname_size);
764 
765 			smb_node_release(snode);
766 
767 			return (0);
768 		} else {
769 			smb_node_release(snode);
770 			snode = NULL;
771 		}
772 	}
773 
774 	kmem_free(readdir_name, MAXNAMELEN);
775 
776 	return (ENOENT);
777 }
778