xref: /illumos-gate/usr/src/uts/common/fs/smbsrv/smb_mangle_name.c (revision 3d393ee6c37fa10ac512ed6d36109ad616dc7c1a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"@(#)smb_mangle_name.c	1.3	08/08/07 SMI"
27 
28 #include <sys/types.h>
29 #include <sys/sunddi.h>
30 #include <sys/errno.h>
31 #include <smbsrv/string.h>
32 #include <smbsrv/ctype.h>
33 #include <smbsrv/smb_i18n.h>
34 #include <smbsrv/smb_vops.h>
35 #include <smbsrv/smb_incl.h>
36 #include <smbsrv/smb_fsops.h>
37 
38 static int smb_match_unknown(char *name, char *pattern);
39 static int smb_is_reserved_dos_name(char *name);
40 static int smb_match_reserved(char *name, char *rsrv);
41 
42 /*
43  * smb_match_name
44  *
45  * This function will mangle the "name" field and save the resulted
46  * shortname to the "shortname" field and 8.3 name to "name83" field.
47  * The three fields, "name", "shortname" and "name83" will then be
48  * sent for pattern match with "pattern" field.
49  *
50  * The 0 is returned when the name is a reserved dos name, no match
51  * for the pattern or any type of failure. The 1 is returned when
52  * there is a match.
53  */
54 int
55 smb_match_name(ino64_t fileid, char *name, char *pattern, boolean_t ignore_case)
56 {
57 	int rc = 0;
58 	int force;
59 	char name83[SMB_SHORTNAMELEN];
60 	char shortname[SMB_SHORTNAMELEN];
61 
62 	/* Leading or trailing dots are disallowed */
63 	if (smb_is_reserved_dos_name(name))
64 		return (0);
65 
66 	for (force = 0; (force < 2 && rc == 0); force++) {
67 		(void) smb_mangle_name(fileid, name, shortname, name83, force);
68 
69 		rc = smb_match_ci(pattern, name);
70 
71 		/* If no match, check for shortname (if any) */
72 
73 		if (rc == 0 && strchr(pattern, '~'))
74 			if (*shortname != 0)
75 				rc = smb_match_ci(pattern, shortname);
76 
77 		/*
78 		 * Sigh... DOS Shells use short name
79 		 * interchangeably with long case sensitive
80 		 * names. So check that too...
81 		 */
82 		if ((rc == 0) && !ignore_case)
83 			rc = smb_match83(pattern, name83);
84 
85 		/*
86 		 * Still not found and potentially a premangled name...
87 		 * Check to see if the butt-head programmer is
88 		 * assuming that we mangle names in the same manner
89 		 * as NT...
90 		 */
91 		if (rc == 0)
92 			rc = smb_match_unknown(name, pattern);
93 	}
94 
95 	return (rc);
96 }
97 
/*
 * smb_match_unknown
 *
 * Heuristic match of "name" against a pattern that looks like an
 * NT-style mangled name: PREFIX~digits[.EXT].
 *
 * The pattern is only considered when utf8_isstrupr() reports a
 * positive value for it.  The walk is then:
 *
 *   1. Compare name (spaces skipped, each character passed through
 *      mts_toupper) with the pattern until the first difference.
 *   2. The difference must be at a '~' in the pattern, and that '~'
 *      must not be the first pattern character.
 *   3. Skip the decimal digits that follow the '~'.
 *   4. If the pattern continues with '.', skip forward in name to just
 *      past its next '.', and compare what follows in both strings.
 *   5. Report a match when the pattern ran out (last pattern character
 *      read was the terminating NUL).
 *
 * Every read of name advances the cursor only after a non-NUL
 * character has been seen.  The earlier form post-incremented on the
 * terminator as well, so when one loop stopped at end-of-name the
 * following loop read memory beyond the string (for example a long
 * name without any '.', matched against a pattern with an extension).
 *
 * NOTE(review): when name's extension is identical to the pattern's,
 * name runs out first and pc is left holding the last matched
 * character, so the result is "no match"; only a longer extension in
 * name lets the pattern run out.  That looks unintended but is kept
 * as-is here - confirm before changing.
 *
 * Returns 1 on match, 0 otherwise.
 */
static int
smb_match_unknown(char *name, char *pattern)
{
	int rc;
	char nc, pc;
	char *np, *pp;

	rc = 0;
	if (utf8_isstrupr(pattern) <= 0)
		return (rc);

	np = name;
	pp = pattern;

	/* Common prefix: stop at the first difference or at end of name. */
	pc = *pattern;
	while ((nc = *np) != 0) {
		np++;
		if (nc == ' ')
			continue;

		nc = mts_toupper(nc);
		if ((pc = *pp++) != nc)
			break;
	}

	if ((pc == '~') &&
	    (pp != (pattern + 1)) &&
	    ((pc = *pp++) != 0)) {
		/* Skip the numeric tail of the mangle. */
		while (mts_isdigit(pc))
			pc = *pp++;

		if (pc == '.') {
			/* Move to just past the next dot in name, if any. */
			while ((nc = *np) != 0) {
				np++;
				if (nc == '.')
					break;
			}

			/* Compare the extensions. */
			while ((nc = *np) != 0) {
				np++;
				nc = mts_toupper(nc);
				if ((pc = *pp++) != nc)
					break;
			}
		}

		/* The whole pattern was consumed. */
		if (pc == 0)
			rc = 1;
	}

	return (rc);
}
157 
/*
 * smb_match_reserved
 *
 * Test whether "name" is the reserved DOS name "rsrv", optionally
 * followed by an extension: the first strlen(rsrv) characters must
 * compare equal under utf8_strncasecmp() and the character right after
 * them must be either the end of the string or a '.'.
 *
 * Returns 1 if so, 0 otherwise.
 */
static int
smb_match_reserved(char *name, char *rsrv)
{
	int rlen = strlen(rsrv);
	char next;

	if (utf8_strncasecmp(rsrv, name, rlen) != 0)
		return (0);

	/* Only inspected once the prefix has matched. */
	next = name[rlen];

	return (next == 0 || next == '.');
}
175 
/*
 * smb_is_reserved_dos_name
 *
 * Check whether "name" is one of the device names reserved by DOS and
 * Windows: AUX, CLOCK$, COM1-COM4, CON, LPT1-LPT3, NUL and PRN.  A name
 * is reserved when it is exactly one of these (compared through
 * smb_match_reserved) or one of these followed by '.' and anything.
 *
 * The LPT names go through smb_match_reserved like all the others.
 * They used to be compared on their first four characters only, which
 * also flagged ordinary names that merely start with "LPT1", "LPT2"
 * or "LPT3".
 *
 * Returns 1 when the name is reserved, 0 otherwise.
 */
static int
smb_is_reserved_dos_name(char *name)
{
	char	ch;

	/*
	 * Dispatch on the first character so that only the plausible
	 * candidates are compared.
	 */
	ch = mts_toupper(*name);

	switch (ch) {
	case 'A':
		if (smb_match_reserved(name, "AUX"))
			return (1);
		break;

	case 'C':
		if (smb_match_reserved(name, "CLOCK$") ||
		    smb_match_reserved(name, "COM1") ||
		    smb_match_reserved(name, "COM2") ||
		    smb_match_reserved(name, "COM3") ||
		    smb_match_reserved(name, "COM4") ||
		    smb_match_reserved(name, "CON")) {
			return (1);
		}
		break;

	case 'L':
		if (smb_match_reserved(name, "LPT1") ||
		    smb_match_reserved(name, "LPT2") ||
		    smb_match_reserved(name, "LPT3")) {
			return (1);
		}
		break;

	case 'N':
		if (smb_match_reserved(name, "NUL"))
			return (1);
		break;

	case 'P':
		if (smb_match_reserved(name, "PRN"))
			return (1);
		break;

	default:
		break;
	}

	/*
	 * If the server is configured to support Catia Version 5
	 * deployments, any filename that contains backslash will
	 * have already been translated to the UTF-8 encoding of
	 * Latin Small Letter Y with Diaeresis. Thus, the check
	 * for backslash in the filename is not necessary.
	 */
#ifdef CATIA_SUPPORT
	/* XXX Catia support */
	if ((get_caps() & NFCAPS_CATIA) == 0) {
		while (*name != 0) {
			if (*name == '\\')
				return (1);
			name++;
		}
	}
#endif /* CATIA_SUPPORT */

	return (0);
}
249 
/*
 * Characters we don't allow in DOS file names.
 * If a filename contains any of these characters, it should
 * get mangled.
 *
 * The list holds the control characters 0x01-0x1F, space, and
 * " / \ : | < > * ?
 *
 * '.' is also an invalid DOS char but since it's a special
 * case it doesn't appear in the list.
 */
static char *invalid_dos_chars =
	"\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017"
	"\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037"
	" \"/\\:|<>*?";

/*
 * According to MSKB article #142982, Windows deletes invalid chars and
 * spaces from file name in mangling process; and invalid chars include:
 * ."/\[]:;=,
 *
 * But some of these chars and some other chars (e.g. +) are replaced
 * with underscore (_). They are introduced here as special chars.
 */
static char *special_chars = "[];=,+";

/*
 * True when c is listed in invalid_dos_chars or has its high bit
 * (0x80) set.  Because strchr() also finds the string terminator,
 * a c of 0 tests true as well.
 *
 * The argument is parenthesized so that an expression argument is
 * masked as a whole; it is still evaluated twice, so it must not
 * have side effects.
 */
#define	isinvalid(c)	(strchr(invalid_dos_chars, (c)) || ((c) & 0x80))
274 
275 /*
276  * smb_needs_mangle
277  *
278  * Determines whether the given name needs to get mangled.
279  *
280  * Here are the (known) rules:
281  *
282  *	1st char is dot (.)
283  *	name length > 12 chars
284  *	# dots > 1
285  *	# dots == 0 and length > 8
286  *	# dots == 1 and name isn't 8.3
287  *	contains illegal chars
288  */
289 int
290 smb_needs_mangle(char *name, char **dot_pos)
291 {
292 	int len, ndots;
293 	char *namep;
294 	char *last_dot;
295 
296 	/*
297 	 * Returning (1) for these cases forces consistency with how
298 	 * these names are treated (smb_mangle_name() will produce an 8.3 name
299 	 * for these)
300 	 */
301 	if ((strcmp(name, ".") == 0) || (strcmp(name, "..") == 0))
302 		return (1);
303 
304 	/* skip the leading dots (if any) */
305 	for (namep = name; *namep == '.'; namep++)
306 		;
307 
308 	len = ndots = 0;
309 	last_dot = 0;
310 	for (; *namep; namep++) {
311 		len++;
312 		if (*namep == '.') {
313 			/* keep the position of last dot */
314 			last_dot = namep;
315 			ndots++;
316 		}
317 	}
318 	*dot_pos = last_dot;
319 
320 	/* Windows mangles names like .a, .abc, or .abcd */
321 	if (*name == '.')
322 		return (1);
323 
324 	if (len > 12)
325 		return (1);
326 
327 	switch (ndots) {
328 	case 0:
329 		/* no dot */
330 		if (len > 8)
331 			return (1);
332 		break;
333 
334 	case 1:
335 		/* just one dot */
336 		/*LINTED E_PTR_DIFF_OVERFLOW*/
337 		if (((last_dot - name) > 8) ||		/* name length > 8 */
338 		    (strlen(last_dot + 1) > 3))		/* extention > 3 */
339 			return (1);
340 		break;
341 
342 	default:
343 		/* more than one dot */
344 		return (1);
345 	}
346 
347 	for (namep = name; *namep; namep++) {
348 		if (!mts_isascii(*namep) ||
349 		    strchr(special_chars, *namep) ||
350 		    strchr(invalid_dos_chars, *namep))
351 			return (1);
352 	}
353 
354 	return (0);
355 }
356 
/*
 * smb_needs_shortname
 *
 * Determine whether a shortname should be generated for a file name that is
 * already in 8.3 format.
 *
 * Parameters:
 *   name - original file name
 *
 * Return:
 *   1 - Shortname is required to be generated.
 *   0 - No shortname needs to be generated.
 *
 * Note
 * =======
 * Windows NT server:       shortname is created only if either
 *                          the filename or extension portion of
 *                          a file is made up of mixed case.
 * Windows 2000 server:     shortname is not created regardless
 *                          of the case.
 * Windows 2003 server:     [Same as Windows NT server.]
 *
 * StorEdge will conform to the rule used by Windows NT/2003 server.
 *
 * For instance:
 *    File      | Create shortname?
 * ================================
 *  nf.txt      | N
 *  NF.TXT      | N
 *  NF.txt      | N
 *  nf          | N
 *  NF          | N
 *  nF.txt      | Y
 *  nf.TxT      | Y
 *  Nf          | Y
 *  nF          | Y
 *
 * The portion before the last dot is copied into a 9-byte scratch
 * buffer.  The copy length is clamped to that buffer, so a base part
 * longer than 8 characters is examined on its first 8 characters
 * instead of overrunning the buffer.  A name whose base part is empty
 * (it starts at the last dot) never asks for a shortname.
 */
static int
smb_needs_shortname(char *name)
{
	char buf[9];
	size_t len;
	const char *dot_pos;

	dot_pos = strrchr(name, '.');
	/*LINTED E_PTRDIFF_OVERFLOW*/
	len = (dot_pos == NULL) ? strlen(name) : (size_t)(dot_pos - name);

	if (len == 0)
		return (0);

	/* Keep the copy inside buf even if the caller's name isn't 8.3. */
	if (len >= sizeof (buf))
		len = sizeof (buf) - 1;

	/* First, examine the name portion of the file */
	(void) snprintf(buf, len + 1, "%s", name);

	/* if the name contains both lower and upper cases */
	if (utf8_isstrupr(buf) == 0 && utf8_isstrlwr(buf) == 0)
		return (1);

	if (dot_pos == NULL)
		return (0);

	/* Next, examine the extension portion of the file */
	(void) snprintf(buf, sizeof (buf), "%s", dot_pos + 1);

	/* if the extension contains both lower and upper cases */
	if (utf8_isstrupr(buf) == 0 && utf8_isstrlwr(buf) == 0)
		return (1);

	return (0);
}
428 
429 /*
430  * smb_mangle_char
431  *
432  * If given char is an invalid DOS character or it's not an
433  * ascii char, it should be deleted from mangled and 8.3 name.
434  *
435  * If given char is one of special chars, it should be replaced
436  * with '_'.
437  *
438  * Otherwise just make it upper case.
439  */
440 static unsigned char
441 smb_mangle_char(unsigned char ch)
442 {
443 	if (isinvalid(ch))
444 		return (0);
445 
446 	if (strchr(special_chars, ch))
447 		return ('_');
448 
449 	return (mts_toupper(ch));
450 }
451 
452 /*
453  * smb_generate_mangle
454  *
455  * Generates a mangle string which contains
456  * at least 2 (considering fileid cannot be 0)
457  * and at most 7 chars.
458  *
459  * Returns the number of chars in the generated mangle.
460  */
461 static int
462 smb_generate_mangle(ino64_t fileid, unsigned char *mangle_buf)
463 {
464 	/*
465 	 * 36**6 = 2176782336: more than enough to express inodes in 6
466 	 * chars
467 	 */
468 	static char *base36 = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
469 	unsigned char *manglep = mangle_buf;
470 
471 	for (*manglep++ = '~'; fileid > 0; fileid /= 36)
472 		*manglep++ = base36[fileid % 36];
473 	*manglep = 0;
474 
475 	/*LINTED E_PTRDIFF_OVERFLOW*/
476 	return (manglep - mangle_buf);
477 }
478 
/*
 * smb_maybe_mangled_name
 *
 * Cheap test for "could this be a mangled name?".  A candidate must
 *
 *   - be at most 12 characters long,
 *   - contain a tilde within its first 8 characters, and
 *   - not contain a dot before that tilde has been seen.
 *
 * This does not check for invalid DOS characters or odd patterns
 * such as "file..1.c"; it is only meant to rule out names that cannot
 * possibly be mangled.
 *
 * Returns 1 if the name may be a mangled name, 0 otherwise.
 */
int
smb_maybe_mangled_name(char *name)
{
	const char *cp = name;
	int pos = 0;
	int tilde_seen = 0;

	while (*cp != '\0' && pos < 12) {
		if (*cp == '~' && pos < 8)
			tilde_seen = 1;
		else if (*cp == '.' && !tilde_seen)
			return (0);

		cp++;
		pos++;
	}

	/* Anything left over means the name is longer than 12 chars. */
	if (*cp != '\0')
		return (0);

	return (tilde_seen ? 1 : 0);
}
505 
506 /*
507  * smb_mangle_name
508  *
509  * Microsoft knowledge base article #142982 describes how Windows
510  * generates 8.3 filenames from long file names. Some other details
511  * can be found in article #114816.
512  *
513  * The function first checks to see whether the given name needs mangling.
514  * If not, and the force parameter is not set, then no mangling is done,
515  * but both the shortname (if needed) and the 8.3 name are produced and
516  * returned.
517  *
518  * If the "force" parameter is set (as will be the case for case-insensitive
519  * collisions), then the name will be mangled.
520  *
521  * Whenever mangling is needed, both the shortname and the 8.3 names are
522  * produced and returned.
523  *
524  * For example, the xxx.xy in 8.3 format will be "xxx     .xy ".
525  */
526 
527 int smb_mangle_name(
528 	ino64_t fileid,		/* inode number to generate unique mangle */
529 	char *name,		/* original file name */
530 	char *shortname,	/* mangled name (if applicable) */
531 	char *name83,		/* (mangled) name in 8.3 format */
532 	int force)		/* force mangling even if mangling is not */
533 				/* needed according to standard algorithm */
534 {
535 	int avail;
536 	unsigned char ch;
537 	unsigned char mangle_buf[8];
538 	unsigned char *namep;
539 	unsigned char *manglep;
540 	unsigned char *out_short;
541 	unsigned char *out_83;
542 	char *dot_pos = NULL;
543 
544 	/*
545 	 * NOTE:
546 	 * This function used to consider filename case
547 	 * in order to mangle. I removed those checks.
548 	 */
549 
550 	*shortname = *name83 = 0;
551 
552 	/* Allow dot and dot dot up front */
553 	if (strcmp(name, ".") == 0) {
554 		/* no shortname */
555 		(void) strcpy(name83, ".       .   ");
556 		return (1);
557 	}
558 
559 	if (strcmp(name, "..") == 0) {
560 		/* no shortname */
561 		(void) strcpy(name83, "..      .   ");
562 		return (1);
563 	}
564 
565 	out_short = (unsigned char *)shortname;
566 	out_83 = (unsigned char *)name83;
567 
568 	if ((smb_needs_mangle(name, &dot_pos) == 0) && (force == 0)) {
569 		/* no mangle */
570 
571 		/* check if shortname is required or not */
572 		if (smb_needs_shortname(name)) {
573 			namep = (unsigned char *)name;
574 			while (*namep)
575 				*out_short++ = mts_toupper(*namep++);
576 			*out_short = '\0';
577 		}
578 
579 		out_83 = (unsigned char *)name83;
580 		(void) strcpy((char *)out_83, "        .   ");
581 		while (*name && *name != '.')
582 			*out_83++ = mts_toupper(*name++);
583 
584 		if (*name == '.') {
585 			/* copy extension */
586 			name++;
587 			out_83 = (unsigned char *)name83 + 9;
588 			while (*name)
589 				*out_83++ = mts_toupper(*name++);
590 		}
591 		return (1);
592 	}
593 
594 	avail = 8 - smb_generate_mangle(fileid, mangle_buf);
595 
596 	/*
597 	 * generated mangle part has always less than 8 chars, so
598 	 * use the chars before the first dot in filename
599 	 * and try to generate a full 8 char name.
600 	 */
601 
602 	/* skip the leading dots (if any) */
603 	for (namep = (unsigned char *)name; *namep == '.'; namep++)
604 		;
605 
606 	for (; avail && *namep && (*namep != '.'); namep++) {
607 		ch = smb_mangle_char(*namep);
608 		if (ch == 0)
609 			continue;
610 		*out_short++ = *out_83++ = ch;
611 		avail--;
612 	}
613 
614 	/* Copy in mangled part */
615 	manglep = mangle_buf;
616 
617 	while (*manglep)
618 		*out_short++ = *out_83++ = *(manglep++);
619 
620 	/* Pad any leftover in 8.3 name with spaces */
621 	while (avail--)
622 		*out_83++ = ' ';
623 
624 	/* Work on extension now */
625 	avail = 3;
626 	*out_83++ = '.';
627 	if (dot_pos) {
628 		namep = (unsigned char *)dot_pos + 1;
629 		if (*namep != 0) {
630 			*out_short++ = '.';
631 			for (; avail && *namep; namep++) {
632 				ch = smb_mangle_char(*namep);
633 				if (ch == 0)
634 					continue;
635 
636 				*out_short++ = *out_83++ = ch;
637 				avail--;
638 			}
639 		}
640 	}
641 
642 	while (avail--)
643 		*out_83++ = ' ';
644 
645 	*out_short = *out_83 = '\0';
646 
647 	return (1);
648 }
649 
650 /*
651  * smb_unmangle_name
652  *
653  * Given a mangled name, try to find the real file name as it appears
654  * in the directory entry. If the name does not contain a ~, it is most
655  * likely not a mangled name but the caller can still try to get the
656  * actual on-disk name by setting the "od" parameter.
657  *
658  * Returns 0 if a name has been returned in real_name. There are three
659  * possible scenarios:
660  *  1. Name did not contain a ~ and "od" was not set, in which
661  *     case, real_name contains name.
662  *  2. Name did not contain a ~ and "od" was set, in which
663  *     case, real_name contains the actual directory entry name.
664  *  3. Name did contain a ~, in which case, name was mangled and
665  *     real_name contains the actual directory entry name.
666  *
667  * EINVAL: a parameter was invalid.
668  * ENOENT: an unmangled name could not be found.
669  */
670 
671 int
672 smb_unmangle_name(struct smb_request *sr, cred_t *cred, smb_node_t *dir_node,
673 	char *name, char *real_name, int realname_size, char *shortname,
674 	char *name83, int ondisk)
675 {
676 	int err;
677 	struct smb_node *snode = NULL;
678 	smb_attr_t ret_attr;
679 	char namebuf[SMB_SHORTNAMELEN];
680 	char  *path;
681 	uint16_t odid;
682 	smb_odir_t *od;
683 	smb_odirent_t *odirent;
684 	boolean_t eos;
685 
686 	if (dir_node == NULL || name == NULL || real_name == NULL ||
687 	    realname_size == 0)
688 		return (EINVAL);
689 
690 	*real_name = '\0';
691 	snode = NULL;
692 
693 	if (smb_maybe_mangled_name(name) == 0) {
694 		if (ondisk == 0) {
695 			(void) strlcpy(real_name, name, realname_size);
696 			return (0);
697 		}
698 
699 		err = smb_fsop_lookup(sr, cred, 0, sr->tid_tree->t_snode,
700 		    dir_node, name, &snode, &ret_attr, NULL, NULL);
701 
702 		if (err != 0)
703 			return (err);
704 
705 		(void) strlcpy(real_name, snode->od_name, realname_size);
706 		smb_node_release(snode);
707 		return (0);
708 	}
709 
710 	if (shortname == 0)
711 		shortname = namebuf;
712 	if (name83 == 0)
713 		name83 = namebuf;
714 
715 	/* determine the pathname and open an smb_odir_t */
716 	path =  kmem_alloc(MAXNAMELEN, KM_SLEEP);
717 	if ((err = vnodetopath(sr->tid_tree->t_snode->vp, dir_node->vp, path,
718 	    MAXNAMELEN, kcred)) != 0)
719 		return (err);
720 
721 	if ((strlcat(path, "/*", MAXNAMELEN) >= MAXNAMELEN) ||
722 	    ((odid = smb_odir_open(sr, path, SMB_SEARCH_ATTRIBUTES)) == 0) ||
723 	    ((od = smb_tree_lookup_odir(sr->tid_tree, odid)) == NULL)) {
724 		err = ENOENT;
725 	}
726 	kmem_free(path, MAXNAMELEN);
727 	if (err != 0)
728 		return (err);
729 
730 	odirent = kmem_alloc(sizeof (smb_odirent_t), KM_SLEEP);
731 	for (;;) {
732 		err = smb_odir_read(sr, od, odirent, &eos);
733 		if ((err != 0) || (eos))
734 			break;
735 
736 		(void) smb_mangle_name(odirent->od_ino, odirent->od_name,
737 		    shortname, name83, 1);
738 
739 		if (utf8_strcasecmp(name, shortname) == 0) {
740 			(void) strlcpy(real_name, odirent->od_name,
741 			    realname_size);
742 			kmem_free(odirent, sizeof (smb_odirent_t));
743 			smb_odir_release(od);
744 			smb_odir_close(od);
745 			return (0);
746 		}
747 	}
748 
749 	kmem_free(odirent, sizeof (smb_odirent_t));
750 	smb_odir_release(od);
751 	smb_odir_close(od);
752 	return (ENOENT);
753 }
754