xref: /illumos-gate/usr/src/uts/common/fs/smbsrv/smb_mangle_name.c (revision e3f2c991a8548408db0a2787bd8b43d5124821d3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/param.h>
28 #include <sys/sunddi.h>
29 #include <sys/errno.h>
30 #include <smbsrv/string.h>
31 #include <smbsrv/ctype.h>
32 #include <smbsrv/smb_i18n.h>
33 #include <smbsrv/smb_vops.h>
34 #include <smbsrv/smb_incl.h>
35 #include <smbsrv/smb_fsops.h>
36 
37 #define	SMB_NAME83_BASELEN	8
38 #define	SMB_NAME83_LEN		12
39 
/*
 * Characters we don't allow in DOS file names.
 * If a filename contains any of these chars, it should get mangled.
 *
 * The list holds the control characters 0x01 - 0x1f, the space character
 * and the punctuation " / \ : | < > * ?
 *
 * '.' is also an invalid DOS char but since it's a special
 * case it doesn't appear in the list.
 */
static char *invalid_dos_chars =
	"\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017"
	"\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037"
	" \"/\\:|<>*?";

/*
 * According to MSKB article #142982, Windows deletes invalid chars and
 * spaces from file name in mangling process; and invalid chars include:
 * ."/\[]:;=,
 *
 * But some of these chars and some other chars (e.g. +) are replaced
 * with underscore (_). They are introduced here as special chars.
 */
static char *special_chars = "[];=,+";

/*
 * isinvalid(c)
 *
 * Non-zero if c appears in invalid_dos_chars or has its high bit (0x80)
 * set.  Because strchr() also matches the terminating NUL of the list,
 * isinvalid(0) is non-zero as well.
 *
 * The argument is parenthesized so that an expression argument (for
 * example a conditional expression) is evaluated as a unit.  It is still
 * evaluated twice, so it must not have side effects.
 */
#define	isinvalid(c)	(strchr(invalid_dos_chars, (c)) || ((c) & 0x80))
63 
64 static int smb_match_unknown(char *name, char *pattern);
65 static boolean_t smb_is_reserved_dos_name(const char *name);
66 
67 /*
68  * smb_match_name
69  *
70  * This function will mangle the "name" field and save the resulted
71  * shortname to the "shortname" field and 8.3 name to "name83" field.
72  * The three fields, "name", "shortname" and "name83" will then be
73  * sent for pattern match with "pattern" field.
74  *
75  * The 0 is returned when the name is a reserved dos name, no match
76  * for the pattern or any type of failure. The 1 is returned when
77  * there is a match.
78  */
79 int
80 smb_match_name(ino64_t fileid, char *name, char *pattern, boolean_t ignore_case)
81 {
82 	int rc = 0;
83 	int force;
84 	char name83[SMB_SHORTNAMELEN];
85 	char shortname[SMB_SHORTNAMELEN];
86 
87 	/* Leading or trailing dots are disallowed */
88 	if (smb_is_reserved_dos_name(name))
89 		return (0);
90 
91 	for (force = 0; (force < 2 && rc == 0); force++) {
92 		(void) smb_mangle_name(fileid, name, shortname, name83, force);
93 
94 		rc = smb_match_ci(pattern, name);
95 
96 		/* If no match, check for shortname (if any) */
97 
98 		if (rc == 0 && strchr(pattern, '~'))
99 			if (*shortname != 0)
100 				rc = smb_match_ci(pattern, shortname);
101 
102 		/*
103 		 * Sigh... DOS Shells use short name
104 		 * interchangeably with long case sensitive
105 		 * names. So check that too...
106 		 */
107 		if ((rc == 0) && !ignore_case)
108 			rc = smb_match83(pattern, name83);
109 
110 		/*
111 		 * Still not found and potentially a premangled name...
112 		 * Check to see if the butt-head programmer is
113 		 * assuming that we mangle names in the same manner
114 		 * as NT...
115 		 */
116 		if (rc == 0)
117 			rc = smb_match_unknown(name, pattern);
118 	}
119 
120 	return (rc);
121 }
122 
/*
 * smb_match_unknown
 *
 * The assumptions this piece of code makes about the format of pattern
 * and name were never documented; if anybody figures out the intent,
 * please put a comment here and change the function's name!
 *
 * What the code does:
 *
 *   - The pattern must be upper case (utf8_isstrupr() > 0), otherwise
 *     there is no match.
 *   - The name is walked with spaces skipped; each upper-cased name
 *     character is compared with the next pattern character until one
 *     differs or the name ends.
 *   - If the last pattern character examined is a '~' that is not the
 *     first pattern character, and something follows it, the digits after
 *     the '~' are skipped.
 *   - If the pattern continues with '.', the name is advanced past its
 *     next '.' and the upper-cased remainder of the name is compared with
 *     the remainder of the pattern.
 *   - The result is 1 only if the pattern was consumed up to its
 *     terminating NUL; otherwise it is 0.
 *
 * The name is never read beyond its terminating NUL: the name cursor is
 * only advanced after a non-NUL character has been seen.
 *
 * NOTE(review): when the name's extension ends before or together with the
 * pattern's extension, pc still holds the last compared character rather
 * than 0, so such a name is reported as not matching.  It is unclear
 * whether that is intended; behavior is left unchanged.
 */
static int
smb_match_unknown(char *name, char *pattern)
{
	int rc;
	char nc, pc;
	char *np, *pp;

	rc = 0;
	if (utf8_isstrupr(pattern) <= 0)
		return (rc);

	np = name;
	pp = pattern;

	pc = *pattern;
	/* Compare the leading part of the name, ignoring spaces */
	while ((nc = *np) != 0) {
		np++;
		if (nc == ' ')
			continue;

		nc = mts_toupper(nc);
		if ((pc = *pp++) != nc)
			break;
	}

	if ((pc == '~') &&
	    (pp != (pattern + 1)) &&
	    ((pc = *pp++) != 0)) {
		/* Skip the numeric part that follows the tilde */
		while (mts_isdigit(pc))
			pc = *pp++;

		if (pc == '.') {
			/* Move the name cursor just past its next dot */
			while ((nc = *np) != 0) {
				np++;
				if (nc == '.')
					break;
			}

			/* Compare the extensions */
			while ((nc = *np) != 0) {
				np++;
				nc = mts_toupper(nc);
				if ((pc = *pp++) != nc)
					break;
			}
		}

		/* Match only if the whole pattern has been consumed */
		if (pc == 0)
			rc = 1;
	}

	return (rc);
}
182 
183 /*
184  * Return true if name contains characters that are invalid in a file
185  * name or it is a reserved DOS device name.  Otherwise, returns false.
186  *
187  * Control characters (values 0 - 31) and the following characters are
188  * invalid:
189  *	< > : " / \ | ? *
190  */
191 boolean_t
192 smb_is_invalid_filename(const char *name)
193 {
194 	const char *p;
195 
196 	if ((p = strpbrk(name, invalid_dos_chars)) != NULL) {
197 		if (*p != ' ')
198 			return (B_TRUE);
199 	}
200 
201 	return (smb_is_reserved_dos_name(name));
202 }
203 
204 /*
205  * smb_is_reserved_dos_name
206  *
207  * This function checks if the name is a reserved DOS device name.
208  * The device name should not be followed immediately by an extension,
209  * for example, NUL.txt.
210  */
211 static boolean_t
212 smb_is_reserved_dos_name(const char *name)
213 {
214 	static char *cnames[] = { "CLOCK$", "COM1", "COM2", "COM3", "COM4",
215 		"COM5", "COM6", "COM7", "COM8", "COM9", "CON" };
216 	static char *lnames[] = { "LPT1", "LPT2", "LPT3", "LPT4", "LPT5",
217 		"LPT6", "LPT7", "LPT8", "LPT9" };
218 	static char *others[] = { "AUX", "NUL", "PRN" };
219 	char	**reserved;
220 	char	ch;
221 	int	n_reserved;
222 	int	len;
223 	int	i;
224 
225 	ch = mts_toupper(*name);
226 
227 	switch (ch) {
228 	case 'A':
229 	case 'N':
230 	case 'P':
231 		reserved = others;
232 		n_reserved = sizeof (others) / sizeof (others[0]);
233 		break;
234 	case 'C':
235 		reserved = cnames;
236 		n_reserved = sizeof (cnames) / sizeof (cnames[0]);
237 		break;
238 	case 'L':
239 		reserved = lnames;
240 		n_reserved = sizeof (lnames) / sizeof (lnames[0]);
241 		break;
242 	default:
243 		return (B_FALSE);
244 	}
245 
246 	for (i  = 0; i < n_reserved; ++i) {
247 		len = strlen(reserved[i]);
248 
249 		if (utf8_strncasecmp(reserved[i], name, len) == 0) {
250 			ch = *(name + len);
251 			if ((ch == '\0') || (ch == '.'))
252 				return (B_TRUE);
253 		}
254 	}
255 
256 	return (B_FALSE);
257 }
258 
259 /*
260  * smb_needs_mangle
261  *
262  * Determines whether the given name needs to get mangled.
263  *
264  * Here are the (known) rules:
265  *
266  *	1st char is dot (.)
267  *	name length > 12 chars
268  *	# dots > 1
269  *	# dots == 0 and length > 8
270  *	# dots == 1 and name isn't 8.3
271  *	contains illegal chars
272  */
273 int
274 smb_needs_mangle(char *name, char **dot_pos)
275 {
276 	int len, ndots;
277 	char *namep;
278 	char *last_dot;
279 
280 	/*
281 	 * Returning (1) for these cases forces consistency with how
282 	 * these names are treated (smb_mangle_name() will produce an 8.3 name
283 	 * for these)
284 	 */
285 	if ((strcmp(name, ".") == 0) || (strcmp(name, "..") == 0))
286 		return (1);
287 
288 	/* skip the leading dots (if any) */
289 	for (namep = name; *namep == '.'; namep++)
290 		;
291 
292 	len = ndots = 0;
293 	last_dot = 0;
294 	for (; *namep; namep++) {
295 		len++;
296 		if (*namep == '.') {
297 			/* keep the position of last dot */
298 			last_dot = namep;
299 			ndots++;
300 		}
301 	}
302 	*dot_pos = last_dot;
303 
304 	/* Windows mangles names like .a, .abc, or .abcd */
305 	if (*name == '.')
306 		return (1);
307 
308 	if (len > 12)
309 		return (1);
310 
311 	switch (ndots) {
312 	case 0:
313 		/* no dot */
314 		if (len > 8)
315 			return (1);
316 		break;
317 
318 	case 1:
319 		/* just one dot */
320 		/*LINTED E_PTR_DIFF_OVERFLOW*/
321 		if (((last_dot - name) > 8) ||		/* name length > 8 */
322 		    (strlen(last_dot + 1) > 3))		/* extention > 3 */
323 			return (1);
324 		break;
325 
326 	default:
327 		/* more than one dot */
328 		return (1);
329 	}
330 
331 	for (namep = name; *namep; namep++) {
332 		if (!mts_isascii(*namep) ||
333 		    strchr(special_chars, *namep) ||
334 		    strchr(invalid_dos_chars, *namep))
335 			return (1);
336 	}
337 
338 	return (0);
339 }
340 
/*
 * smb_needs_shortname
 *
 * Determine whether a shortname should be generated for a file name that is
 * already in 8.3 format.
 *
 * Parameters:
 *   name - original file name
 *
 * Return:
 *   1 - Shortname is required to be generated.
 *   0 - No shortname needs to be generated.
 *
 * The part before the last dot is examined first, then the part after
 * it.  A shortname is required if either part is neither all upper case
 * nor all lower case.  A name whose first part is empty never requires a
 * shortname.
 *
 * The name is expected to be in 8.3 format already.  Should a longer name
 * be passed in, only as much of each part as fits in the local buffer
 * (8 bytes) is examined, so the buffer can never be overrun.
 *
 * Note
 * =======
 * Windows NT server:       shortname is created only if either
 *                          the filename or extension portion of
 *                          a file is made up of mixed case.
 * Windows 2000 server:     shortname is not created regardless
 *                          of the case.
 * Windows 2003 server:     [Same as Windows NT server.]
 *
 * StorEdge will conform to the rule used by Windows NT/2003 server.
 *
 * For instance:
 *    File      | Create shortname?
 * ================================
 *  nf.txt      | N
 *  NF.TXT      | N
 *  NF.txt      | N
 *  nf          | N
 *  NF          | N
 *  nF.txt      | Y
 *  nf.TxT      | Y
 *  Nf          | Y
 *  nF          | Y
 *
 */
static int
smb_needs_shortname(char *name)
{
	char buf[9];
	size_t len;
	const char *dot_pos;

	dot_pos = strrchr(name, '.');
	/*LINTED E_PTRDIFF_OVERFLOW*/
	len = (dot_pos == NULL) ? strlen(name) : (size_t)(dot_pos - name);

	/* Nothing in front of the dot: no shortname */
	if (len == 0)
		return (0);

	/*
	 * The size handed to snprintf() includes the terminating NUL, so
	 * len + 1 copies exactly the name portion.  Never let it exceed
	 * the size of buf.
	 */
	if (len >= sizeof (buf))
		len = sizeof (buf) - 1;

	/* First, examine the name portion of the file */
	(void) snprintf(buf, len + 1, "%s", name);
	/* if the name contains both lower and upper cases */
	if (utf8_isstrupr(buf) == 0 && utf8_isstrlwr(buf) == 0)
		return (1);

	if (dot_pos != NULL) {
		/* Next, examine the extension portion of the file */
		(void) snprintf(buf, sizeof (buf), "%s", dot_pos + 1);
		/* if the extension contains both lower and upper cases */
		if (utf8_isstrupr(buf) == 0 && utf8_isstrlwr(buf) == 0)
			return (1);
	}

	return (0);
}
412 
413 /*
414  * smb_mangle_char
415  *
416  * If given char is an invalid DOS character or it's not an
417  * ascii char, it should be deleted from mangled and 8.3 name.
418  *
419  * If given char is one of special chars, it should be replaced
420  * with '_'.
421  *
422  * Otherwise just make it upper case.
423  */
424 static unsigned char
425 smb_mangle_char(unsigned char ch)
426 {
427 	if (isinvalid(ch))
428 		return (0);
429 
430 	if (strchr(special_chars, ch))
431 		return ('_');
432 
433 	return (mts_toupper(ch));
434 }
435 
/*
 * smb_generate_mangle
 *
 * Generate a mangle string in buf: a '~' followed by the base-36 digits
 * of fid, least significant digit first.  At most (buflen - 1) characters
 * are generated and the string is always NUL terminated when buflen is
 * at least 1.  When buflen is at least 3 the mangle contains at least
 * 2 characters.
 *
 * A fid of 0 has no digits of its own, so it is treated as (uint64_t)-1.
 *
 * If buflen is less than 2 there is no room for the '~' and a
 * terminator; buf is set to the empty string (when buflen is 1) and
 * 0 is returned.
 *
 * Returns the number of chars in the generated mangle, not counting the
 * terminating NUL.
 */
static int
smb_generate_mangle(uint64_t fid, unsigned char *buf, size_t buflen)
{
	static const char base36[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
	unsigned char *p = buf;
	size_t room;

	if (buflen < 2) {
		if (buflen == 1)
			*buf = '\0';
		return (0);
	}

	if (fid == 0)
		fid = (uint64_t)-1;

	*p++ = '~';
	/* room: digits that still fit once '~' and NUL are accounted for */
	for (room = buflen - 2; (room > 0) && (fid > 0); fid /= 36, room--)
		*p++ = base36[fid % 36];
	*p = '\0';

	return ((int)(p - buf));
}
461 
462 /*
463  * smb_maybe_mangled_name
464  *
465  * Mangled names should be valid DOS file names: less than 12 characters
466  * long, contain at least one tilde character and conform to an 8.3 name
467  * format.
468  *
469  * Returns true if the name looks like a mangled name.
470  */
471 int
472 smb_maybe_mangled_name(char *name)
473 {
474 	const char *p;
475 	boolean_t has_tilde = B_FALSE;
476 	int ndots = 0;
477 	int i;
478 
479 	for (p = name, i = 0; (*p != '\0') && (i < SMB_NAME83_LEN); i++, p++) {
480 		if ((strchr(special_chars, *p) != NULL) ||
481 		    (strchr(invalid_dos_chars, *p) != NULL))
482 			return (B_FALSE);
483 
484 		if (*p == '.') {
485 			if ((++ndots) > 1)
486 				return (B_FALSE);
487 		}
488 
489 		if ((*p == '~') && (i < SMB_NAME83_BASELEN))
490 			has_tilde = B_TRUE;
491 
492 		if (*p == '.' && !has_tilde)
493 			return (B_FALSE);
494 	}
495 
496 	return ((*p == 0) && has_tilde);
497 }
498 
/*
 * smb_mangle_name
 *
 * Microsoft knowledge base article #142982 describes how Windows
 * generates 8.3 filenames from long file names. Some other details
 * can be found in article #114816.
 *
 * The function first checks to see whether the given name needs mangling.
 * If not, and the force parameter is not set, then no mangling is done,
 * but both the shortname (if needed) and the 8.3 name are produced and
 * returned.
 *
 * If the "force" parameter is set (as will be the case for case-insensitive
 * collisions), then the name will be mangled.
 *
 * Whenever mangling is needed, both the shortname and the 8.3 names are
 * produced and returned.
 *
 * The 8.3 name is always 12 characters long: an 8 character base padded
 * with spaces, a dot, and a 3 character extension padded with spaces.
 * For example, the xxx.xy in 8.3 format will be "XXX     .XY ".
 *
 * For "." and ".." no shortname is produced (shortname is set to the
 * empty string) and the 8.3 name is the name itself in padded form.
 *
 * The shortname and name83 buffers must be large enough for 12 characters
 * plus the terminating NUL; nothing in this function checks their size.
 *
 * Every return path in this function returns 1.
 */

int smb_mangle_name(
	ino64_t fileid,		/* inode number to generate unique mangle */
	char *name,		/* original file name */
	char *shortname,	/* mangled name (if applicable) */
	char *name83,		/* (mangled) name in 8.3 format */
	int force)		/* force mangling even if mangling is not */
				/* needed according to standard algorithm */
{
	int avail, len;
	unsigned char ch;
	unsigned char mangle_buf[SMB_NAME83_BASELEN];
	unsigned char *namep;
	unsigned char *manglep;
	unsigned char *out_short;
	unsigned char *out_83;
	char *dot_pos = NULL;

	/*
	 * NOTE:
	 * This function used to consider filename case
	 * in order to mangle. I removed those checks.
	 */

	/* Start with both results empty */
	*shortname = *name83 = 0;

	/* Allow dot and dot dot up front */
	if (strcmp(name, ".") == 0) {
		/* no shortname */
		(void) strcpy(name83, ".       .   ");
		return (1);
	}

	if (strcmp(name, "..") == 0) {
		/* no shortname */
		(void) strcpy(name83, "..      .   ");
		return (1);
	}

	out_short = (unsigned char *)shortname;
	out_83 = (unsigned char *)name83;

	/*
	 * smb_needs_mangle() is evaluated first, so dot_pos is always set by
	 * it (last dot after any leading dots, or NULL) even when force is
	 * set.
	 */
	if ((smb_needs_mangle(name, &dot_pos) == 0) && (force == 0)) {
		/* no mangle */

		/* check if shortname is required or not */
		if (smb_needs_shortname(name)) {
			/* the shortname is the upper-cased name */
			namep = (unsigned char *)name;
			while (*namep)
				*out_short++ = mts_toupper(*namep++);
			*out_short = '\0';
		}

		/*
		 * Start from a blank, already terminated 8.3 template and
		 * overwrite the base (offset 0) and the extension (offset 9)
		 * in place; the padding comes from the template.
		 */
		out_83 = (unsigned char *)name83;
		(void) strcpy((char *)out_83, "        .   ");
		while (*name && *name != '.')
			*out_83++ = mts_toupper(*name++);

		if (*name == '.') {
			/* copy extension */
			name++;
			out_83 = (unsigned char *)name83 + 9;
			while (*name)
				*out_83++ = mts_toupper(*name++);
		}
		return (1);
	}

	/*
	 * Mangling: the base is made of up to "avail" characters taken from
	 * the name followed by the "len" characters of the generated mangle,
	 * so that the two together never exceed 8 characters.
	 */
	len = smb_generate_mangle(fileid, mangle_buf, SMB_NAME83_BASELEN);
	avail = SMB_NAME83_BASELEN - len;

	/*
	 * generated mangle part has always less than 8 chars, so
	 * use the chars before the first dot in filename
	 * and try to generate a full 8 char name.
	 */

	/* skip the leading dots (if any) */
	for (namep = (unsigned char *)name; *namep == '.'; namep++)
		;

	/* Characters that smb_mangle_char() maps to 0 are dropped */
	for (; avail && *namep && (*namep != '.'); namep++) {
		ch = smb_mangle_char(*namep);
		if (ch == 0)
			continue;
		*out_short++ = *out_83++ = ch;
		avail--;
	}

	/* Copy in mangled part */
	manglep = mangle_buf;

	while (*manglep)
		*out_short++ = *out_83++ = *(manglep++);

	/* Pad any leftover in 8.3 name with spaces */
	while (avail--)
		*out_83++ = ' ';

	/* Work on extension now */
	avail = 3;
	*out_83++ = '.';
	if (dot_pos) {
		/* The extension is whatever follows the last dot */
		namep = (unsigned char *)dot_pos + 1;
		if (*namep != 0) {
			/*
			 * The shortname gets its dot as soon as something
			 * follows the last dot, even if every one of those
			 * characters ends up being dropped below.
			 */
			*out_short++ = '.';
			for (; avail && *namep; namep++) {
				ch = smb_mangle_char(*namep);
				if (ch == 0)
					continue;

				*out_short++ = *out_83++ = ch;
				avail--;
			}
		}
	}

	/* Pad the extension of the 8.3 name with spaces */
	while (avail--)
		*out_83++ = ' ';

	*out_short = *out_83 = '\0';

	return (1);
}
643 
/*
 * smb_unmangle_name
 *
 * Given a mangled name, try to find the real file name as it appears
 * in the directory entry.
 *
 * smb_unmangle_name should only be called on names for which
 * smb_maybe_mangled_name() is true
 *
 * The directory is read in chunks of SMB_UNMANGLE_BUFSIZE bytes.  Each
 * entry's name is mangled (with mangling forced) using the entry's inode
 * number, and the resulting shortname is compared, ignoring case, with
 * the given name.  The first entry that matches wins.
 *
 * File systems which have the VFSFT_DIRENTFLAGS feature will return the
 * directory entries as a buffer of edirent_t structures. Others will
 * return a buffer of dirent64_t structures. A union is used for the
 * pointer into the buffer (bufptr, edp and dp).
 * The ed_name/d_name is NUL terminated by the file system.
 *
 * Returns:
 *   0       - SUCCESS. Unmangled name is returned in namebuf, truncated
 *             to buflen if necessary.
 *   EINVAL  - a parameter was invalid.
 *   ENOTDIR - dnode is not a directory node.
 *   ENOENT  - an unmangled name could not be found.
 *   other   - a non-zero error returned by smb_vop_readdir().
 */
#define	SMB_UNMANGLE_BUFSIZE	(4 * 1024)
int
smb_unmangle_name(smb_node_t *dnode, char *name, char *namebuf,
    int buflen, uint32_t flags)
{
	int		err, eof, bufsize, reclen;
	uint64_t	offset;
	ino64_t		ino;
	boolean_t	is_edp;
	char		*namep, *buf;
	char		shortname[SMB_SHORTNAMELEN];
	char		name83[SMB_SHORTNAMELEN];
	vnode_t		*vp;
	/* Three views of the current position in the readdir buffer */
	union {
		char		*bufptr;
		edirent_t	*edp;
		dirent64_t	*dp;
	} u;
/*
 * Shorthands for the union members.  They are not #undef'd, so they stay
 * defined for the rest of the file.
 */
#define	bufptr	u.bufptr
#define	edp		u.edp
#define	dp		u.dp

	if (dnode == NULL || name == NULL || namebuf == NULL || buflen == 0)
		return (EINVAL);

	ASSERT(smb_maybe_mangled_name(name) != 0);

	vp = dnode->vp;
	if (vp->v_type != VDIR)
		return (ENOTDIR);

	*namebuf = '\0';
	/* Selects which of the two entry layouts is used below */
	is_edp = vfs_has_feature(vp->v_vfsp, VFSFT_DIRENTFLAGS);

	buf = kmem_alloc(SMB_UNMANGLE_BUFSIZE, KM_SLEEP);
	bufsize = SMB_UNMANGLE_BUFSIZE;
	offset = 0;

	/*
	 * bufsize is passed by reference; presumably smb_vop_readdir()
	 * replaces it with the number of bytes it returned (confirm against
	 * smb_vop_readdir).  It is reset to the full buffer size before the
	 * next call.
	 */
	while ((err = smb_vop_readdir(vp, offset, buf, &bufsize,
	    &eof, flags, kcred)) == 0) {
		/* Nothing returned: no more entries to look at */
		if (bufsize == 0) {
			err = ENOENT;
			break;
		}

		bufptr = buf;
		reclen = 0;

		/* Step through the entries using each entry's record length */
		while ((bufptr += reclen) < buf + bufsize) {
			if (is_edp) {
				reclen = edp->ed_reclen;
				offset = edp->ed_off;
				ino = edp->ed_ino;
				namep = edp->ed_name;
			} else {
				reclen = dp->d_reclen;
				offset = dp->d_off;
				ino = dp->d_ino;
				namep = dp->d_name;
			}

			/* Force mangling so that a shortname is produced */
			(void) smb_mangle_name(ino, namep,
			    shortname, name83, 1);

			if (utf8_strcasecmp(name, shortname) == 0) {
				(void) strlcpy(namebuf, namep, buflen);
				kmem_free(buf, SMB_UNMANGLE_BUFSIZE);
				return (0);
			}
		}

		/* Whole directory scanned without finding a match */
		if (eof) {
			err = ENOENT;
			break;
		}

		/* offset now holds the position to resume reading from */
		bufsize = SMB_UNMANGLE_BUFSIZE;
	}

	kmem_free(buf, SMB_UNMANGLE_BUFSIZE);
	return (err);
}
747