xref: /illumos-gate/usr/src/uts/common/fs/smbsrv/smb_mangle_name.c (revision 98c507c4288789fc67365c4cb51f80eb641e7182)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/param.h>
28 #include <sys/sunddi.h>
29 #include <sys/errno.h>
30 #include <smbsrv/string.h>
31 #include <smbsrv/smb_vops.h>
32 #include <smbsrv/smb_kproto.h>
33 #include <smbsrv/smb_fsops.h>
34 
/*
 * Characters we don't allow in DOS file names.
 * If a filename contains any of these chars, it should get mangled.
 *
 * The set consists of the control characters 0x01 - 0x1f, the space
 * character and the punctuation characters " / \ : | < > * ?
 *
 * '.' is also an invalid DOS char but since it's a special
 * case it doesn't appear in the list.
 *
 * Note that space is a member of this set; code that wants to tolerate
 * spaces (see smb_is_invalid_filename()) has to exclude it explicitly.
 */
static char *invalid_dos_chars =
	"\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017"
	"\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037"
	" \"/\\:|<>*?";
46 
/*
 * According to MSKB article #142982, Windows deletes invalid chars and
 * spaces from the file name during the mangling process; the invalid
 * chars include:
 * ."/\[]:;=,
 *
 * However, some of these chars and some other chars (e.g. +) are replaced
 * with an underscore (_) rather than being deleted. Those chars are
 * collected here as "special chars"; smb_mangle_char() maps each of them
 * to '_'.
 */
static char *special_chars = "[];=,+";
56 
/*
 * isinvalid(c)
 *
 * Evaluates to non-zero if c is a member of invalid_dos_chars or has its
 * high bit (0x80) set, i.e. it is not a 7-bit character.
 *
 * Because strchr() also matches the string terminator, isinvalid(0) is
 * non-zero as well.
 *
 * The argument is evaluated twice, so it must not have side effects. It
 * is parenthesized in the expansion so that expression arguments (for
 * example a conditional expression) are handled correctly.
 */
#define	isinvalid(c)	(strchr(invalid_dos_chars, (c)) || ((c) & 0x80))
58 
/* Forward declarations of file-local helpers used before their definition. */
static int smb_match_unknown(char *name, char *pattern);
static boolean_t smb_is_reserved_dos_name(const char *name);
61 
62 /*
63  * smb_match_name
64  *
65  * This function will mangle the "name" field and save the resulted
66  * shortname to the "shortname" field and 8.3 name to "name83" field.
67  * The three fields, "name", "shortname" and "name83" will then be
68  * sent for pattern match with "pattern" field.
69  *
70  * The 0 is returned when the name is a reserved dos name, no match
71  * for the pattern or any type of failure. The 1 is returned when
72  * there is a match.
73  */
74 int
75 smb_match_name(ino64_t fileid, char *name, char *pattern, boolean_t ignore_case)
76 {
77 	int rc = 0;
78 	int force;
79 	char name83[SMB_SHORTNAMELEN];
80 	char shortname[SMB_SHORTNAMELEN];
81 
82 	/* Leading or trailing dots are disallowed */
83 	if (smb_is_reserved_dos_name(name))
84 		return (0);
85 
86 	for (force = 0; (force < 2 && rc == 0); force++) {
87 		(void) smb_mangle_name(fileid, name, shortname, name83, force);
88 
89 		rc = smb_match_ci(pattern, name);
90 
91 		/* If no match, check for shortname (if any) */
92 
93 		if (rc == 0 && strchr(pattern, '~'))
94 			if (*shortname != 0)
95 				rc = smb_match_ci(pattern, shortname);
96 
97 		/*
98 		 * Sigh... DOS Shells use short name
99 		 * interchangeably with long case sensitive
100 		 * names. So check that too...
101 		 */
102 		if ((rc == 0) && !ignore_case)
103 			rc = smb_match83(pattern, name83);
104 
105 		/*
106 		 * Still not found and potentially a premangled name...
107 		 * Check to see if the butt-head programmer is
108 		 * assuming that we mangle names in the same manner
109 		 * as NT...
110 		 */
111 		if (rc == 0)
112 			rc = smb_match_unknown(name, pattern);
113 	}
114 
115 	return (rc);
116 }
117 
/*
 * smb_match_unknown
 *
 * Last-resort comparison of a long file name against a pattern that
 * looks like a premangled name of the form
 *
 *	<PREFIX>~<digits>[.<EXT>]
 *
 * What the code does, step by step:
 *
 *   - The pattern must be reported as upper case by smb_isstrupr(),
 *     otherwise there is no match.
 *   - The name is walked from the start, ignoring spaces and
 *     upper-casing each char, and compared against the pattern until
 *     the first difference (or the end of the name).
 *   - The last pattern char examined must be a '~' that is not the
 *     first char of the pattern, and it must be followed by at least
 *     one more char.  Any digits following the '~' are skipped.
 *   - If the pattern continues with '.', the name is advanced past its
 *     next '.' and the rest of the name is compared (upper-cased)
 *     against the rest of the pattern.
 *   - The result is a match (1) only if the scan stopped on the end of
 *     the pattern.  Note that, as written, a name whose extension has
 *     exactly the same length as the pattern's extension does not
 *     satisfy this; the name's extension has to be longer.
 *     NOTE(review): this looks unintended but is preserved here.
 *
 * The name pointer is never advanced beyond the terminating NUL of
 * "name"; if the name has no '.' to line up with the pattern's '.',
 * the extension comparison is skipped and there is no match.
 *
 * Returns 1 on match, 0 otherwise.
 */
static int
smb_match_unknown(char *name, char *pattern)
{
	int rc;
	char nc, pc;
	char *np, *pp;

	rc = 0;
	if (smb_isstrupr(pattern) <= 0)
		return (rc);

	np = name;
	pp = pattern;

	pc = *pattern;
	for (; *np != '\0'; np++) {
		if (*np == ' ')
			continue;

		nc = smb_toupper(*np);
		if ((pc = *pp++) != nc) {
			/* the mismatching name char is consumed */
			np++;
			break;
		}
	}

	if ((pc == '~') &&
	    (pp != (pattern + 1)) &&
	    ((pc = *pp++) != 0)) {
		/* skip the numeric part of the mangle */
		while (smb_isdigit(pc))
			pc = *pp++;

		if (pc == '.') {
			/* find the next dot in the name, if there is one */
			while ((*np != '\0') && (*np != '.'))
				np++;

			if (*np == '.') {
				np++;
				while ((nc = *np) != 0) {
					np++;
					nc = smb_toupper(nc);
					if ((pc = *pp++) != nc)
						break;
				}
			}
		}

		/* match only if the whole pattern has been consumed */
		if (pc == 0)
			rc = 1;
	}

	return (rc);
}
177 
178 /*
179  * Return true if name contains characters that are invalid in a file
180  * name or it is a reserved DOS device name.  Otherwise, returns false.
181  *
182  * Control characters (values 0 - 31) and the following characters are
183  * invalid:
184  *	< > : " / \ | ? *
185  */
186 boolean_t
187 smb_is_invalid_filename(const char *name)
188 {
189 	const char *p;
190 
191 	if ((p = strpbrk(name, invalid_dos_chars)) != NULL) {
192 		if (*p != ' ')
193 			return (B_TRUE);
194 	}
195 
196 	return (smb_is_reserved_dos_name(name));
197 }
198 
199 /*
200  * smb_is_reserved_dos_name
201  *
202  * This function checks if the name is a reserved DOS device name.
203  * The device name should not be followed immediately by an extension,
204  * for example, NUL.txt.
205  */
206 static boolean_t
207 smb_is_reserved_dos_name(const char *name)
208 {
209 	static char *cnames[] = { "CLOCK$", "COM1", "COM2", "COM3", "COM4",
210 		"COM5", "COM6", "COM7", "COM8", "COM9", "CON" };
211 	static char *lnames[] = { "LPT1", "LPT2", "LPT3", "LPT4", "LPT5",
212 		"LPT6", "LPT7", "LPT8", "LPT9" };
213 	static char *others[] = { "AUX", "NUL", "PRN" };
214 	char	**reserved;
215 	char	ch;
216 	int	n_reserved;
217 	int	len;
218 	int	i;
219 
220 	ch = smb_toupper(*name);
221 
222 	switch (ch) {
223 	case 'A':
224 	case 'N':
225 	case 'P':
226 		reserved = others;
227 		n_reserved = sizeof (others) / sizeof (others[0]);
228 		break;
229 	case 'C':
230 		reserved = cnames;
231 		n_reserved = sizeof (cnames) / sizeof (cnames[0]);
232 		break;
233 	case 'L':
234 		reserved = lnames;
235 		n_reserved = sizeof (lnames) / sizeof (lnames[0]);
236 		break;
237 	default:
238 		return (B_FALSE);
239 	}
240 
241 	for (i  = 0; i < n_reserved; ++i) {
242 		len = strlen(reserved[i]);
243 
244 		if (smb_strcasecmp(reserved[i], name, len) == 0) {
245 			ch = *(name + len);
246 			if ((ch == '\0') || (ch == '.'))
247 				return (B_TRUE);
248 		}
249 	}
250 
251 	return (B_FALSE);
252 }
253 
254 /*
255  * smb_needs_mangle
256  *
257  * Determines whether the given name needs to get mangled.
258  *
259  * Here are the (known) rules:
260  *
261  *	1st char is dot (.)
262  *	name length > 12 chars
263  *	# dots > 1
264  *	# dots == 0 and length > 8
265  *	# dots == 1 and name isn't 8.3
266  *	contains illegal chars
267  */
268 int
269 smb_needs_mangle(char *name, char **dot_pos)
270 {
271 	int len, ndots;
272 	char *namep;
273 	char *last_dot;
274 
275 	/*
276 	 * Returning (1) for these cases forces consistency with how
277 	 * these names are treated (smb_mangle_name() will produce an 8.3 name
278 	 * for these)
279 	 */
280 	if ((strcmp(name, ".") == 0) || (strcmp(name, "..") == 0))
281 		return (1);
282 
283 	/* skip the leading dots (if any) */
284 	for (namep = name; *namep == '.'; namep++)
285 		;
286 
287 	len = ndots = 0;
288 	last_dot = 0;
289 	for (; *namep; namep++) {
290 		len++;
291 		if (*namep == '.') {
292 			/* keep the position of last dot */
293 			last_dot = namep;
294 			ndots++;
295 		}
296 	}
297 	*dot_pos = last_dot;
298 
299 	/* Windows mangles names like .a, .abc, or .abcd */
300 	if (*name == '.')
301 		return (1);
302 
303 	if (len > 12)
304 		return (1);
305 
306 	switch (ndots) {
307 	case 0:
308 		/* no dot */
309 		if (len > 8)
310 			return (1);
311 		break;
312 
313 	case 1:
314 		/* just one dot */
315 		/*LINTED E_PTR_DIFF_OVERFLOW*/
316 		if (((last_dot - name) > 8) ||		/* name length > 8 */
317 		    (strlen(last_dot + 1) > 3))		/* extention > 3 */
318 			return (1);
319 		break;
320 
321 	default:
322 		/* more than one dot */
323 		return (1);
324 	}
325 
326 	for (namep = name; *namep; namep++) {
327 		if (!smb_isascii(*namep) ||
328 		    strchr(special_chars, *namep) ||
329 		    strchr(invalid_dos_chars, *namep))
330 			return (1);
331 	}
332 
333 	return (0);
334 }
335 
/*
 * smb_needs_shortname
 *
 * Determine whether a shortname should be generated for a file name that is
 * already in 8.3 format.
 *
 * Parameters:
 *   name - original file name
 *
 * Return:
 *   1 - Shortname is required to be generated.
 *   0 - No shortname needs to be generated.
 *
 * The part of the name before the last dot and the part after it are
 * examined separately, each through a local 9-byte buffer.  Both copies
 * are bounded by the size of that buffer, so a name that is not really
 * in 8.3 format is examined in truncated form rather than overrunning
 * the buffer.  A name with nothing before the last dot yields 0.
 *
 * Note
 * =======
 * Windows NT server:       shortname is created only if either
 *                          the filename or extension portion of
 *                          a file is made up of mixed case.
 * Windows 2000 server:     shortname is not created regardless
 *                          of the case.
 * Windows 2003 server:     [Same as Windows NT server.]
 *
 * StorEdge will conform to the rule used by Windows NT/2003 server.
 *
 * For instance:
 *    File      | Create shortname?
 * ================================
 *  nf.txt      | N
 *  NF.TXT      | N
 *  NF.txt      | N
 *  nf          | N
 *  NF          | N
 *  nF.txt      | Y
 *  nf.TxT      | Y
 *  Nf          | Y
 *  nF          | Y
 *
 */
static int
smb_needs_shortname(char *name)
{
	char buf[9];
	size_t bufsz;
	int len;
	int create = 0;
	const char *dot_pos;

	dot_pos = strrchr(name, '.');
	/*LINTED E_PTRDIFF_OVERFLOW*/
	len = (!dot_pos) ? strlen(name) : (dot_pos - name);
	if (len == 0)
		return (0);

	/*
	 * First, examine the name portion of the file.  Copy at most
	 * len chars of it, but never more than buf can hold.
	 */
	bufsz = (size_t)len + 1;
	if (bufsz > sizeof (buf))
		bufsz = sizeof (buf);
	(void) snprintf(buf, bufsz, "%s", name);

	/* if the name contains both lower and upper cases */
	if (smb_isstrupr(buf) == 0 && smb_isstrlwr(buf) == 0) {
		/* create shortname */
		create = 1;
	} else if (dot_pos) {
		/* Next, examine the extension portion of the file */
		(void) snprintf(buf, sizeof (buf), "%s", dot_pos + 1);
		/*
		 * if the extension contains both lower and upper
		 * cases
		 */
		if (smb_isstrupr(buf) == 0 && smb_isstrlwr(buf) == 0)
			/* create shortname */
			create = 1;
	}

	return (create);
}
407 
408 /*
409  * smb_mangle_char
410  *
411  * If given char is an invalid DOS character or it's not an
412  * ascii char, it should be deleted from mangled and 8.3 name.
413  *
414  * If given char is one of special chars, it should be replaced
415  * with '_'.
416  *
417  * Otherwise just make it upper case.
418  */
419 static unsigned char
420 smb_mangle_char(unsigned char ch)
421 {
422 	if (isinvalid(ch))
423 		return (0);
424 
425 	if (strchr(special_chars, ch))
426 		return ('_');
427 
428 	return (smb_toupper(ch));
429 }
430 
/*
 * smb_generate_mangle
 *
 * Generate a mangle string in buf: a '~' followed by the base-36 digits
 * of fid, least significant digit first, using 0-9 and A-Z.
 *
 * At most (buflen - 1) characters plus the terminating NUL are written,
 * so the digit sequence is truncated if it does not fit.  For
 * buflen >= 3 the result always has at least 2 characters ('~' and one
 * digit).  A fid of 0 has no digits of its own and is replaced with the
 * all-ones value so that it still produces a mangle.
 *
 * If buflen is too small to hold even "~" and the terminator
 * (buflen < 2), nothing but an empty string is produced (when
 * buflen == 1) and 0 is returned.
 *
 * Returns the number of chars in the generated mangle, not counting
 * the terminating NUL.
 */
static int
smb_generate_mangle(uint64_t fid, unsigned char *buf, size_t buflen)
{
	static const char base36[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
	unsigned char *p = buf;
	size_t i;

	/* not enough room for the tilde and the terminator */
	if (buflen < 2) {
		if (buflen == 1)
			*buf = '\0';
		return (0);
	}

	if (fid == 0)
		fid = (uint64_t)-1;

	*p++ = '~';
	/* i counts the bytes used so far, including the terminator */
	for (i = 2; (i < buflen) && (fid > 0); fid /= 36, ++i)
		*p++ = base36[fid % 36];
	*p = '\0';

	return ((int)(i - 1));
}
456 
457 /*
458  * smb_maybe_mangled_name
459  *
460  * Mangled names should be valid DOS file names: less than 12 characters
461  * long, contain at least one tilde character and conform to an 8.3 name
462  * format.
463  *
464  * Returns true if the name looks like a mangled name.
465  */
466 int
467 smb_maybe_mangled_name(char *name)
468 {
469 	const char *p;
470 	boolean_t has_tilde = B_FALSE;
471 	int ndots = 0;
472 	int i;
473 
474 	for (p = name, i = 0; (*p != '\0') && (i < SMB_NAME83_LEN); i++, p++) {
475 		if ((strchr(special_chars, *p) != NULL) ||
476 		    (strchr(invalid_dos_chars, *p) != NULL))
477 			return (B_FALSE);
478 
479 		if (*p == '.') {
480 			if ((++ndots) > 1)
481 				return (B_FALSE);
482 		}
483 
484 		if ((*p == '~') && (i < SMB_NAME83_BASELEN))
485 			has_tilde = B_TRUE;
486 
487 		if (*p == '.' && !has_tilde)
488 			return (B_FALSE);
489 	}
490 
491 	return ((*p == 0) && has_tilde);
492 }
493 
494 /*
495  * smb_mangle_name
496  *
497  * Microsoft knowledge base article #142982 describes how Windows
498  * generates 8.3 filenames from long file names. Some other details
499  * can be found in article #114816.
500  *
501  * The function first checks to see whether the given name needs mangling.
502  * If not, and the force parameter is not set, then no mangling is done,
503  * but both the shortname (if needed) and the 8.3 name are produced and
504  * returned.
505  *
506  * If the "force" parameter is set (as will be the case for case-insensitive
507  * collisions), then the name will be mangled.
508  *
509  * Whenever mangling is needed, both the shortname and the 8.3 names are
510  * produced and returned.
511  *
512  * For example, the xxx.xy in 8.3 format will be "xxx     .xy ".
513  */
514 
515 int smb_mangle_name(
516 	ino64_t fileid,		/* inode number to generate unique mangle */
517 	char *name,		/* original file name */
518 	char *shortname,	/* mangled name (if applicable) */
519 	char *name83,		/* (mangled) name in 8.3 format */
520 	int force)		/* force mangling even if mangling is not */
521 				/* needed according to standard algorithm */
522 {
523 	int avail, len;
524 	unsigned char ch;
525 	unsigned char mangle_buf[SMB_NAME83_BASELEN];
526 	unsigned char *namep;
527 	unsigned char *manglep;
528 	unsigned char *out_short;
529 	unsigned char *out_83;
530 	char *dot_pos = NULL;
531 
532 	/*
533 	 * NOTE:
534 	 * This function used to consider filename case
535 	 * in order to mangle. I removed those checks.
536 	 */
537 
538 	*shortname = *name83 = 0;
539 
540 	/* Allow dot and dot dot up front */
541 	if (strcmp(name, ".") == 0) {
542 		/* no shortname */
543 		(void) strcpy(name83, ".       .   ");
544 		return (1);
545 	}
546 
547 	if (strcmp(name, "..") == 0) {
548 		/* no shortname */
549 		(void) strcpy(name83, "..      .   ");
550 		return (1);
551 	}
552 
553 	out_short = (unsigned char *)shortname;
554 	out_83 = (unsigned char *)name83;
555 
556 	if ((smb_needs_mangle(name, &dot_pos) == 0) && (force == 0)) {
557 		/* no mangle */
558 
559 		/* check if shortname is required or not */
560 		if (smb_needs_shortname(name)) {
561 			namep = (unsigned char *)name;
562 			while (*namep)
563 				*out_short++ = smb_toupper(*namep++);
564 			*out_short = '\0';
565 		}
566 
567 		out_83 = (unsigned char *)name83;
568 		(void) strcpy((char *)out_83, "        .   ");
569 		while (*name && *name != '.')
570 			*out_83++ = smb_toupper(*name++);
571 
572 		if (*name == '.') {
573 			/* copy extension */
574 			name++;
575 			out_83 = (unsigned char *)name83 + 9;
576 			while (*name)
577 				*out_83++ = smb_toupper(*name++);
578 		}
579 		return (1);
580 	}
581 
582 	len = smb_generate_mangle(fileid, mangle_buf, SMB_NAME83_BASELEN);
583 	avail = SMB_NAME83_BASELEN - len;
584 
585 	/*
586 	 * generated mangle part has always less than 8 chars, so
587 	 * use the chars before the first dot in filename
588 	 * and try to generate a full 8 char name.
589 	 */
590 
591 	/* skip the leading dots (if any) */
592 	for (namep = (unsigned char *)name; *namep == '.'; namep++)
593 		;
594 
595 	for (; avail && *namep && (*namep != '.'); namep++) {
596 		ch = smb_mangle_char(*namep);
597 		if (ch == 0)
598 			continue;
599 		*out_short++ = *out_83++ = ch;
600 		avail--;
601 	}
602 
603 	/* Copy in mangled part */
604 	manglep = mangle_buf;
605 
606 	while (*manglep)
607 		*out_short++ = *out_83++ = *(manglep++);
608 
609 	/* Pad any leftover in 8.3 name with spaces */
610 	while (avail--)
611 		*out_83++ = ' ';
612 
613 	/* Work on extension now */
614 	avail = 3;
615 	*out_83++ = '.';
616 	if (dot_pos) {
617 		namep = (unsigned char *)dot_pos + 1;
618 		if (*namep != 0) {
619 			*out_short++ = '.';
620 			for (; avail && *namep; namep++) {
621 				ch = smb_mangle_char(*namep);
622 				if (ch == 0)
623 					continue;
624 
625 				*out_short++ = *out_83++ = ch;
626 				avail--;
627 			}
628 		}
629 	}
630 
631 	while (avail--)
632 		*out_83++ = ' ';
633 
634 	*out_short = *out_83 = '\0';
635 
636 	return (1);
637 }
638 
/*
 * smb_unmangle_name
 *
 * Given a mangled name, try to find the real file name as it appears
 * in the directory entry.
 *
 * smb_unmangle_name should only be called on names for which
 * smb_maybe_mangled_name() is true
 *
 * The directory is read in chunks of SMB_UNMANGLE_BUFSIZE bytes. Each
 * entry's name is force-mangled using the entry's inode number and the
 * resulting shortname is compared with the given name; the first entry
 * whose shortname compares equal supplies the result.
 *
 * File systems which support VFSFT_DIRENTFLAGS will return the
 * directory entries as a buffer of edirent_t structures. Others will
 * return a buffer of dirent64_t structures. A union is used for the
 * pointer into the buffer (bufptr, edp and dp).
 * The ed_name/d_name is NULL terminated by the file system.
 *
 * Parameters:
 *   dnode   - node of the directory to search
 *   name    - mangled name to look for
 *   namebuf - out: unmangled name (set to the empty string if not found)
 *   buflen  - size of namebuf; a longer name is truncated by strlcpy()
 *   flags   - passed through unchanged to smb_vop_readdir()
 *
 * Returns:
 *   0       - SUCCESS. Unmangled name is returned in namebuf.
 *   EINVAL  - a parameter was invalid.
 *   ENOTDIR - dnode is not a directory node.
 *   ENOENT  - an unmangled name could not be found.
 *   Any other error reported by smb_vop_readdir() is returned as is.
 */
#define	SMB_UNMANGLE_BUFSIZE	(4 * 1024)
int
smb_unmangle_name(smb_node_t *dnode, char *name, char *namebuf,
    int buflen, uint32_t flags)
{
	int		err, eof, bufsize, reclen;
	uint64_t	offset;
	ino64_t		ino;
	boolean_t	is_edp;
	char		*namep, *buf;
	char		shortname[SMB_SHORTNAMELEN];
	char		name83[SMB_SHORTNAMELEN];
	vnode_t		*vp;
	/* one cursor into the readdir buffer, viewed as either entry type */
	union {
		char		*bufptr;
		edirent_t	*edp;
		dirent64_t	*dp;
	} u;
	/* shorthand for the union members; these are not #undef'd below */
#define	bufptr	u.bufptr
#define	edp		u.edp
#define	dp		u.dp

	if (dnode == NULL || name == NULL || namebuf == NULL || buflen == 0)
		return (EINVAL);

	ASSERT(smb_maybe_mangled_name(name) != 0);

	vp = dnode->vp;
	if (vp->v_type != VDIR)
		return (ENOTDIR);

	*namebuf = '\0';
	/* determines which entry layout the file system hands back */
	is_edp = vfs_has_feature(vp->v_vfsp, VFSFT_DIRENTFLAGS);

	buf = kmem_alloc(SMB_UNMANGLE_BUFSIZE, KM_SLEEP);
	bufsize = SMB_UNMANGLE_BUFSIZE;
	offset = 0;

	/*
	 * Read the directory one buffer at a time; bufsize is in/out
	 * (buffer capacity in, presumably bytes returned out -- it is
	 * used below as the extent of valid data).
	 */
	while ((err = smb_vop_readdir(vp, offset, buf, &bufsize,
	    &eof, flags, kcred)) == 0) {
		/* nothing more was returned: the name isn't there */
		if (bufsize == 0) {
			err = ENOENT;
			break;
		}

		bufptr = buf;
		reclen = 0;

		/* step through the entries using each entry's own reclen */
		while ((bufptr += reclen) < buf + bufsize) {
			if (is_edp) {
				reclen = edp->ed_reclen;
				offset = edp->ed_off;
				ino = edp->ed_ino;
				namep = edp->ed_name;
			} else {
				reclen = dp->d_reclen;
				offset = dp->d_off;
				ino = dp->d_ino;
				namep = dp->d_name;
			}

			/*
			 * skip non utf8 filename
			 *
			 * u8_validate() may overwrite err here; that is
			 * harmless because err is reassigned (by the next
			 * readdir call or to ENOENT) before it is returned.
			 */
			if (u8_validate(namep, strlen(namep), NULL,
			    U8_VALIDATE_ENTIRE, &err) < 0)
				continue;

			/* force mangling so every entry yields a shortname */
			(void) smb_mangle_name(ino, namep,
			    shortname, name83, 1);

			/*
			 * NOTE(review): a length argument of 0 presumably
			 * means "compare the whole strings" -- confirm
			 * against smb_strcasecmp().
			 */
			if (smb_strcasecmp(name, shortname, 0) == 0) {
				(void) strlcpy(namebuf, namep, buflen);
				kmem_free(buf, SMB_UNMANGLE_BUFSIZE);
				return (0);
			}
		}

		/* whole directory scanned without finding the name */
		if (eof) {
			err = ENOENT;
			break;
		}

		/* reset the in/out size before the next readdir call */
		bufsize = SMB_UNMANGLE_BUFSIZE;
	}

	kmem_free(buf, SMB_UNMANGLE_BUFSIZE);
	return (err);
}
747