xref: /illumos-gate/usr/src/uts/common/fs/smbsrv/smb_mangle_name.c (revision 8d0c3d29bb99f6521f2dc5058a7e4debebad7899)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <sys/types.h>
26 #include <sys/param.h>
27 #include <sys/sunddi.h>
28 #include <sys/errno.h>
29 #include <smbsrv/string.h>
30 #include <smbsrv/smb_vops.h>
31 #include <smbsrv/smb_kproto.h>
32 #include <smbsrv/smb_fsops.h>
33 
/*
 * Characters that are not allowed in DOS file names: the control
 * characters 0x01 - 0x1f, space, and  " / \ : | < > * ?
 * A file name containing any of these characters gets mangled.
 *
 * '.' is also an invalid DOS character, but it needs special handling
 * (base/extension separator) and so is deliberately not in this list.
 *
 * The table is read-only; it is only ever handed to strchr()/strpbrk().
 */
static const char invalid_dos_chars[] =
	"\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017"
	"\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037"
	" \"/\\:|<>*?";
45 
/*
 * According to MSKB article #142982, Windows deletes invalid characters
 * and spaces from the file name during mangling, and the invalid
 * characters include:
 * ."/\[]:;=,
 *
 * Some of those characters, and a few others (e.g. '+'), are however
 * replaced with an underscore ('_') rather than deleted.  Those are
 * listed here as the "special" characters.
 *
 * The table is read-only; it is only ever handed to strchr().
 */
static const char special_chars[] = "[];=,+";
55 
/*
 * isinvalid(c) is non-zero when c may not appear in a mangled name:
 * either it is listed in invalid_dos_chars or it has the high bit set
 * (i.e. it is not 7-bit ASCII).  Because strchr() also matches the
 * string terminator, isinvalid('\0') is true as well.
 *
 * The argument is parenthesized so that expression arguments bind
 * correctly; note that it is still evaluated twice.
 */
#define	isinvalid(c)	\
	((strchr(invalid_dos_chars, (c)) != NULL) || (((c) & 0x80) != 0))
57 
58 static boolean_t smb_is_reserved_dos_name(const char *name);
59 static int smb_generate_mangle(uint64_t, char *, size_t);
60 static char smb_mangle_char(char);
61 
62 /*
63  * smb_match_name
64  *
65  * Don't match reserved dos filenames.
66  * Check name to see if it matches pattern.
67  * Generate the shortname (even if !smb_needs_mangled()) since names may
68  * be mangled to address case conflicts) and check if shortname matches
69  * pattern.
70  *
71  * Returns: B_TRUE  - if there is a match
72  *          B_FALSE - otherwise
73  */
74 boolean_t
75 smb_match_name(ino64_t fid, char *name, char *pattern)
76 {
77 	char shortname[SMB_SHORTNAMELEN];
78 
79 	if (smb_is_reserved_dos_name(name))
80 		return (B_FALSE);
81 
82 	if (smb_match_ci(pattern, name))
83 		return (B_TRUE);
84 
85 	smb_mangle(name, fid, shortname, SMB_SHORTNAMELEN);
86 	if (smb_match_ci(pattern, shortname))
87 		return (B_TRUE);
88 
89 	return (B_FALSE);
90 }
91 
92 /*
93  * Return true if name contains characters that are invalid in a file
94  * name or it is a reserved DOS device name.  Otherwise, returns false.
95  *
96  * Control characters (values 0 - 31) and the following characters are
97  * invalid:
98  *	< > : " / \ | ? *
99  */
100 boolean_t
101 smb_is_invalid_filename(const char *name)
102 {
103 	const char *p;
104 
105 	if ((p = strpbrk(name, invalid_dos_chars)) != NULL) {
106 		if (*p != ' ')
107 			return (B_TRUE);
108 	}
109 
110 	return (smb_is_reserved_dos_name(name));
111 }
112 
113 /*
114  * smb_is_reserved_dos_name
115  *
116  * This function checks if the name is a reserved DOS device name.
117  * The device name should not be followed immediately by an extension,
118  * for example, NUL.txt.
119  */
120 static boolean_t
121 smb_is_reserved_dos_name(const char *name)
122 {
123 	static char *cnames[] = { "CLOCK$", "COM1", "COM2", "COM3", "COM4",
124 		"COM5", "COM6", "COM7", "COM8", "COM9", "CON" };
125 	static char *lnames[] = { "LPT1", "LPT2", "LPT3", "LPT4", "LPT5",
126 		"LPT6", "LPT7", "LPT8", "LPT9" };
127 	static char *others[] = { "AUX", "NUL", "PRN" };
128 	char	**reserved;
129 	char	ch;
130 	int	n_reserved;
131 	int	len;
132 	int	i;
133 
134 	ch = smb_toupper(*name);
135 
136 	switch (ch) {
137 	case 'A':
138 	case 'N':
139 	case 'P':
140 		reserved = others;
141 		n_reserved = sizeof (others) / sizeof (others[0]);
142 		break;
143 	case 'C':
144 		reserved = cnames;
145 		n_reserved = sizeof (cnames) / sizeof (cnames[0]);
146 		break;
147 	case 'L':
148 		reserved = lnames;
149 		n_reserved = sizeof (lnames) / sizeof (lnames[0]);
150 		break;
151 	default:
152 		return (B_FALSE);
153 	}
154 
155 	for (i  = 0; i < n_reserved; ++i) {
156 		len = strlen(reserved[i]);
157 
158 		if (smb_strcasecmp(reserved[i], name, len) == 0) {
159 			ch = *(name + len);
160 			if ((ch == '\0') || (ch == '.'))
161 				return (B_TRUE);
162 		}
163 	}
164 
165 	return (B_FALSE);
166 }
167 
168 /*
169  * smb_needs_mangled
170  *
171  * A name needs to be mangled if any of the following are true:
172  * - the first character is dot (.) and the name is not "." or ".."
173  * - the name contains illegal or special charsacter
174  * - the name name length > 12
175  * - the number of dots == 0 and length > 8
176  * - the number of dots > 1
177  * - the number of dots == 1 and name is not 8.3
178  */
179 boolean_t
180 smb_needs_mangled(const char *name)
181 {
182 	int len, extlen, ndots;
183 	const char *p;
184 	const char *last_dot;
185 
186 	if ((strcmp(name, ".") == 0) || (strcmp(name, "..") == 0))
187 		return (B_FALSE);
188 
189 	if (*name == '.')
190 		return (B_TRUE);
191 
192 	len = 0;
193 	ndots = 0;
194 	last_dot = NULL;
195 	for (p = name; *p != '\0'; ++p) {
196 		if (smb_iscntrl(*p) ||
197 		    (strchr(special_chars, *p) != NULL) ||
198 		    (strchr(invalid_dos_chars, *p)) != NULL)
199 			return (B_TRUE);
200 
201 		if (*p == '.') {
202 			++ndots;
203 			last_dot = p;
204 		}
205 		++len;
206 	}
207 
208 	if ((len > SMB_NAME83_LEN) ||
209 	    (ndots == 0 && len > SMB_NAME83_BASELEN) ||
210 	    (ndots > 1)) {
211 		return (B_TRUE);
212 	}
213 
214 	if (last_dot != NULL) {
215 		extlen = strlen(last_dot + 1);
216 		if ((extlen == 0) || (extlen > SMB_NAME83_EXTLEN))
217 			return (B_TRUE);
218 
219 		if ((len - extlen - 1) > SMB_NAME83_BASELEN)
220 			return (B_TRUE);
221 	}
222 
223 	return (B_FALSE);
224 }
225 
226 /*
227  * smb_mangle_char
228  *
229  * If c is an invalid DOS character or non-ascii, it should
230  * not be used in the mangled name. We return -1 to indicate
231  * an invalid character.
232  *
233  * If c is a special chars, it should be replaced with '_'.
234  *
235  * Otherwise c is returned as uppercase.
236  */
237 static char
238 smb_mangle_char(char c)
239 {
240 	if (isinvalid(c))
241 		return (-1);
242 
243 	if (strchr(special_chars, c))
244 		return ('_');
245 
246 	return (smb_toupper(c));
247 }
248 
/*
 * smb_generate_mangle
 *
 * Generate the mangle string for fid in buf: a '~' followed by the
 * base-36 digits of fid, least significant digit first.  A fid of 0 is
 * treated as (uint64_t)-1 so that at least one digit is produced.
 * Digits that do not fit in buf are dropped.
 *
 * The result is always NUL-terminated and contains at most
 * (buflen - 1) characters; when buflen >= 3 it contains at least 2
 * characters.  A buffer too small to hold even the '~' and the
 * terminator (buflen < 2) yields an empty string, or is left untouched
 * when buflen is 0, instead of being overrun.
 *
 * Returns the number of chars in the generated mangle.
 */
static int
smb_generate_mangle(uint64_t fid, char *buf, size_t buflen)
{
	static const char base36[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
	size_t len = 0;

	if (buflen < 2) {
		if (buflen == 1)
			buf[0] = '\0';
		return (0);
	}

	if (fid == 0)
		fid = (uint64_t)-1;

	buf[len++] = '~';

	/* Always keep one byte in reserve for the terminator. */
	while ((len + 1 < buflen) && (fid > 0)) {
		buf[len++] = base36[fid % 36];
		fid /= 36;
	}
	buf[len] = '\0';

	return ((int)len);
}
274 
275 /*
276  * smb_maybe_mangled
277  *
278  * Mangled names should be valid DOS file names: less than 12 characters
279  * long, contain at least one tilde character and conform to an 8.3 name
280  * format.
281  *
282  * Returns true if the name looks like a mangled name.
283  */
284 boolean_t
285 smb_maybe_mangled(char *name)
286 {
287 	const char *p;
288 	boolean_t has_tilde = B_FALSE;
289 	int ndots = 0;
290 	int i;
291 
292 	for (p = name, i = 0; (*p != '\0') && (i < SMB_NAME83_LEN); i++, p++) {
293 		if ((strchr(special_chars, *p) != NULL) ||
294 		    (strchr(invalid_dos_chars, *p) != NULL))
295 			return (B_FALSE);
296 
297 		if (*p == '.') {
298 			if ((++ndots) > 1)
299 				return (B_FALSE);
300 		}
301 
302 		if ((*p == '~') && (i < SMB_NAME83_BASELEN))
303 			has_tilde = B_TRUE;
304 
305 		if (*p == '.' && !has_tilde)
306 			return (B_FALSE);
307 	}
308 
309 	return ((*p == '\0') && has_tilde);
310 }
311 
312 /*
313  * smb_mangle
314  *
315  * Microsoft knowledge base article #142982 describes how Windows
316  * generates 8.3 filenames from long file names. Some other details
317  * can be found in article #114816.
318  *
319  * This function will mangle the name whether mangling is required
320  * or not. Callers should use smb_needs_mangled() to determine whether
321  * mangling is required.
322  *
323  * name		original file name
324  * fid		inode number to generate unique mangle
325  * buf		output buffer (buflen bytes) to contain mangled name
326  */
327 void
328 smb_mangle(const char *name, ino64_t fid, char *buf, size_t buflen)
329 {
330 	int i, avail;
331 	const char *p;
332 	char c;
333 	char *pbuf;
334 	char mangle_buf[SMB_NAME83_BASELEN];
335 
336 	ASSERT(name && buf && (buflen >= SMB_SHORTNAMELEN));
337 
338 	avail = SMB_NAME83_BASELEN -
339 	    smb_generate_mangle(fid, mangle_buf, SMB_NAME83_BASELEN);
340 	name += strspn(name, ".");
341 
342 	/*
343 	 * Copy up to avail characters from the base part of name
344 	 * to buf then append the generated mangle string.
345 	 */
346 	p = name;
347 	pbuf = buf;
348 	for (i = 0; (i < avail) && (*p != '\0') && (*p != '.'); ++i, ++p) {
349 		if ((c = smb_mangle_char(*p)) == -1)
350 			continue;
351 		*pbuf++ = c;
352 	}
353 	*pbuf = '\0';
354 	(void) strlcat(pbuf, mangle_buf, SMB_NAME83_BASELEN);
355 	pbuf = strchr(pbuf, '\0');
356 
357 	/*
358 	 * Find the last dot in the name. If there is a dot and an
359 	 * extension, append '.' and up to SMB_NAME83_EXTLEN extension
360 	 * characters to the mangled name.
361 	 */
362 	if (((p = strrchr(name, '.')) != NULL) && (*(++p) != '\0')) {
363 		*pbuf++ = '.';
364 		for (i = 0; (i < SMB_NAME83_EXTLEN) && (*p != '\0'); ++i, ++p) {
365 			if ((c = smb_mangle_char(*p)) == -1)
366 				continue;
367 			*pbuf++ = c;
368 		}
369 	}
370 
371 	*pbuf = '\0';
372 }
373 
374 /*
375  * smb_unmangle
376  *
377  * Given a mangled name, try to find the real file name as it appears
378  * in the directory entry.
379  *
380  * smb_unmangle should only be called on names for which
381  * smb_maybe_mangled() is true
382  *
383  * File systems which support VFSFT_EDIRENT_FLAGS will return the
384  * directory entries as a buffer of edirent_t structure. Others will
385  * return a buffer of dirent64_t structures. A union is used for the
386  * the pointer into the buffer (bufptr, edp and dp).
387  * The ed_name/d_name is NULL terminated by the file system.
388  *
389  * Returns:
390  *   0       - SUCCESS. Unmangled name is returned in namebuf.
391  *   EINVAL  - a parameter was invalid.
392  *   ENOTDIR - dnode is not a directory node.
393  *   ENOENT  - an unmangled name could not be found.
394  */
395 #define	SMB_UNMANGLE_BUFSIZE	(4 * 1024)
396 int
397 smb_unmangle(smb_node_t *dnode, char *name, char *namebuf,
398     int buflen, uint32_t flags)
399 {
400 	int		err, eof, bufsize, reclen;
401 	uint64_t	offset;
402 	ino64_t		ino;
403 	boolean_t	is_edp;
404 	char		*namep, *buf;
405 	char		shortname[SMB_SHORTNAMELEN];
406 	vnode_t		*vp;
407 	union {
408 		char		*bufptr;
409 		edirent_t	*edp;
410 		dirent64_t	*dp;
411 	} u;
412 #define	bufptr	u.bufptr
413 #define	edp		u.edp
414 #define	dp		u.dp
415 
416 	if (dnode == NULL || name == NULL || namebuf == NULL || buflen == 0)
417 		return (EINVAL);
418 
419 	ASSERT(smb_maybe_mangled(name) == B_TRUE);
420 
421 	if (!smb_node_is_dir(dnode))
422 		return (ENOTDIR);
423 
424 	vp = dnode->vp;
425 	*namebuf = '\0';
426 	is_edp = vfs_has_feature(vp->v_vfsp, VFSFT_DIRENTFLAGS);
427 
428 	buf = kmem_alloc(SMB_UNMANGLE_BUFSIZE, KM_SLEEP);
429 	bufsize = SMB_UNMANGLE_BUFSIZE;
430 	offset = 0;
431 
432 	while ((err = smb_vop_readdir(vp, offset, buf, &bufsize,
433 	    &eof, flags, kcred)) == 0) {
434 		if (bufsize == 0) {
435 			err = ENOENT;
436 			break;
437 		}
438 
439 		bufptr = buf;
440 		reclen = 0;
441 
442 		while ((bufptr += reclen) < buf + bufsize) {
443 			if (is_edp) {
444 				reclen = edp->ed_reclen;
445 				offset = edp->ed_off;
446 				ino = edp->ed_ino;
447 				namep = edp->ed_name;
448 			} else {
449 				reclen = dp->d_reclen;
450 				offset = dp->d_off;
451 				ino = dp->d_ino;
452 				namep = dp->d_name;
453 			}
454 
455 			/* skip non utf8 filename */
456 			if (u8_validate(namep, strlen(namep), NULL,
457 			    U8_VALIDATE_ENTIRE, &err) < 0)
458 				continue;
459 
460 			smb_mangle(namep, ino, shortname, SMB_SHORTNAMELEN);
461 
462 			if (smb_strcasecmp(name, shortname, 0) == 0) {
463 				(void) strlcpy(namebuf, namep, buflen);
464 				kmem_free(buf, SMB_UNMANGLE_BUFSIZE);
465 				return (0);
466 			}
467 		}
468 
469 		if (eof) {
470 			err = ENOENT;
471 			break;
472 		}
473 
474 		bufsize = SMB_UNMANGLE_BUFSIZE;
475 	}
476 
477 	kmem_free(buf, SMB_UNMANGLE_BUFSIZE);
478 	return (err);
479 }
480