xref: /illumos-gate/usr/src/uts/common/fs/smbsrv/smb_mangle_name.c (revision 14b24e2b79293068c8e016a69ef1d872fb5e2fd5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/param.h>
28 #include <sys/sunddi.h>
29 #include <sys/errno.h>
30 #include <sys/extdirent.h>
31 #include <smbsrv/string.h>
32 #include <smbsrv/smb_vops.h>
33 #include <smbsrv/smb_kproto.h>
34 #include <smbsrv/smb_fsops.h>
35 
/*
 * Characters we don't allow in DOS file names.
 * If a filename contains any of these chars, it should get mangled.
 *
 * '.' is also an invalid DOS char, but it is handled as a special
 * case by the callers and therefore does not appear in this list.
 */
static const char invalid_dos_chars[] =
	"\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017"
	"\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037"
	" \"/\\:|<>*?";

/*
 * According to MSKB article #142982, Windows deletes invalid chars and
 * spaces from the file name in the mangling process; the invalid chars
 * include:
 * ."/\[]:;=,
 *
 * But some of these chars and some other chars (e.g. +) are replaced
 * with underscore (_). They are introduced here as special chars.
 */
static const char special_chars[] = "[];=,+";

/*
 * True if c is listed in invalid_dos_chars or has its high bit set
 * (i.e. is not 7-bit ASCII).  Because strchr() also matches the string
 * terminator, '\0' is reported as invalid as well.
 *
 * The argument is parenthesized so that an expression argument is
 * parsed as intended; note that it is still evaluated twice, so it
 * must not have side effects.
 */
#define	isinvalid(c)	\
	((strchr(invalid_dos_chars, (c)) != NULL) || (((c) & 0x80) != 0))

static int smb_generate_mangle(uint64_t, char *, size_t);
static char smb_mangle_char(char);
62 
63 /*
64  * Return true if name contains characters that are invalid in a file
65  * name or it is a reserved DOS device name.  Otherwise, returns false.
66  *
67  * Control characters (values 0 - 31) and the following characters are
68  * invalid:
69  *	< > : " / \ | ? *
70  */
71 boolean_t
72 smb_is_invalid_filename(const char *name)
73 {
74 	const char *p;
75 
76 	if ((p = strpbrk(name, invalid_dos_chars)) != NULL) {
77 		if (*p != ' ')
78 			return (B_TRUE);
79 	}
80 
81 	return (smb_is_reserved_dos_name(name));
82 }
83 
84 /*
85  * smb_is_reserved_dos_name
86  *
87  * This function checks if the name is a reserved DOS device name.
88  * The device name should not be followed immediately by an extension,
89  * for example, NUL.txt.
90  */
91 boolean_t
92 smb_is_reserved_dos_name(const char *name)
93 {
94 	static char *cnames[] = { "CLOCK$", "COM1", "COM2", "COM3", "COM4",
95 		"COM5", "COM6", "COM7", "COM8", "COM9", "CON" };
96 	static char *lnames[] = { "LPT1", "LPT2", "LPT3", "LPT4", "LPT5",
97 		"LPT6", "LPT7", "LPT8", "LPT9" };
98 	static char *others[] = { "AUX", "NUL", "PRN" };
99 	char	**reserved;
100 	char	ch;
101 	int	n_reserved;
102 	int	len;
103 	int	i;
104 
105 	ch = smb_toupper(*name);
106 
107 	switch (ch) {
108 	case 'A':
109 	case 'N':
110 	case 'P':
111 		reserved = others;
112 		n_reserved = sizeof (others) / sizeof (others[0]);
113 		break;
114 	case 'C':
115 		reserved = cnames;
116 		n_reserved = sizeof (cnames) / sizeof (cnames[0]);
117 		break;
118 	case 'L':
119 		reserved = lnames;
120 		n_reserved = sizeof (lnames) / sizeof (lnames[0]);
121 		break;
122 	default:
123 		return (B_FALSE);
124 	}
125 
126 	for (i  = 0; i < n_reserved; ++i) {
127 		len = strlen(reserved[i]);
128 
129 		if (smb_strcasecmp(reserved[i], name, len) == 0) {
130 			ch = *(name + len);
131 			if ((ch == '\0') || (ch == '.'))
132 				return (B_TRUE);
133 		}
134 	}
135 
136 	return (B_FALSE);
137 }
138 
139 /*
140  * smb_needs_mangled
141  *
142  * A name needs to be mangled if any of the following are true:
143  * - the first character is dot (.) and the name is not "." or ".."
144  * - the name contains illegal or special charsacter
145  * - the name name length > 12
146  * - the number of dots == 0 and length > 8
147  * - the number of dots > 1
148  * - the number of dots == 1 and name is not 8.3
149  */
150 boolean_t
151 smb_needs_mangled(const char *name)
152 {
153 	int len, extlen, ndots;
154 	const char *p;
155 	const char *last_dot;
156 
157 	if ((strcmp(name, ".") == 0) || (strcmp(name, "..") == 0))
158 		return (B_FALSE);
159 
160 	if (*name == '.')
161 		return (B_TRUE);
162 
163 	len = 0;
164 	ndots = 0;
165 	last_dot = NULL;
166 	for (p = name; *p != '\0'; ++p) {
167 		if (smb_iscntrl(*p) ||
168 		    (strchr(special_chars, *p) != NULL) ||
169 		    (strchr(invalid_dos_chars, *p)) != NULL)
170 			return (B_TRUE);
171 
172 		if (*p == '.') {
173 			++ndots;
174 			last_dot = p;
175 		}
176 		++len;
177 	}
178 
179 	if ((len > SMB_NAME83_LEN) ||
180 	    (ndots == 0 && len > SMB_NAME83_BASELEN) ||
181 	    (ndots > 1)) {
182 		return (B_TRUE);
183 	}
184 
185 	if (last_dot != NULL) {
186 		extlen = strlen(last_dot + 1);
187 		if ((extlen == 0) || (extlen > SMB_NAME83_EXTLEN))
188 			return (B_TRUE);
189 
190 		if ((len - extlen - 1) > SMB_NAME83_BASELEN)
191 			return (B_TRUE);
192 	}
193 
194 	return (B_FALSE);
195 }
196 
197 /*
198  * smb_mangle_char
199  *
200  * If c is an invalid DOS character or non-ascii, it should
201  * not be used in the mangled name. We return -1 to indicate
202  * an invalid character.
203  *
204  * If c is a special chars, it should be replaced with '_'.
205  *
206  * Otherwise c is returned as uppercase.
207  */
208 static char
209 smb_mangle_char(char c)
210 {
211 	if (isinvalid(c))
212 		return (-1);
213 
214 	if (strchr(special_chars, c))
215 		return ('_');
216 
217 	return (smb_toupper(c));
218 }
219 
/*
 * smb_generate_mangle
 *
 * Generate a mangle string in buf: a tilde followed by the base-36
 * digits of fid, least significant digit first.  A fid of 0 is treated
 * as (uint64_t)-1 so that at least one digit is always available.
 *
 * The string is always NUL terminated and never occupies more than
 * buflen bytes, terminator included.  Provided buflen >= 3 it contains
 * at least 2 and at most (buflen - 1) characters.  With buflen == 2
 * only the tilde fits; with buflen < 2 nothing is generated (an empty
 * string is stored if there is room for the terminator).
 *
 * Returns the number of chars in the generated mangle, not counting
 * the terminator.
 */
static int
smb_generate_mangle(uint64_t fid, char *buf, size_t buflen)
{
	static const char base36[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
	size_t len = 0;

	/* No room for '~' plus the terminator: do not touch buf[1]. */
	if (buflen < 2) {
		if (buflen == 1)
			buf[0] = '\0';
		return (0);
	}

	if (fid == 0)
		fid = (uint64_t)-1;

	buf[len++] = '~';

	/* Keep one byte in reserve for the terminator. */
	while ((len < buflen - 1) && (fid > 0)) {
		buf[len++] = base36[fid % 36];
		fid /= 36;
	}
	buf[len] = '\0';

	return ((int)len);
}
245 
246 /*
247  * smb_maybe_mangled
248  *
249  * Mangled names should be valid DOS file names: less than 12 characters
250  * long, contain at least one tilde character and conform to an 8.3 name
251  * format.
252  *
253  * Returns true if the name looks like a mangled name.
254  */
255 boolean_t
256 smb_maybe_mangled(char *name)
257 {
258 	const char *p;
259 	boolean_t has_tilde = B_FALSE;
260 	int ndots = 0;
261 	int i;
262 
263 	for (p = name, i = 0; (*p != '\0') && (i < SMB_NAME83_LEN); i++, p++) {
264 		if ((strchr(special_chars, *p) != NULL) ||
265 		    (strchr(invalid_dos_chars, *p) != NULL))
266 			return (B_FALSE);
267 
268 		if (*p == '.') {
269 			if ((++ndots) > 1)
270 				return (B_FALSE);
271 		}
272 
273 		if ((*p == '~') && (i < SMB_NAME83_BASELEN))
274 			has_tilde = B_TRUE;
275 
276 		if (*p == '.' && !has_tilde)
277 			return (B_FALSE);
278 	}
279 
280 	return ((*p == '\0') && has_tilde);
281 }
282 
283 /*
284  * smb_mangle
285  *
286  * Microsoft knowledge base article #142982 describes how Windows
287  * generates 8.3 filenames from long file names. Some other details
288  * can be found in article #114816.
289  *
290  * This function will mangle the name whether mangling is required
291  * or not. Callers should use smb_needs_mangled() to determine whether
292  * mangling is required.
293  *
294  * name		original file name
295  * fid		inode number to generate unique mangle
296  * buf		output buffer (buflen bytes) to contain mangled name
297  */
298 void
299 smb_mangle(const char *name, ino64_t fid, char *buf, size_t buflen)
300 {
301 	int i, avail;
302 	const char *p;
303 	char c;
304 	char *pbuf;
305 	char mangle_buf[SMB_NAME83_BASELEN];
306 
307 	ASSERT(name && buf && (buflen >= SMB_SHORTNAMELEN));
308 
309 	avail = SMB_NAME83_BASELEN -
310 	    smb_generate_mangle(fid, mangle_buf, SMB_NAME83_BASELEN);
311 	name += strspn(name, ".");
312 
313 	/*
314 	 * Copy up to avail characters from the base part of name
315 	 * to buf then append the generated mangle string.
316 	 */
317 	p = name;
318 	pbuf = buf;
319 	for (i = 0; (i < avail) && (*p != '\0') && (*p != '.'); ++i, ++p) {
320 		if ((c = smb_mangle_char(*p)) == -1)
321 			continue;
322 		*pbuf++ = c;
323 	}
324 	*pbuf = '\0';
325 	(void) strlcat(pbuf, mangle_buf, SMB_NAME83_BASELEN);
326 	pbuf = strchr(pbuf, '\0');
327 
328 	/*
329 	 * Find the last dot in the name. If there is a dot and an
330 	 * extension, append '.' and up to SMB_NAME83_EXTLEN extension
331 	 * characters to the mangled name.
332 	 */
333 	if (((p = strrchr(name, '.')) != NULL) && (*(++p) != '\0')) {
334 		*pbuf++ = '.';
335 		for (i = 0; (i < SMB_NAME83_EXTLEN) && (*p != '\0'); ++i, ++p) {
336 			if ((c = smb_mangle_char(*p)) == -1)
337 				continue;
338 			*pbuf++ = c;
339 		}
340 	}
341 
342 	*pbuf = '\0';
343 }
344 
345 /*
346  * smb_unmangle
347  *
348  * Given a mangled name, try to find the real file name as it appears
349  * in the directory entry.
350  *
351  * smb_unmangle should only be called on names for which
352  * smb_maybe_mangled() is true
353  *
354  * File systems which support VFSFT_EDIRENT_FLAGS will return the
355  * directory entries as a buffer of edirent_t structure. Others will
356  * return a buffer of dirent64_t structures. A union is used for the
357  * the pointer into the buffer (bufptr, edp and dp).
358  * The ed_name/d_name is NULL terminated by the file system.
359  *
360  * Returns:
361  *   0       - SUCCESS. Unmangled name is returned in namebuf.
362  *   EINVAL  - a parameter was invalid.
363  *   ENOTDIR - dnode is not a directory node.
364  *   ENOENT  - an unmangled name could not be found.
365  */
366 #define	SMB_UNMANGLE_BUFSIZE	(4 * 1024)
367 int
368 smb_unmangle(smb_node_t *dnode, char *name, char *namebuf,
369     int buflen, uint32_t flags)
370 {
371 	int		err, eof, bufsize, reclen;
372 	uint64_t	offset;
373 	ino64_t		ino;
374 	boolean_t	is_edp;
375 	char		*namep, *buf;
376 	char		shortname[SMB_SHORTNAMELEN];
377 	vnode_t		*vp;
378 	union {
379 		char		*u_bufptr;
380 		edirent_t	*u_edp;
381 		dirent64_t	*u_dp;
382 	} u;
383 #define	bufptr		u.u_bufptr
384 #define	edp		u.u_edp
385 #define	dp		u.u_dp
386 
387 	if (dnode == NULL || name == NULL || namebuf == NULL || buflen == 0)
388 		return (EINVAL);
389 
390 	ASSERT(smb_maybe_mangled(name) == B_TRUE);
391 
392 	if (!smb_node_is_dir(dnode))
393 		return (ENOTDIR);
394 
395 	vp = dnode->vp;
396 	*namebuf = '\0';
397 	is_edp = vfs_has_feature(vp->v_vfsp, VFSFT_DIRENTFLAGS);
398 
399 	buf = kmem_alloc(SMB_UNMANGLE_BUFSIZE, KM_SLEEP);
400 	bufsize = SMB_UNMANGLE_BUFSIZE;
401 	offset = 0;
402 
403 	while ((err = smb_vop_readdir(vp, offset, buf, &bufsize,
404 	    &eof, flags, zone_kcred())) == 0) {
405 		if (bufsize == 0) {
406 			err = ENOENT;
407 			break;
408 		}
409 
410 		bufptr = buf;
411 		reclen = 0;
412 
413 		while ((bufptr += reclen) < buf + bufsize) {
414 			if (is_edp) {
415 				reclen = edp->ed_reclen;
416 				offset = edp->ed_off;
417 				ino = edp->ed_ino;
418 				namep = edp->ed_name;
419 			} else {
420 				reclen = dp->d_reclen;
421 				offset = dp->d_off;
422 				ino = dp->d_ino;
423 				namep = dp->d_name;
424 			}
425 
426 			/* skip non utf8 filename */
427 			if (u8_validate(namep, strlen(namep), NULL,
428 			    U8_VALIDATE_ENTIRE, &err) < 0)
429 				continue;
430 
431 			smb_mangle(namep, ino, shortname, SMB_SHORTNAMELEN);
432 
433 			if (smb_strcasecmp(name, shortname, 0) == 0) {
434 				(void) strlcpy(namebuf, namep, buflen);
435 				kmem_free(buf, SMB_UNMANGLE_BUFSIZE);
436 				return (0);
437 			}
438 		}
439 
440 		if (eof) {
441 			err = ENOENT;
442 			break;
443 		}
444 
445 		bufsize = SMB_UNMANGLE_BUFSIZE;
446 	}
447 
448 	kmem_free(buf, SMB_UNMANGLE_BUFSIZE);
449 	return (err);
450 }
451