1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 *
4 * Copyright (c) International Business Machines Corp., 2000,2009
5 * Modified by Steve French (sfrench@us.ibm.com)
6 */
7 #include <linux/fs.h>
8 #include <linux/slab.h>
9 #include <linux/unaligned.h>
10 #include "cifs_fs_sb.h"
11 #include "cifs_unicode.h"
12 #include "cifsglob.h"
13 #include "cifs_debug.h"
14
15 /* Convert character using the SFU - "Services for Unix" remapping range */
16 static bool
convert_sfu_char(const __u16 src_char,char * target)17 convert_sfu_char(const __u16 src_char, char *target)
18 {
19 /*
20 * BB: Cannot handle remapping UNI_SLASH until all the calls to
21 * build_path_from_dentry are modified, as they use slash as
22 * separator.
23 */
24 switch (src_char) {
25 case UNI_COLON:
26 *target = ':';
27 break;
28 case UNI_ASTERISK:
29 *target = '*';
30 break;
31 case UNI_QUESTION:
32 *target = '?';
33 break;
34 case UNI_PIPE:
35 *target = '|';
36 break;
37 case UNI_GRTRTHAN:
38 *target = '>';
39 break;
40 case UNI_LESSTHAN:
41 *target = '<';
42 break;
43 default:
44 return false;
45 }
46 return true;
47 }
48
49 /* Convert character using the SFM - "Services for Mac" remapping range */
50 static bool
convert_sfm_char(const __u16 src_char,char * target)51 convert_sfm_char(const __u16 src_char, char *target)
52 {
53 if (src_char >= 0xF001 && src_char <= 0xF01F) {
54 *target = src_char - 0xF000;
55 return true;
56 }
57 switch (src_char) {
58 case SFM_COLON:
59 *target = ':';
60 break;
61 case SFM_DOUBLEQUOTE:
62 *target = '"';
63 break;
64 case SFM_ASTERISK:
65 *target = '*';
66 break;
67 case SFM_QUESTION:
68 *target = '?';
69 break;
70 case SFM_PIPE:
71 *target = '|';
72 break;
73 case SFM_GRTRTHAN:
74 *target = '>';
75 break;
76 case SFM_LESSTHAN:
77 *target = '<';
78 break;
79 case SFM_SPACE:
80 *target = ' ';
81 break;
82 case SFM_PERIOD:
83 *target = '.';
84 break;
85 default:
86 return false;
87 }
88 return true;
89 }
90
91
92 /*
93 * cifs_mapchar - convert a host-endian char to proper char in codepage
94 * @target - where converted character should be copied
95 * @src_char - 2 byte host-endian source character
96 * @cp - codepage to which character should be converted
97 * @map_type - How should the 7 NTFS/SMB reserved characters be mapped to UCS2?
98 *
99 * This function handles the conversion of a single character. It is the
100 * responsibility of the caller to ensure that the target buffer is large
101 * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE).
102 */
103 static int
cifs_mapchar(char * target,const __u16 * from,const struct nls_table * cp,int maptype)104 cifs_mapchar(char *target, const __u16 *from, const struct nls_table *cp,
105 int maptype)
106 {
107 int len = 1;
108 __u16 src_char;
109
110 src_char = *from;
111
112 if ((maptype == SFM_MAP_UNI_RSVD) && convert_sfm_char(src_char, target))
113 return len;
114 else if ((maptype == SFU_MAP_UNI_RSVD) &&
115 convert_sfu_char(src_char, target))
116 return len;
117
118 /* if character not one of seven in special remap set */
119 len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE);
120 if (len <= 0)
121 goto surrogate_pair;
122
123 return len;
124
125 surrogate_pair:
126 /* convert SURROGATE_PAIR and IVS */
127 if (strcmp(cp->charset, "utf8"))
128 goto unknown;
129 len = utf16s_to_utf8s(from, 3, UTF16_LITTLE_ENDIAN, target, 6);
130 if (len <= 0)
131 goto unknown;
132 return len;
133
134 unknown:
135 *target = '?';
136 len = 1;
137 return len;
138 }
139
140 /*
141 * cifs_from_utf16 - convert utf16le string to local charset
142 * @to - destination buffer
143 * @from - source buffer
144 * @tolen - destination buffer size (in bytes)
145 * @fromlen - source buffer size (in bytes)
146 * @codepage - codepage to which characters should be converted
147 * @mapchar - should characters be remapped according to the mapchars option?
148 *
149 * Convert a little-endian utf16le string (as sent by the server) to a string
150 * in the provided codepage. The tolen and fromlen parameters are to ensure
151 * that the code doesn't walk off of the end of the buffer (which is always
152 * a danger if the alignment of the source buffer is off). The destination
153 * string is always properly null terminated and fits in the destination
154 * buffer. Returns the length of the destination string in bytes (including
155 * null terminator).
156 *
157 * Note that some windows versions actually send multiword UTF-16 characters
158 * instead of straight UTF16-2. The linux nls routines however aren't able to
159 * deal with those characters properly. In the event that we get some of
160 * those characters, they won't be translated properly.
161 */
162 int
cifs_from_utf16(char * to,const __le16 * from,int tolen,int fromlen,const struct nls_table * codepage,int map_type)163 cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
164 const struct nls_table *codepage, int map_type)
165 {
166 int i, charlen, safelen;
167 int outlen = 0;
168 int nullsize = nls_nullsize(codepage);
169 int fromwords = fromlen / 2;
170 char tmp[NLS_MAX_CHARSET_SIZE];
171 __u16 ftmp[3]; /* ftmp[3] = 3array x 2bytes = 6bytes UTF-16 */
172
173 /*
174 * because the chars can be of varying widths, we need to take care
175 * not to overflow the destination buffer when we get close to the
176 * end of it. Until we get to this offset, we don't need to check
177 * for overflow however.
178 */
179 safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize);
180
181 for (i = 0; i < fromwords; i++) {
182 ftmp[0] = get_unaligned_le16(&from[i]);
183 if (ftmp[0] == 0)
184 break;
185 if (i + 1 < fromwords)
186 ftmp[1] = get_unaligned_le16(&from[i + 1]);
187 else
188 ftmp[1] = 0;
189 if (i + 2 < fromwords)
190 ftmp[2] = get_unaligned_le16(&from[i + 2]);
191 else
192 ftmp[2] = 0;
193
194 /*
195 * check to see if converting this character might make the
196 * conversion bleed into the null terminator
197 */
198 if (outlen >= safelen) {
199 charlen = cifs_mapchar(tmp, ftmp, codepage, map_type);
200 if ((outlen + charlen) > (tolen - nullsize))
201 break;
202 }
203
204 /* put converted char into 'to' buffer */
205 charlen = cifs_mapchar(&to[outlen], ftmp, codepage, map_type);
206 outlen += charlen;
207
208 /* charlen (=bytes of UTF-8 for 1 character)
209 * 4bytes UTF-8(surrogate pair) is charlen=4
210 * (4bytes UTF-16 code)
211 * 7-8bytes UTF-8(IVS) is charlen=3+4 or 4+4
212 * (2 UTF-8 pairs divided to 2 UTF-16 pairs) */
213 if (charlen == 4)
214 i++;
215 else if (charlen >= 5)
216 /* 5-6bytes UTF-8 */
217 i += 2;
218 }
219
220 /* properly null-terminate string */
221 for (i = 0; i < nullsize; i++)
222 to[outlen++] = 0;
223
224 return outlen;
225 }
226
227 /*
228 * NAME: cifs_strtoUTF16()
229 *
230 * FUNCTION: Convert character string to unicode string
231 *
232 */
233 int
cifs_strtoUTF16(__le16 * to,const char * from,int len,const struct nls_table * codepage)234 cifs_strtoUTF16(__le16 *to, const char *from, int len,
235 const struct nls_table *codepage)
236 {
237 int charlen;
238 int i;
239 wchar_t wchar_to; /* needed to quiet sparse */
240
241 /* special case for utf8 to handle no plane0 chars */
242 if (!strcmp(codepage->charset, "utf8")) {
243 /*
244 * convert utf8 -> utf16, we assume we have enough space
245 * as caller should have assumed conversion does not overflow
246 * in destination len is length in wchar_t units (16bits)
247 */
248 i = utf8s_to_utf16s(from, len, UTF16_LITTLE_ENDIAN,
249 (wchar_t *) to, len);
250
251 /* if success terminate and exit */
252 if (i >= 0)
253 goto success;
254 /*
255 * if fails fall back to UCS encoding as this
256 * function should not return negative values
257 * currently can fail only if source contains
258 * invalid encoded characters
259 */
260 }
261
262 for (i = 0; len && *from; i++, from += charlen, len -= charlen) {
263 charlen = codepage->char2uni(from, len, &wchar_to);
264 if (charlen < 1) {
265 cifs_dbg(VFS, "strtoUTF16: char2uni of 0x%x returned %d\n",
266 *from, charlen);
267 /* A question mark */
268 wchar_to = 0x003f;
269 charlen = 1;
270 }
271 put_unaligned_le16(wchar_to, &to[i]);
272 }
273
274 success:
275 put_unaligned_le16(0, &to[i]);
276 return i;
277 }
278
279 /*
280 * cifs_utf16_bytes - how long will a string be after conversion?
281 * @utf16 - pointer to input string
282 * @maxbytes - don't go past this many bytes of input string
283 * @codepage - destination codepage
284 *
285 * Walk a utf16le string and return the number of bytes that the string will
286 * be after being converted to the given charset, not including any null
287 * termination required. Don't walk past maxbytes in the source buffer.
288 */
289 int
cifs_utf16_bytes(const __le16 * from,int maxbytes,const struct nls_table * codepage)290 cifs_utf16_bytes(const __le16 *from, int maxbytes,
291 const struct nls_table *codepage)
292 {
293 int i;
294 int charlen, outlen = 0;
295 int maxwords = maxbytes / 2;
296 char tmp[NLS_MAX_CHARSET_SIZE];
297 __u16 ftmp[3];
298
299 for (i = 0; i < maxwords; i++) {
300 ftmp[0] = get_unaligned_le16(&from[i]);
301 if (ftmp[0] == 0)
302 break;
303 if (i + 1 < maxwords)
304 ftmp[1] = get_unaligned_le16(&from[i + 1]);
305 else
306 ftmp[1] = 0;
307 if (i + 2 < maxwords)
308 ftmp[2] = get_unaligned_le16(&from[i + 2]);
309 else
310 ftmp[2] = 0;
311
312 charlen = cifs_mapchar(tmp, ftmp, codepage, NO_MAP_UNI_RSVD);
313 outlen += charlen;
314 }
315
316 return outlen;
317 }
318
319 /*
320 * cifs_strndup_from_utf16 - copy a string from wire format to the local
321 * codepage
322 * @src - source string
323 * @maxlen - don't walk past this many bytes in the source string
324 * @is_unicode - is this a unicode string?
325 * @codepage - destination codepage
326 *
327 * Take a string given by the server, convert it to the local codepage and
328 * put it in a new buffer. Returns a pointer to the new string or NULL on
329 * error.
330 */
331 char *
cifs_strndup_from_utf16(const char * src,const int maxlen,const bool is_unicode,const struct nls_table * codepage)332 cifs_strndup_from_utf16(const char *src, const int maxlen,
333 const bool is_unicode, const struct nls_table *codepage)
334 {
335 int len;
336 char *dst;
337
338 if (is_unicode) {
339 len = cifs_utf16_bytes((__le16 *) src, maxlen, codepage);
340 len += nls_nullsize(codepage);
341 dst = kmalloc(len, GFP_KERNEL);
342 if (!dst)
343 return NULL;
344 cifs_from_utf16(dst, (__le16 *) src, len, maxlen, codepage,
345 NO_MAP_UNI_RSVD);
346 } else {
347 dst = kstrndup(src, maxlen, GFP_KERNEL);
348 }
349
350 return dst;
351 }
352
convert_to_sfu_char(char src_char)353 static __le16 convert_to_sfu_char(char src_char)
354 {
355 __le16 dest_char;
356
357 switch (src_char) {
358 case ':':
359 dest_char = cpu_to_le16(UNI_COLON);
360 break;
361 case '*':
362 dest_char = cpu_to_le16(UNI_ASTERISK);
363 break;
364 case '?':
365 dest_char = cpu_to_le16(UNI_QUESTION);
366 break;
367 case '<':
368 dest_char = cpu_to_le16(UNI_LESSTHAN);
369 break;
370 case '>':
371 dest_char = cpu_to_le16(UNI_GRTRTHAN);
372 break;
373 case '|':
374 dest_char = cpu_to_le16(UNI_PIPE);
375 break;
376 default:
377 dest_char = 0;
378 }
379
380 return dest_char;
381 }
382
convert_to_sfm_char(char src_char,bool end_of_string)383 static __le16 convert_to_sfm_char(char src_char, bool end_of_string)
384 {
385 __le16 dest_char;
386
387 if (src_char >= 0x01 && src_char <= 0x1F) {
388 dest_char = cpu_to_le16(src_char + 0xF000);
389 return dest_char;
390 }
391 switch (src_char) {
392 case ':':
393 dest_char = cpu_to_le16(SFM_COLON);
394 break;
395 case '"':
396 dest_char = cpu_to_le16(SFM_DOUBLEQUOTE);
397 break;
398 case '*':
399 dest_char = cpu_to_le16(SFM_ASTERISK);
400 break;
401 case '?':
402 dest_char = cpu_to_le16(SFM_QUESTION);
403 break;
404 case '<':
405 dest_char = cpu_to_le16(SFM_LESSTHAN);
406 break;
407 case '>':
408 dest_char = cpu_to_le16(SFM_GRTRTHAN);
409 break;
410 case '|':
411 dest_char = cpu_to_le16(SFM_PIPE);
412 break;
413 case '.':
414 if (end_of_string)
415 dest_char = cpu_to_le16(SFM_PERIOD);
416 else
417 dest_char = 0;
418 break;
419 case ' ':
420 if (end_of_string)
421 dest_char = cpu_to_le16(SFM_SPACE);
422 else
423 dest_char = 0;
424 break;
425 default:
426 dest_char = 0;
427 }
428
429 return dest_char;
430 }
431
432 /*
433 * Convert 16 bit Unicode pathname to wire format from string in current code
434 * page. Conversion may involve remapping up the six characters that are
435 * only legal in POSIX-like OS (if they are present in the string). Path
436 * names are little endian 16 bit Unicode on the wire
437 */
438 int
cifsConvertToUTF16(__le16 * target,const char * source,int srclen,const struct nls_table * cp,int map_chars)439 cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
440 const struct nls_table *cp, int map_chars)
441 {
442 int i, charlen;
443 int j = 0;
444 char src_char;
445 __le16 dst_char;
446 wchar_t tmp;
447 wchar_t *wchar_to; /* UTF-16 */
448 int ret;
449 unicode_t u;
450
451 if (map_chars == NO_MAP_UNI_RSVD)
452 return cifs_strtoUTF16(target, source, PATH_MAX, cp);
453
454 wchar_to = kzalloc(6, GFP_KERNEL);
455
456 for (i = 0; i < srclen; j++) {
457 src_char = source[i];
458 charlen = 1;
459
460 /* check if end of string */
461 if (src_char == 0)
462 goto ctoUTF16_out;
463
464 /* see if we must remap this char */
465 if (map_chars == SFU_MAP_UNI_RSVD)
466 dst_char = convert_to_sfu_char(src_char);
467 else if (map_chars == SFM_MAP_UNI_RSVD) {
468 bool end_of_string;
469
470 /**
471 * Remap spaces and periods found at the end of every
472 * component of the path. The special cases of '.' and
473 * '..' are need to be handled because of symlinks.
474 * They are treated as non-end-of-string to avoid
475 * remapping and breaking symlinks pointing to . or ..
476 **/
477 if ((i == 0 || source[i-1] == '\\') &&
478 source[i] == '.' &&
479 (i == srclen-1 || source[i+1] == '\\'))
480 end_of_string = false; /* "." case */
481 else if (i >= 1 &&
482 (i == 1 || source[i-2] == '\\') &&
483 source[i-1] == '.' &&
484 source[i] == '.' &&
485 (i == srclen-1 || source[i+1] == '\\'))
486 end_of_string = false; /* ".." case */
487 else if ((i == srclen - 1) || (source[i+1] == '\\'))
488 end_of_string = true;
489 else
490 end_of_string = false;
491
492 dst_char = convert_to_sfm_char(src_char, end_of_string);
493 } else
494 dst_char = 0;
495 /*
496 * FIXME: We can not handle remapping backslash (UNI_SLASH)
497 * until all the calls to build_path_from_dentry are modified,
498 * as they use backslash as separator.
499 */
500 if (dst_char == 0) {
501 charlen = cp->char2uni(source + i, srclen - i, &tmp);
502 dst_char = cpu_to_le16(tmp);
503
504 /*
505 * if no match, use question mark, which at least in
506 * some cases serves as wild card
507 */
508 if (charlen > 0)
509 goto ctoUTF16;
510
511 /* convert SURROGATE_PAIR */
512 if (strcmp(cp->charset, "utf8") || !wchar_to)
513 goto unknown;
514 if (*(source + i) & 0x80) {
515 charlen = utf8_to_utf32(source + i, 6, &u);
516 if (charlen < 0)
517 goto unknown;
518 } else
519 goto unknown;
520 ret = utf8s_to_utf16s(source + i, charlen,
521 UTF16_LITTLE_ENDIAN,
522 wchar_to, 6);
523 if (ret < 0)
524 goto unknown;
525
526 i += charlen;
527 dst_char = cpu_to_le16(*wchar_to);
528 if (charlen <= 3)
529 /* 1-3bytes UTF-8 to 2bytes UTF-16 */
530 put_unaligned(dst_char, &target[j]);
531 else if (charlen == 4) {
532 /* 4bytes UTF-8(surrogate pair) to 4bytes UTF-16
533 * 7-8bytes UTF-8(IVS) divided to 2 UTF-16
534 * (charlen=3+4 or 4+4) */
535 put_unaligned(dst_char, &target[j]);
536 dst_char = cpu_to_le16(*(wchar_to + 1));
537 j++;
538 put_unaligned(dst_char, &target[j]);
539 } else if (charlen >= 5) {
540 /* 5-6bytes UTF-8 to 6bytes UTF-16 */
541 put_unaligned(dst_char, &target[j]);
542 dst_char = cpu_to_le16(*(wchar_to + 1));
543 j++;
544 put_unaligned(dst_char, &target[j]);
545 dst_char = cpu_to_le16(*(wchar_to + 2));
546 j++;
547 put_unaligned(dst_char, &target[j]);
548 }
549 continue;
550
551 unknown:
552 dst_char = cpu_to_le16(0x003f);
553 charlen = 1;
554 }
555
556 ctoUTF16:
557 /*
558 * character may take more than one byte in the source string,
559 * but will take exactly two bytes in the target string
560 */
561 i += charlen;
562 put_unaligned(dst_char, &target[j]);
563 }
564
565 ctoUTF16_out:
566 put_unaligned(0, &target[j]); /* Null terminate target unicode string */
567 kfree(wchar_to);
568 return j;
569 }
570
571 /*
572 * cifs_local_to_utf16_bytes - how long will a string be after conversion?
573 * @from - pointer to input string
574 * @maxbytes - don't go past this many bytes of input string
575 * @codepage - source codepage
576 *
577 * Walk a string and return the number of bytes that the string will
578 * be after being converted to the given charset, not including any null
579 * termination required. Don't walk past maxbytes in the source buffer.
580 */
581
582 static int
cifs_local_to_utf16_bytes(const char * from,int len,const struct nls_table * codepage)583 cifs_local_to_utf16_bytes(const char *from, int len,
584 const struct nls_table *codepage)
585 {
586 int charlen;
587 int i;
588 wchar_t wchar_to;
589
590 for (i = 0; len && *from; i++, from += charlen, len -= charlen) {
591 charlen = codepage->char2uni(from, len, &wchar_to);
592 /* Failed conversion defaults to a question mark */
593 if (charlen < 1)
594 charlen = 1;
595 }
596 return 2 * i; /* UTF16 characters are two bytes */
597 }
598
599 /*
600 * cifs_strndup_to_utf16 - copy a string to wire format from the local codepage
601 * @src - source string
602 * @maxlen - don't walk past this many bytes in the source string
603 * @utf16_len - the length of the allocated string in bytes (including null)
604 * @cp - source codepage
605 * @remap - map special chars
606 *
607 * Take a string convert it from the local codepage to UTF16 and
608 * put it in a new buffer. Returns a pointer to the new string or NULL on
609 * error.
610 */
611 __le16 *
cifs_strndup_to_utf16(const char * src,const int maxlen,int * utf16_len,const struct nls_table * cp,int remap)612 cifs_strndup_to_utf16(const char *src, const int maxlen, int *utf16_len,
613 const struct nls_table *cp, int remap)
614 {
615 int len;
616 __le16 *dst;
617
618 if (!src)
619 return NULL;
620
621 len = cifs_local_to_utf16_bytes(src, maxlen, cp);
622 len += 2; /* NULL */
623 dst = kmalloc(len, GFP_KERNEL);
624 if (!dst) {
625 *utf16_len = 0;
626 return NULL;
627 }
628 cifsConvertToUTF16(dst, src, strlen(src), cp, remap);
629 *utf16_len = len;
630 return dst;
631 }
632