xref: /freebsd/contrib/llvm-project/libcxx/src/support/ibm/mbsnrtowcs.cpp (revision 19fae0f66023a97a9b464b3beeeabb2081f575b3)
1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include <cstddef> // size_t
10 #include <cwchar>  // mbstate_t
11 #include <limits.h> // MB_LEN_MAX
12 #include <string.h> // wmemcpy
13 
// Converts the multibyte sequence `*src` (at most `src_size_bytes` bytes are
// examined) into wide characters, storing at most `max_dest_chars` of them in
// `dst`. Conversion uses mbrtowc() with the conversion state `ps`.
//
// Returns the number of wide characters converted, excluding the null
// terminator, or (size_t)-1 when an invalid sequence is encountered (errno is
// left as mbrtowc() set it).
//
// When `dst` is NULL, no characters are stored, `max_dest_chars` is ignored
// and *`src` is not modified.
//
// When `dst` is not NULL, *`src` is updated on return:
//  - to NULL if a null character was converted from *`src` (the null wide
//    character is stored in `dst` but not counted in the return value);
//  - otherwise to the next character to convert, i.e. the first byte that was
//    not consumed. For an invalid or incomplete sequence this is the first
//    byte of that sequence.
_LIBCPP_FUNC_VIS
size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
                  size_t src_size_bytes, size_t max_dest_chars,
                  mbstate_t *__restrict ps) {
  const size_t terminated_sequence = static_cast<size_t>(0);
  const size_t invalid_sequence = static_cast<size_t>(-1);
  const size_t incomplete_sequence = static_cast<size_t>(-2);

  size_t source_converted = 0;
  size_t dest_converted = 0;
  size_t result = 0;

  // Tracked separately from `result`: `result` starts at 0, which is also the
  // value mbrtowc() returns for the null character, so it cannot be used to
  // tell "null converted" apart from "loop never ran".
  bool reached_terminator = false;

  // If `dst` is null then `max_dest_chars` is ignored according to the
  // standard, hence the `dst == nullptr` escape in the loop condition.
  while (source_converted < src_size_bytes && (dst == nullptr || dest_converted < max_dest_chars)) {
    // Converts one multi byte character.
    // If result is greater than 0, it's the size in bytes of that character.
    // If result is zero, it indicates that the null character has been found.
    // Otherwise, it's an error and errno may be set.
    //
    // mbrtowc() stores at most one wide character per call and the loop
    // condition guarantees a free slot in `dst`, so the conversion can always
    // be done directly into its final position.
    wchar_t* const out = (dst != nullptr) ? dst + dest_converted : nullptr;
    result = mbrtowc(out, *src + source_converted, src_size_bytes - source_converted, ps);

    // Don't do anything to change errno from here on.
    if (result == invalid_sequence || result == incomplete_sequence)
      break;

    if (result == terminated_sequence) {
      // The null wide character has been stored (when `dst` is non-null) but
      // is deliberately not counted.
      reached_terminator = true;
      break;
    }

    source_converted += result;
    ++dest_converted;
  }

  if (dst != nullptr) {
    if (reached_terminator)
      *src = nullptr;
    else
      *src += source_converted;
  }

  if (result == invalid_sequence)
    return invalid_sequence;

  return dest_converted;
}
96