xref: /freebsd/contrib/llvm-project/libcxx/src/support/ibm/mbsnrtowcs.cpp (revision cb14a3fe5122c879eae1fb480ed7ce82a699ddb6)
1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include <cstddef>  // size_t
10 #include <cwchar>   // mbstate_t
11 #include <limits.h> // MB_LEN_MAX
12 #include <string.h> // wmemcpy
13 
// Converts the multibyte sequence `*src` (at most `src_size_bytes` bytes) into
// wide characters, storing at most `max_dest_chars` of them in `dst`.
//
// Returns the number of wide characters converted; the count excludes the
// null terminator.
// When `dst` is NULL, nothing is stored, `max_dest_chars` is ignored and
// `*src` is left unchanged.
// Returns (size_t) -1 when an invalid sequence is encountered.
// When `dst` is not NULL, leaves `*src` pointing to the next character to
// convert, or NULL if a null character was converted from `*src`.
_LIBCPP_EXPORTED_FROM_ABI size_t mbsnrtowcs(
    wchar_t* __restrict dst,
    const char** __restrict src,
    size_t src_size_bytes,
    size_t max_dest_chars,
    mbstate_t* __restrict ps) {
  const size_t terminated_sequence = static_cast<size_t>(0);
  const size_t invalid_sequence    = static_cast<size_t>(-1);
  const size_t incomplete_sequence = static_cast<size_t>(-2);

  size_t source_converted;
  size_t dest_converted;
  size_t result = 0;

  // Tracks whether a null character was actually converted. `result` alone
  // cannot be used for this: it starts out as 0, so a loop that never runs
  // (empty source or zero-sized destination) would look like a terminator.
  bool converted_terminator = false;

  // If `dst` is null then `max_dest_chars` should be ignored according to the
  // standard. Setting `max_dest_chars` to a large value has this effect.
  if (dst == nullptr)
    max_dest_chars = static_cast<size_t>(-1);

  for (dest_converted = source_converted = 0;
       source_converted < src_size_bytes && (!dst || dest_converted < max_dest_chars);
       ++dest_converted, source_converted += result) {
    // Converts one multi byte character.
    // If result (char_size) is greater than 0, it's the size in bytes of that character.
    // If result (char_size) is zero, it indicates that the null character has been found.
    // Otherwise, it's an error and errno may be set.
    size_t source_remaining = src_size_bytes - source_converted;
    size_t dest_remaining   = max_dest_chars - dest_converted;

    if (dst == nullptr) {
      result = mbrtowc(nullptr, *src + source_converted, source_remaining, ps);
    } else if (dest_remaining >= source_remaining) {
      // dst has enough space to translate in-place.
      result = mbrtowc(dst + dest_converted, *src + source_converted, source_remaining, ps);
    } else {
      /*
       * Convert into a local wide character first and save a copy of the
       * conversion state, so that the state can be restored if the
       * conversion fails and nothing is stored in dst.
       */
      wchar_t converted = L'\0';
      mbstate_t saved_state;

      if (ps != nullptr)
        saved_state = *ps;
      result = mbrtowc(&converted, *src + source_converted, source_remaining, ps);

      if (result == invalid_sequence || result == incomplete_sequence) {
        // Nothing was converted; roll the state back.
        if (ps != nullptr)
          *ps = saved_state;
        break;
      }

      // `result` is the number of *bytes* consumed; mbrtowc() stores at most
      // one wide character per call and the loop condition guarantees one
      // free slot, so store exactly one element at the current position
      // (this also stores the null terminator when result == 0).
      dst[dest_converted] = converted;
    }

    // Don't do anything to change errno from here on.
    if (result == terminated_sequence)
      converted_terminator = true;
    if (result == invalid_sequence || result == terminated_sequence || result == incomplete_sequence) {
      break;
    }
  }

  if (dst) {
    if (converted_terminator)
      *src = nullptr;
    else
      *src += source_converted;
  }
  if (result == invalid_sequence)
    return invalid_sequence;

  return dest_converted;
}
98