xref: /freebsd/contrib/llvm-project/llvm/include/llvm/Support/TextEncoding.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===-- TextEncoding.h - Text encoding conversion class -----------*- C++ -*-=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file provides a utility class to convert between different character
11 /// set encodings.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_SUPPORT_TEXT_ENCODING_H
16 #define LLVM_SUPPORT_TEXT_ENCODING_H
17 
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/config.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorOr.h"

#include <cassert>
#include <memory>
#include <string>
#include <system_error>
#include <utility>
26 
27 namespace llvm {
28 
29 template <typename T> class SmallVectorImpl;
30 
31 namespace details {
32 class TextEncodingConverterImplBase {
33 
34 private:
35   /// Converts a string.
36   /// \param[in] Source source string
37   /// \param[out] Result container for converted string
38   /// \return error code in case something went wrong
39   ///
40   /// The following error codes can occur, among others:
41   ///   - std::errc::argument_list_too_long: The result requires more than
42   ///     std::numeric_limits<size_t>::max() bytes.
43   ///   - std::errc::illegal_byte_sequence: The input contains an invalid
44   ///     multibyte sequence.
45   ///   - std::errc::invalid_argument: The input contains an incomplete
46   ///     multibyte sequence.
47   ///
48   /// If the destination encoding is stateful, the shift state will be set
49   /// to the initial state.
50   ///
51   /// In case of an error, the result string contains the successfully converted
52   /// part of the input string.
53   ///
54   virtual std::error_code convertString(StringRef Source,
55                                         SmallVectorImpl<char> &Result) = 0;
56 
57   /// Resets the converter to the initial state.
58   virtual void reset() = 0;
59 
60 public:
61   virtual ~TextEncodingConverterImplBase() = default;
62 
63   /// Converts a string and resets the converter to the initial state.
convert(StringRef Source,SmallVectorImpl<char> & Result)64   std::error_code convert(StringRef Source, SmallVectorImpl<char> &Result) {
65     auto EC = convertString(Source, Result);
66     reset();
67     return EC;
68   }
69 };
70 } // namespace details
71 
// The enumerator names are inspired by https://wg21.link/p1885.
/// Character set encodings that can be selected by enumerator.
enum class TextEncoding {
  /// The UTF-8 character set encoding.
  UTF8,

  /// The IBM EBCDIC 1047 character set encoding.
  IBM1047,
};
80 
81 /// Utility class to convert between different character encodings.
82 class TextEncodingConverter {
83   std::unique_ptr<details::TextEncodingConverterImplBase> Converter;
84 
TextEncodingConverter(std::unique_ptr<details::TextEncodingConverterImplBase> Converter)85   TextEncodingConverter(
86       std::unique_ptr<details::TextEncodingConverterImplBase> Converter)
87       : Converter(std::move(Converter)) {}
88 
89 public:
90   /// Creates a TextEncodingConverter instance.
91   /// Returns std::errc::invalid_argument in case the requested conversion is
92   /// not supported.
93   /// \param[in] From the source character encoding
94   /// \param[in] To the target character encoding
95   /// \return a TextEncodingConverter instance or an error code
96   LLVM_ABI static ErrorOr<TextEncodingConverter> create(TextEncoding From,
97                                                         TextEncoding To);
98 
99   /// Creates a TextEncodingConverter instance.
100   /// Returns std::errc::invalid_argument in case the requested conversion is
101   /// not supported.
102   /// \param[in] From name of the source character encoding
103   /// \param[in] To name of the target character encoding
104   /// \return a TextEncodingConverter instance or an error code
105   LLVM_ABI static ErrorOr<TextEncodingConverter> create(StringRef From,
106                                                         StringRef To);
107 
108   TextEncodingConverter(const TextEncodingConverter &) = delete;
109   TextEncodingConverter &operator=(const TextEncodingConverter &) = delete;
110 
TextEncodingConverter(TextEncodingConverter && Other)111   TextEncodingConverter(TextEncodingConverter &&Other)
112       : Converter(std::move(Other.Converter)) {}
113 
114   TextEncodingConverter &operator=(TextEncodingConverter &&Other) {
115     if (this != &Other)
116       Converter = std::move(Other.Converter);
117     return *this;
118   }
119 
120   ~TextEncodingConverter() = default;
121 
122   /// Converts a string.
123   /// \param[in] Source source string
124   /// \param[out] Result container for converted string
125   /// \return error code in case something went wrong
convert(StringRef Source,SmallVectorImpl<char> & Result)126   std::error_code convert(StringRef Source,
127                           SmallVectorImpl<char> &Result) const {
128     return Converter->convert(Source, Result);
129   }
130 
convert(StringRef Source)131   ErrorOr<std::string> convert(StringRef Source) const {
132     SmallString<100> Result;
133     auto EC = Converter->convert(Source, Result);
134     if (!EC)
135       return std::string(Result);
136     return EC;
137   }
138 };
139 
140 } // namespace llvm
141 
142 #endif
143