1 //===-- TextEncoding.h - Text encoding conversion class -----------*- C++ -*-=// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file provides a utility class to convert between different character 11 /// set encodings. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_SUPPORT_TEXT_ENCODING_H 16 #define LLVM_SUPPORT_TEXT_ENCODING_H 17 18 #include "llvm/ADT/SmallString.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/Config/config.h" 21 #include "llvm/Support/Compiler.h" 22 #include "llvm/Support/ErrorOr.h" 23 24 #include <string> 25 #include <system_error> 26 27 namespace llvm { 28 29 template <typename T> class SmallVectorImpl; 30 31 namespace details { 32 class TextEncodingConverterImplBase { 33 34 private: 35 /// Converts a string. 36 /// \param[in] Source source string 37 /// \param[out] Result container for converted string 38 /// \return error code in case something went wrong 39 /// 40 /// The following error codes can occur, among others: 41 /// - std::errc::argument_list_too_long: The result requires more than 42 /// std::numeric_limits<size_t>::max() bytes. 43 /// - std::errc::illegal_byte_sequence: The input contains an invalid 44 /// multibyte sequence. 45 /// - std::errc::invalid_argument: The input contains an incomplete 46 /// multibyte sequence. 47 /// 48 /// If the destination encoding is stateful, the shift state will be set 49 /// to the initial state. 50 /// 51 /// In case of an error, the result string contains the successfully converted 52 /// part of the input string. 53 /// 54 virtual std::error_code convertString(StringRef Source, 55 SmallVectorImpl<char> &Result) = 0; 56 57 /// Resets the converter to the initial state. 58 virtual void reset() = 0; 59 60 public: 61 virtual ~TextEncodingConverterImplBase() = default; 62 63 /// Converts a string and resets the converter to the initial state. convert(StringRef Source,SmallVectorImpl<char> & Result)64 std::error_code convert(StringRef Source, SmallVectorImpl<char> &Result) { 65 auto EC = convertString(Source, Result); 66 reset(); 67 return EC; 68 } 69 }; 70 } // namespace details 71 72 // Names inspired by https://wg21.link/p1885. 73 enum class TextEncoding { 74 /// UTF-8 character set encoding. 75 UTF8, 76 77 /// IBM EBCDIC 1047 character set encoding. 78 IBM1047 79 }; 80 81 /// Utility class to convert between different character encodings. 82 class TextEncodingConverter { 83 std::unique_ptr<details::TextEncodingConverterImplBase> Converter; 84 TextEncodingConverter(std::unique_ptr<details::TextEncodingConverterImplBase> Converter)85 TextEncodingConverter( 86 std::unique_ptr<details::TextEncodingConverterImplBase> Converter) 87 : Converter(std::move(Converter)) {} 88 89 public: 90 /// Creates a TextEncodingConverter instance. 91 /// Returns std::errc::invalid_argument in case the requested conversion is 92 /// not supported. 93 /// \param[in] From the source character encoding 94 /// \param[in] To the target character encoding 95 /// \return a TextEncodingConverter instance or an error code 96 LLVM_ABI static ErrorOr<TextEncodingConverter> create(TextEncoding From, 97 TextEncoding To); 98 99 /// Creates a TextEncodingConverter instance. 100 /// Returns std::errc::invalid_argument in case the requested conversion is 101 /// not supported. 102 /// \param[in] From name of the source character encoding 103 /// \param[in] To name of the target character encoding 104 /// \return a TextEncodingConverter instance or an error code 105 LLVM_ABI static ErrorOr<TextEncodingConverter> create(StringRef From, 106 StringRef To); 107 108 TextEncodingConverter(const TextEncodingConverter &) = delete; 109 TextEncodingConverter &operator=(const TextEncodingConverter &) = delete; 110 TextEncodingConverter(TextEncodingConverter && Other)111 TextEncodingConverter(TextEncodingConverter &&Other) 112 : Converter(std::move(Other.Converter)) {} 113 114 TextEncodingConverter &operator=(TextEncodingConverter &&Other) { 115 if (this != &Other) 116 Converter = std::move(Other.Converter); 117 return *this; 118 } 119 120 ~TextEncodingConverter() = default; 121 122 /// Converts a string. 123 /// \param[in] Source source string 124 /// \param[out] Result container for converted string 125 /// \return error code in case something went wrong convert(StringRef Source,SmallVectorImpl<char> & Result)126 std::error_code convert(StringRef Source, 127 SmallVectorImpl<char> &Result) const { 128 return Converter->convert(Source, Result); 129 } 130 convert(StringRef Source)131 ErrorOr<std::string> convert(StringRef Source) const { 132 SmallString<100> Result; 133 auto EC = Converter->convert(Source, Result); 134 if (!EC) 135 return std::string(Result); 136 return EC; 137 } 138 }; 139 140 } // namespace llvm 141 142 #endif 143